"""sqlglot.dialects.presto — Presto/Trino SQL dialect for sqlglot."""
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, generator, parser, tokens, transforms 6from sqlglot.dialects.dialect import ( 7 Dialect, 8 NormalizationStrategy, 9 binary_from_function, 10 bool_xor_sql, 11 date_trunc_to_time, 12 datestrtodate_sql, 13 encode_decode_sql, 14 build_formatted_time, 15 if_sql, 16 left_to_substring_sql, 17 no_ilike_sql, 18 no_pivot_sql, 19 no_safe_divide_sql, 20 no_timestamp_sql, 21 regexp_extract_sql, 22 rename_func, 23 right_to_substring_sql, 24 sha256_sql, 25 struct_extract_sql, 26 str_position_sql, 27 timestamptrunc_sql, 28 timestrtotime_sql, 29 ts_or_ds_add_cast, 30 unit_to_str, 31) 32from sqlglot.dialects.hive import Hive 33from sqlglot.dialects.mysql import MySQL 34from sqlglot.helper import apply_index_offset, seq_get 35from sqlglot.tokens import TokenType 36from sqlglot.transforms import unqualify_columns 37 38DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TimestampAdd, exp.DateSub] 39 40 41def _explode_to_unnest_sql(self: Presto.Generator, expression: exp.Lateral) -> str: 42 if isinstance(expression.this, exp.Explode): 43 return self.sql( 44 exp.Join( 45 this=exp.Unnest( 46 expressions=[expression.this.this], 47 alias=expression.args.get("alias"), 48 offset=isinstance(expression.this, exp.Posexplode), 49 ), 50 kind="cross", 51 ) 52 ) 53 return self.lateral_sql(expression) 54 55 56def _initcap_sql(self: Presto.Generator, expression: exp.Initcap) -> str: 57 regex = r"(\w)(\w*)" 58 return f"REGEXP_REPLACE({self.sql(expression, 'this')}, '{regex}', x -> UPPER(x[1]) || LOWER(x[2]))" 59 60 61def _no_sort_array(self: Presto.Generator, expression: exp.SortArray) -> str: 62 if expression.args.get("asc") == exp.false(): 63 comparator = "(a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END" 64 else: 65 comparator = None 66 return self.func("ARRAY_SORT", expression.this, comparator) 67 68 69def _schema_sql(self: Presto.Generator, expression: exp.Schema) -> str: 70 if isinstance(expression.parent, 
exp.Property): 71 columns = ", ".join(f"'{c.name}'" for c in expression.expressions) 72 return f"ARRAY[{columns}]" 73 74 if expression.parent: 75 for schema in expression.parent.find_all(exp.Schema): 76 column_defs = schema.find_all(exp.ColumnDef) 77 if column_defs and isinstance(schema.parent, exp.Property): 78 expression.expressions.extend(column_defs) 79 80 return self.schema_sql(expression) 81 82 83def _quantile_sql(self: Presto.Generator, expression: exp.Quantile) -> str: 84 self.unsupported("Presto does not support exact quantiles") 85 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 86 87 88def _str_to_time_sql( 89 self: Presto.Generator, expression: exp.StrToDate | exp.StrToTime | exp.TsOrDsToDate 90) -> str: 91 return self.func("DATE_PARSE", expression.this, self.format_time(expression)) 92 93 94def _ts_or_ds_to_date_sql(self: Presto.Generator, expression: exp.TsOrDsToDate) -> str: 95 time_format = self.format_time(expression) 96 if time_format and time_format not in (Presto.TIME_FORMAT, Presto.DATE_FORMAT): 97 return self.sql(exp.cast(_str_to_time_sql(self, expression), exp.DataType.Type.DATE)) 98 return self.sql( 99 exp.cast(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP), exp.DataType.Type.DATE) 100 ) 101 102 103def _ts_or_ds_add_sql(self: Presto.Generator, expression: exp.TsOrDsAdd) -> str: 104 expression = ts_or_ds_add_cast(expression) 105 unit = unit_to_str(expression) 106 return self.func("DATE_ADD", unit, expression.expression, expression.this) 107 108 109def _ts_or_ds_diff_sql(self: Presto.Generator, expression: exp.TsOrDsDiff) -> str: 110 this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMP) 111 expr = exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP) 112 unit = unit_to_str(expression) 113 return self.func("DATE_DIFF", unit, expr, this) 114 115 116def _build_approx_percentile(args: t.List) -> exp.Expression: 117 if len(args) == 4: 118 return exp.ApproxQuantile( 119 
this=seq_get(args, 0), 120 weight=seq_get(args, 1), 121 quantile=seq_get(args, 2), 122 accuracy=seq_get(args, 3), 123 ) 124 if len(args) == 3: 125 return exp.ApproxQuantile( 126 this=seq_get(args, 0), quantile=seq_get(args, 1), accuracy=seq_get(args, 2) 127 ) 128 return exp.ApproxQuantile.from_arg_list(args) 129 130 131def _build_from_unixtime(args: t.List) -> exp.Expression: 132 if len(args) == 3: 133 return exp.UnixToTime( 134 this=seq_get(args, 0), 135 hours=seq_get(args, 1), 136 minutes=seq_get(args, 2), 137 ) 138 if len(args) == 2: 139 return exp.UnixToTime(this=seq_get(args, 0), zone=seq_get(args, 1)) 140 141 return exp.UnixToTime.from_arg_list(args) 142 143 144def _unnest_sequence(expression: exp.Expression) -> exp.Expression: 145 if isinstance(expression, exp.Table): 146 if isinstance(expression.this, exp.GenerateSeries): 147 unnest = exp.Unnest(expressions=[expression.this]) 148 149 if expression.alias: 150 return exp.alias_(unnest, alias="_u", table=[expression.alias], copy=False) 151 return unnest 152 return expression 153 154 155def _first_last_sql(self: Presto.Generator, expression: exp.Func) -> str: 156 """ 157 Trino doesn't support FIRST / LAST as functions, but they're valid in the context 158 of MATCH_RECOGNIZE, so we need to preserve them in that case. In all other cases 159 they're converted into an ARBITRARY call. 
160 161 Reference: https://trino.io/docs/current/sql/match-recognize.html#logical-navigation-functions 162 """ 163 if isinstance(expression.find_ancestor(exp.MatchRecognize, exp.Select), exp.MatchRecognize): 164 return self.function_fallback_sql(expression) 165 166 return rename_func("ARBITRARY")(self, expression) 167 168 169def _unix_to_time_sql(self: Presto.Generator, expression: exp.UnixToTime) -> str: 170 scale = expression.args.get("scale") 171 timestamp = self.sql(expression, "this") 172 if scale in (None, exp.UnixToTime.SECONDS): 173 return rename_func("FROM_UNIXTIME")(self, expression) 174 175 return f"FROM_UNIXTIME(CAST({timestamp} AS DOUBLE) / POW(10, {scale}))" 176 177 178def _jsonextract_sql(self: Presto.Generator, expression: exp.JSONExtract) -> str: 179 is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True) 180 181 # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks 182 # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e ROW access) in Presto/Trino 183 if not expression.args.get("variant_extract") or is_json_extract: 184 return self.func( 185 "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions 186 ) 187 188 this = self.sql(expression, "this") 189 190 # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y) to a ROW access col.x.y 191 segments = [] 192 for path_key in expression.expression.expressions[1:]: 193 if not isinstance(path_key, exp.JSONPathKey): 194 # Cannot transpile subscripts, wildcards etc to dot notation 195 self.unsupported(f"Cannot transpile JSONPath segment '{path_key}' to ROW access") 196 continue 197 key = path_key.this 198 if not exp.SAFE_IDENTIFIER_RE.match(key): 199 key = f'"{key}"' 200 segments.append(f".{key}") 201 202 expr = "".join(segments) 203 204 return f"{this}{expr}" 205 206 207def _to_int(expression: exp.Expression) -> exp.Expression: 208 if not expression.type: 209 from sqlglot.optimizer.annotate_types import 
annotate_types 210 211 annotate_types(expression) 212 if expression.type and expression.type.this not in exp.DataType.INTEGER_TYPES: 213 return exp.cast(expression, to=exp.DataType.Type.BIGINT) 214 return expression 215 216 217def _build_to_char(args: t.List) -> exp.TimeToStr: 218 fmt = seq_get(args, 1) 219 if isinstance(fmt, exp.Literal): 220 # We uppercase this to match Teradata's format mapping keys 221 fmt.set("this", fmt.this.upper()) 222 223 # We use "teradata" on purpose here, because the time formats are different in Presto. 224 # See https://prestodb.io/docs/current/functions/teradata.html?highlight=to_char#to_char 225 return build_formatted_time(exp.TimeToStr, "teradata")(args) 226 227 228def _date_delta_sql( 229 name: str, negate_interval: bool = False 230) -> t.Callable[[Presto.Generator, DATE_ADD_OR_SUB], str]: 231 def _delta_sql(self: Presto.Generator, expression: DATE_ADD_OR_SUB) -> str: 232 interval = _to_int(expression.expression) 233 return self.func( 234 name, 235 unit_to_str(expression), 236 interval * (-1) if negate_interval else interval, 237 expression.this, 238 ) 239 240 return _delta_sql 241 242 243class Presto(Dialect): 244 INDEX_OFFSET = 1 245 NULL_ORDERING = "nulls_are_last" 246 TIME_FORMAT = MySQL.TIME_FORMAT 247 TIME_MAPPING = MySQL.TIME_MAPPING 248 STRICT_STRING_CONCAT = True 249 SUPPORTS_SEMI_ANTI_JOIN = False 250 TYPED_DIVISION = True 251 TABLESAMPLE_SIZE_IS_PERCENT = True 252 LOG_BASE_FIRST: t.Optional[bool] = None 253 254 # https://github.com/trinodb/trino/issues/17 255 # https://github.com/trinodb/trino/issues/12289 256 # https://github.com/prestodb/presto/issues/2863 257 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 258 259 class Tokenizer(tokens.Tokenizer): 260 UNICODE_STRINGS = [ 261 (prefix + q, q) 262 for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES) 263 for prefix in ("U&", "u&") 264 ] 265 266 KEYWORDS = { 267 **tokens.Tokenizer.KEYWORDS, 268 "START": TokenType.BEGIN, 269 "MATCH_RECOGNIZE": 
TokenType.MATCH_RECOGNIZE, 270 "ROW": TokenType.STRUCT, 271 "IPADDRESS": TokenType.IPADDRESS, 272 "IPPREFIX": TokenType.IPPREFIX, 273 "TDIGEST": TokenType.TDIGEST, 274 "HYPERLOGLOG": TokenType.HLLSKETCH, 275 } 276 KEYWORDS.pop("/*+") 277 KEYWORDS.pop("QUALIFY") 278 279 class Parser(parser.Parser): 280 VALUES_FOLLOWED_BY_PAREN = False 281 282 FUNCTIONS = { 283 **parser.Parser.FUNCTIONS, 284 "ARBITRARY": exp.AnyValue.from_arg_list, 285 "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list, 286 "APPROX_PERCENTILE": _build_approx_percentile, 287 "BITWISE_AND": binary_from_function(exp.BitwiseAnd), 288 "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 289 "BITWISE_OR": binary_from_function(exp.BitwiseOr), 290 "BITWISE_XOR": binary_from_function(exp.BitwiseXor), 291 "CARDINALITY": exp.ArraySize.from_arg_list, 292 "CONTAINS": exp.ArrayContains.from_arg_list, 293 "DATE_ADD": lambda args: exp.DateAdd( 294 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 295 ), 296 "DATE_DIFF": lambda args: exp.DateDiff( 297 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 298 ), 299 "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"), 300 "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"), 301 "DATE_TRUNC": date_trunc_to_time, 302 "ELEMENT_AT": lambda args: exp.Bracket( 303 this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True 304 ), 305 "FROM_HEX": exp.Unhex.from_arg_list, 306 "FROM_UNIXTIME": _build_from_unixtime, 307 "FROM_UTF8": lambda args: exp.Decode( 308 this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8") 309 ), 310 "NOW": exp.CurrentTimestamp.from_arg_list, 311 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 312 this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2) 313 ), 314 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 315 this=seq_get(args, 0), 316 expression=seq_get(args, 1), 317 replacement=seq_get(args, 2) or 
exp.Literal.string(""), 318 ), 319 "ROW": exp.Struct.from_arg_list, 320 "SEQUENCE": exp.GenerateSeries.from_arg_list, 321 "SET_AGG": exp.ArrayUniqueAgg.from_arg_list, 322 "SPLIT_TO_MAP": exp.StrToMap.from_arg_list, 323 "STRPOS": lambda args: exp.StrPosition( 324 this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2) 325 ), 326 "TO_CHAR": _build_to_char, 327 "TO_UNIXTIME": exp.TimeToUnix.from_arg_list, 328 "TO_UTF8": lambda args: exp.Encode( 329 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 330 ), 331 "MD5": exp.MD5Digest.from_arg_list, 332 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 333 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 334 } 335 336 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 337 FUNCTION_PARSERS.pop("TRIM") 338 339 class Generator(generator.Generator): 340 INTERVAL_ALLOWS_PLURAL_FORM = False 341 JOIN_HINTS = False 342 TABLE_HINTS = False 343 QUERY_HINTS = False 344 IS_BOOL_ALLOWED = False 345 TZ_TO_WITH_TIME_ZONE = True 346 NVL2_SUPPORTED = False 347 STRUCT_DELIMITER = ("(", ")") 348 LIMIT_ONLY_LITERALS = True 349 SUPPORTS_SINGLE_ARG_CONCAT = False 350 LIKE_PROPERTY_INSIDE_SCHEMA = True 351 MULTI_ARG_DISTINCT = False 352 SUPPORTS_TO_NUMBER = False 353 HEX_FUNC = "TO_HEX" 354 PARSE_JSON_NAME = "JSON_PARSE" 355 PAD_FILL_PATTERN_IS_REQUIRED = True 356 357 PROPERTIES_LOCATION = { 358 **generator.Generator.PROPERTIES_LOCATION, 359 exp.LocationProperty: exp.Properties.Location.UNSUPPORTED, 360 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 361 } 362 363 TYPE_MAPPING = { 364 **generator.Generator.TYPE_MAPPING, 365 exp.DataType.Type.INT: "INTEGER", 366 exp.DataType.Type.FLOAT: "REAL", 367 exp.DataType.Type.BINARY: "VARBINARY", 368 exp.DataType.Type.TEXT: "VARCHAR", 369 exp.DataType.Type.TIMETZ: "TIME", 370 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 371 exp.DataType.Type.STRUCT: "ROW", 372 exp.DataType.Type.DATETIME: 
"TIMESTAMP", 373 exp.DataType.Type.DATETIME64: "TIMESTAMP", 374 exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG", 375 } 376 377 TRANSFORMS = { 378 **generator.Generator.TRANSFORMS, 379 exp.AnyValue: rename_func("ARBITRARY"), 380 exp.ApproxDistinct: lambda self, e: self.func( 381 "APPROX_DISTINCT", e.this, e.args.get("accuracy") 382 ), 383 exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"), 384 exp.ArgMax: rename_func("MAX_BY"), 385 exp.ArgMin: rename_func("MIN_BY"), 386 exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]", 387 exp.ArrayAny: rename_func("ANY_MATCH"), 388 exp.ArrayConcat: rename_func("CONCAT"), 389 exp.ArrayContains: rename_func("CONTAINS"), 390 exp.ArraySize: rename_func("CARDINALITY"), 391 exp.ArrayToString: rename_func("ARRAY_JOIN"), 392 exp.ArrayUniqueAgg: rename_func("SET_AGG"), 393 exp.AtTimeZone: rename_func("AT_TIMEZONE"), 394 exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression), 395 exp.BitwiseLeftShift: lambda self, e: self.func( 396 "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression 397 ), 398 exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this), 399 exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression), 400 exp.BitwiseRightShift: lambda self, e: self.func( 401 "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression 402 ), 403 exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression), 404 exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]), 405 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 406 exp.DateAdd: _date_delta_sql("DATE_ADD"), 407 exp.DateDiff: lambda self, e: self.func( 408 "DATE_DIFF", unit_to_str(e), e.expression, e.this 409 ), 410 exp.DateStrToDate: datestrtodate_sql, 411 exp.DateToDi: lambda self, 412 e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)", 413 exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True), 414 exp.Decode: lambda self, e: encode_decode_sql(self, e, 
"FROM_UTF8"), 415 exp.DiToDate: lambda self, 416 e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)", 417 exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"), 418 exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'", 419 exp.First: _first_last_sql, 420 exp.FirstValue: _first_last_sql, 421 exp.FromTimeZone: lambda self, 422 e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'", 423 exp.Group: transforms.preprocess([transforms.unalias_group]), 424 exp.GroupConcat: lambda self, e: self.func( 425 "ARRAY_JOIN", self.func("ARRAY_AGG", e.this), e.args.get("separator") 426 ), 427 exp.If: if_sql(), 428 exp.ILike: no_ilike_sql, 429 exp.Initcap: _initcap_sql, 430 exp.JSONExtract: _jsonextract_sql, 431 exp.Last: _first_last_sql, 432 exp.LastValue: _first_last_sql, 433 exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this), 434 exp.Lateral: _explode_to_unnest_sql, 435 exp.Left: left_to_substring_sql, 436 exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"), 437 exp.LogicalAnd: rename_func("BOOL_AND"), 438 exp.LogicalOr: rename_func("BOOL_OR"), 439 exp.Pivot: no_pivot_sql, 440 exp.Quantile: _quantile_sql, 441 exp.RegexpExtract: regexp_extract_sql, 442 exp.Right: right_to_substring_sql, 443 exp.SafeDivide: no_safe_divide_sql, 444 exp.Schema: _schema_sql, 445 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 446 exp.Select: transforms.preprocess( 447 [ 448 transforms.eliminate_qualify, 449 transforms.eliminate_distinct_on, 450 transforms.explode_to_unnest(1), 451 transforms.eliminate_semi_and_anti_joins, 452 ] 453 ), 454 exp.SortArray: _no_sort_array, 455 exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True), 456 exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", 457 exp.StrToMap: rename_func("SPLIT_TO_MAP"), 458 exp.StrToTime: _str_to_time_sql, 459 exp.StructExtract: struct_extract_sql, 460 
exp.Table: transforms.preprocess([_unnest_sequence]), 461 exp.Timestamp: no_timestamp_sql, 462 exp.TimestampAdd: _date_delta_sql("DATE_ADD"), 463 exp.TimestampTrunc: timestamptrunc_sql(), 464 exp.TimeStrToDate: timestrtotime_sql, 465 exp.TimeStrToTime: timestrtotime_sql, 466 exp.TimeStrToUnix: lambda self, e: self.func( 467 "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT) 468 ), 469 exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 470 exp.TimeToUnix: rename_func("TO_UNIXTIME"), 471 exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 472 exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]), 473 exp.TsOrDiToDi: lambda self, 474 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", 475 exp.TsOrDsAdd: _ts_or_ds_add_sql, 476 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 477 exp.TsOrDsToDate: _ts_or_ds_to_date_sql, 478 exp.Unhex: rename_func("FROM_HEX"), 479 exp.UnixToStr: lambda self, 480 e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})", 481 exp.UnixToTime: _unix_to_time_sql, 482 exp.UnixToTimeStr: lambda self, 483 e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)", 484 exp.VariancePop: rename_func("VAR_POP"), 485 exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]), 486 exp.WithinGroup: transforms.preprocess( 487 [transforms.remove_within_group_for_percentiles] 488 ), 489 exp.Xor: bool_xor_sql, 490 exp.MD5Digest: rename_func("MD5"), 491 exp.SHA: rename_func("SHA1"), 492 exp.SHA2: sha256_sql, 493 } 494 495 RESERVED_KEYWORDS = { 496 "alter", 497 "and", 498 "as", 499 "between", 500 "by", 501 "case", 502 "cast", 503 "constraint", 504 "create", 505 "cross", 506 "current_time", 507 "current_timestamp", 508 "deallocate", 509 "delete", 510 "describe", 511 "distinct", 512 "drop", 513 "else", 514 "end", 515 "escape", 516 "except", 517 "execute", 518 "exists", 519 "extract", 520 "false", 521 
"for", 522 "from", 523 "full", 524 "group", 525 "having", 526 "in", 527 "inner", 528 "insert", 529 "intersect", 530 "into", 531 "is", 532 "join", 533 "left", 534 "like", 535 "natural", 536 "not", 537 "null", 538 "on", 539 "or", 540 "order", 541 "outer", 542 "prepare", 543 "right", 544 "select", 545 "table", 546 "then", 547 "true", 548 "union", 549 "using", 550 "values", 551 "when", 552 "where", 553 "with", 554 } 555 556 def md5_sql(self, expression: exp.MD5) -> str: 557 this = expression.this 558 559 if not this.type: 560 from sqlglot.optimizer.annotate_types import annotate_types 561 562 this = annotate_types(this) 563 564 if this.is_type(*exp.DataType.TEXT_TYPES): 565 this = exp.Encode(this=this, charset=exp.Literal.string("utf-8")) 566 567 return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this)))) 568 569 def strtounix_sql(self, expression: exp.StrToUnix) -> str: 570 # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one. 571 # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a 572 # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback, 573 # which seems to be using the same time mapping as Hive, as per: 574 # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html 575 value_as_text = exp.cast(expression.this, exp.DataType.Type.TEXT) 576 parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression)) 577 parse_with_tz = self.func( 578 "PARSE_DATETIME", 579 value_as_text, 580 self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE), 581 ) 582 coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz) 583 return self.func("TO_UNIXTIME", coalesced) 584 585 def bracket_sql(self, expression: exp.Bracket) -> str: 586 if expression.args.get("safe"): 587 return self.func( 588 "ELEMENT_AT", 589 expression.this, 590 seq_get( 591 apply_index_offset( 592 
expression.this, 593 expression.expressions, 594 1 - expression.args.get("offset", 0), 595 ), 596 0, 597 ), 598 ) 599 return super().bracket_sql(expression) 600 601 def struct_sql(self, expression: exp.Struct) -> str: 602 from sqlglot.optimizer.annotate_types import annotate_types 603 604 expression = annotate_types(expression) 605 values: t.List[str] = [] 606 schema: t.List[str] = [] 607 unknown_type = False 608 609 for e in expression.expressions: 610 if isinstance(e, exp.PropertyEQ): 611 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 612 unknown_type = True 613 else: 614 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 615 values.append(self.sql(e, "expression")) 616 else: 617 values.append(self.sql(e)) 618 619 size = len(expression.expressions) 620 621 if not size or len(schema) != size: 622 if unknown_type: 623 self.unsupported( 624 "Cannot convert untyped key-value definitions (try annotate_types)." 625 ) 626 return self.func("ROW", *values) 627 return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))" 628 629 def interval_sql(self, expression: exp.Interval) -> str: 630 if expression.this and expression.text("unit").upper().startswith("WEEK"): 631 return f"({expression.this.name} * INTERVAL '7' DAY)" 632 return super().interval_sql(expression) 633 634 def transaction_sql(self, expression: exp.Transaction) -> str: 635 modes = expression.args.get("modes") 636 modes = f" {', '.join(modes)}" if modes else "" 637 return f"START TRANSACTION{modes}" 638 639 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 640 start = expression.args["start"] 641 end = expression.args["end"] 642 step = expression.args.get("step") 643 644 if isinstance(start, exp.Cast): 645 target_type = start.to 646 elif isinstance(end, exp.Cast): 647 target_type = end.to 648 else: 649 target_type = None 650 651 if target_type and target_type.is_type("timestamp"): 652 if target_type is start.to: 653 end = exp.cast(end, target_type) 654 else: 655 
start = exp.cast(start, target_type) 656 657 return self.func("SEQUENCE", start, end, step) 658 659 def offset_limit_modifiers( 660 self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit] 661 ) -> t.List[str]: 662 return [ 663 self.sql(expression, "offset"), 664 self.sql(limit), 665 ] 666 667 def create_sql(self, expression: exp.Create) -> str: 668 """ 669 Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), 670 so we need to remove them 671 """ 672 kind = expression.args["kind"] 673 schema = expression.this 674 if kind == "VIEW" and schema.expressions: 675 expression.this.set("expressions", None) 676 return super().create_sql(expression) 677 678 def delete_sql(self, expression: exp.Delete) -> str: 679 """ 680 Presto only supports DELETE FROM for a single table without an alias, so we need 681 to remove the unnecessary parts. If the original DELETE statement contains more 682 than one table to be deleted, we can't safely map it 1-1 to a Presto statement. 683 """ 684 tables = expression.args.get("tables") or [expression.this] 685 if len(tables) > 1: 686 return super().delete_sql(expression) 687 688 table = tables[0] 689 expression.set("this", table) 690 expression.set("tables", None) 691 692 if isinstance(table, exp.Table): 693 table_alias = table.args.get("alias") 694 if table_alias: 695 table_alias.pop() 696 expression = t.cast(exp.Delete, expression.transform(unqualify_columns)) 697 698 return super().delete_sql(expression)
244class Presto(Dialect): 245 INDEX_OFFSET = 1 246 NULL_ORDERING = "nulls_are_last" 247 TIME_FORMAT = MySQL.TIME_FORMAT 248 TIME_MAPPING = MySQL.TIME_MAPPING 249 STRICT_STRING_CONCAT = True 250 SUPPORTS_SEMI_ANTI_JOIN = False 251 TYPED_DIVISION = True 252 TABLESAMPLE_SIZE_IS_PERCENT = True 253 LOG_BASE_FIRST: t.Optional[bool] = None 254 255 # https://github.com/trinodb/trino/issues/17 256 # https://github.com/trinodb/trino/issues/12289 257 # https://github.com/prestodb/presto/issues/2863 258 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 259 260 class Tokenizer(tokens.Tokenizer): 261 UNICODE_STRINGS = [ 262 (prefix + q, q) 263 for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES) 264 for prefix in ("U&", "u&") 265 ] 266 267 KEYWORDS = { 268 **tokens.Tokenizer.KEYWORDS, 269 "START": TokenType.BEGIN, 270 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 271 "ROW": TokenType.STRUCT, 272 "IPADDRESS": TokenType.IPADDRESS, 273 "IPPREFIX": TokenType.IPPREFIX, 274 "TDIGEST": TokenType.TDIGEST, 275 "HYPERLOGLOG": TokenType.HLLSKETCH, 276 } 277 KEYWORDS.pop("/*+") 278 KEYWORDS.pop("QUALIFY") 279 280 class Parser(parser.Parser): 281 VALUES_FOLLOWED_BY_PAREN = False 282 283 FUNCTIONS = { 284 **parser.Parser.FUNCTIONS, 285 "ARBITRARY": exp.AnyValue.from_arg_list, 286 "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list, 287 "APPROX_PERCENTILE": _build_approx_percentile, 288 "BITWISE_AND": binary_from_function(exp.BitwiseAnd), 289 "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 290 "BITWISE_OR": binary_from_function(exp.BitwiseOr), 291 "BITWISE_XOR": binary_from_function(exp.BitwiseXor), 292 "CARDINALITY": exp.ArraySize.from_arg_list, 293 "CONTAINS": exp.ArrayContains.from_arg_list, 294 "DATE_ADD": lambda args: exp.DateAdd( 295 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 296 ), 297 "DATE_DIFF": lambda args: exp.DateDiff( 298 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 299 ), 300 
"DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"), 301 "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"), 302 "DATE_TRUNC": date_trunc_to_time, 303 "ELEMENT_AT": lambda args: exp.Bracket( 304 this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True 305 ), 306 "FROM_HEX": exp.Unhex.from_arg_list, 307 "FROM_UNIXTIME": _build_from_unixtime, 308 "FROM_UTF8": lambda args: exp.Decode( 309 this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8") 310 ), 311 "NOW": exp.CurrentTimestamp.from_arg_list, 312 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 313 this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2) 314 ), 315 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 316 this=seq_get(args, 0), 317 expression=seq_get(args, 1), 318 replacement=seq_get(args, 2) or exp.Literal.string(""), 319 ), 320 "ROW": exp.Struct.from_arg_list, 321 "SEQUENCE": exp.GenerateSeries.from_arg_list, 322 "SET_AGG": exp.ArrayUniqueAgg.from_arg_list, 323 "SPLIT_TO_MAP": exp.StrToMap.from_arg_list, 324 "STRPOS": lambda args: exp.StrPosition( 325 this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2) 326 ), 327 "TO_CHAR": _build_to_char, 328 "TO_UNIXTIME": exp.TimeToUnix.from_arg_list, 329 "TO_UTF8": lambda args: exp.Encode( 330 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 331 ), 332 "MD5": exp.MD5Digest.from_arg_list, 333 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 334 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 335 } 336 337 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 338 FUNCTION_PARSERS.pop("TRIM") 339 340 class Generator(generator.Generator): 341 INTERVAL_ALLOWS_PLURAL_FORM = False 342 JOIN_HINTS = False 343 TABLE_HINTS = False 344 QUERY_HINTS = False 345 IS_BOOL_ALLOWED = False 346 TZ_TO_WITH_TIME_ZONE = True 347 NVL2_SUPPORTED = False 348 STRUCT_DELIMITER = ("(", ")") 349 
LIMIT_ONLY_LITERALS = True 350 SUPPORTS_SINGLE_ARG_CONCAT = False 351 LIKE_PROPERTY_INSIDE_SCHEMA = True 352 MULTI_ARG_DISTINCT = False 353 SUPPORTS_TO_NUMBER = False 354 HEX_FUNC = "TO_HEX" 355 PARSE_JSON_NAME = "JSON_PARSE" 356 PAD_FILL_PATTERN_IS_REQUIRED = True 357 358 PROPERTIES_LOCATION = { 359 **generator.Generator.PROPERTIES_LOCATION, 360 exp.LocationProperty: exp.Properties.Location.UNSUPPORTED, 361 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 362 } 363 364 TYPE_MAPPING = { 365 **generator.Generator.TYPE_MAPPING, 366 exp.DataType.Type.INT: "INTEGER", 367 exp.DataType.Type.FLOAT: "REAL", 368 exp.DataType.Type.BINARY: "VARBINARY", 369 exp.DataType.Type.TEXT: "VARCHAR", 370 exp.DataType.Type.TIMETZ: "TIME", 371 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 372 exp.DataType.Type.STRUCT: "ROW", 373 exp.DataType.Type.DATETIME: "TIMESTAMP", 374 exp.DataType.Type.DATETIME64: "TIMESTAMP", 375 exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG", 376 } 377 378 TRANSFORMS = { 379 **generator.Generator.TRANSFORMS, 380 exp.AnyValue: rename_func("ARBITRARY"), 381 exp.ApproxDistinct: lambda self, e: self.func( 382 "APPROX_DISTINCT", e.this, e.args.get("accuracy") 383 ), 384 exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"), 385 exp.ArgMax: rename_func("MAX_BY"), 386 exp.ArgMin: rename_func("MIN_BY"), 387 exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]", 388 exp.ArrayAny: rename_func("ANY_MATCH"), 389 exp.ArrayConcat: rename_func("CONCAT"), 390 exp.ArrayContains: rename_func("CONTAINS"), 391 exp.ArraySize: rename_func("CARDINALITY"), 392 exp.ArrayToString: rename_func("ARRAY_JOIN"), 393 exp.ArrayUniqueAgg: rename_func("SET_AGG"), 394 exp.AtTimeZone: rename_func("AT_TIMEZONE"), 395 exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression), 396 exp.BitwiseLeftShift: lambda self, e: self.func( 397 "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression 398 ), 399 exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this), 
400 exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression), 401 exp.BitwiseRightShift: lambda self, e: self.func( 402 "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression 403 ), 404 exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression), 405 exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]), 406 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 407 exp.DateAdd: _date_delta_sql("DATE_ADD"), 408 exp.DateDiff: lambda self, e: self.func( 409 "DATE_DIFF", unit_to_str(e), e.expression, e.this 410 ), 411 exp.DateStrToDate: datestrtodate_sql, 412 exp.DateToDi: lambda self, 413 e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)", 414 exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True), 415 exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"), 416 exp.DiToDate: lambda self, 417 e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)", 418 exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"), 419 exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'", 420 exp.First: _first_last_sql, 421 exp.FirstValue: _first_last_sql, 422 exp.FromTimeZone: lambda self, 423 e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'", 424 exp.Group: transforms.preprocess([transforms.unalias_group]), 425 exp.GroupConcat: lambda self, e: self.func( 426 "ARRAY_JOIN", self.func("ARRAY_AGG", e.this), e.args.get("separator") 427 ), 428 exp.If: if_sql(), 429 exp.ILike: no_ilike_sql, 430 exp.Initcap: _initcap_sql, 431 exp.JSONExtract: _jsonextract_sql, 432 exp.Last: _first_last_sql, 433 exp.LastValue: _first_last_sql, 434 exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this), 435 exp.Lateral: _explode_to_unnest_sql, 436 exp.Left: left_to_substring_sql, 437 exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"), 438 exp.LogicalAnd: rename_func("BOOL_AND"), 439 exp.LogicalOr: 
rename_func("BOOL_OR"), 440 exp.Pivot: no_pivot_sql, 441 exp.Quantile: _quantile_sql, 442 exp.RegexpExtract: regexp_extract_sql, 443 exp.Right: right_to_substring_sql, 444 exp.SafeDivide: no_safe_divide_sql, 445 exp.Schema: _schema_sql, 446 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 447 exp.Select: transforms.preprocess( 448 [ 449 transforms.eliminate_qualify, 450 transforms.eliminate_distinct_on, 451 transforms.explode_to_unnest(1), 452 transforms.eliminate_semi_and_anti_joins, 453 ] 454 ), 455 exp.SortArray: _no_sort_array, 456 exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True), 457 exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", 458 exp.StrToMap: rename_func("SPLIT_TO_MAP"), 459 exp.StrToTime: _str_to_time_sql, 460 exp.StructExtract: struct_extract_sql, 461 exp.Table: transforms.preprocess([_unnest_sequence]), 462 exp.Timestamp: no_timestamp_sql, 463 exp.TimestampAdd: _date_delta_sql("DATE_ADD"), 464 exp.TimestampTrunc: timestamptrunc_sql(), 465 exp.TimeStrToDate: timestrtotime_sql, 466 exp.TimeStrToTime: timestrtotime_sql, 467 exp.TimeStrToUnix: lambda self, e: self.func( 468 "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT) 469 ), 470 exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 471 exp.TimeToUnix: rename_func("TO_UNIXTIME"), 472 exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 473 exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]), 474 exp.TsOrDiToDi: lambda self, 475 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", 476 exp.TsOrDsAdd: _ts_or_ds_add_sql, 477 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 478 exp.TsOrDsToDate: _ts_or_ds_to_date_sql, 479 exp.Unhex: rename_func("FROM_HEX"), 480 exp.UnixToStr: lambda self, 481 e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})", 482 exp.UnixToTime: _unix_to_time_sql, 
483 exp.UnixToTimeStr: lambda self, 484 e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)", 485 exp.VariancePop: rename_func("VAR_POP"), 486 exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]), 487 exp.WithinGroup: transforms.preprocess( 488 [transforms.remove_within_group_for_percentiles] 489 ), 490 exp.Xor: bool_xor_sql, 491 exp.MD5Digest: rename_func("MD5"), 492 exp.SHA: rename_func("SHA1"), 493 exp.SHA2: sha256_sql, 494 } 495 496 RESERVED_KEYWORDS = { 497 "alter", 498 "and", 499 "as", 500 "between", 501 "by", 502 "case", 503 "cast", 504 "constraint", 505 "create", 506 "cross", 507 "current_time", 508 "current_timestamp", 509 "deallocate", 510 "delete", 511 "describe", 512 "distinct", 513 "drop", 514 "else", 515 "end", 516 "escape", 517 "except", 518 "execute", 519 "exists", 520 "extract", 521 "false", 522 "for", 523 "from", 524 "full", 525 "group", 526 "having", 527 "in", 528 "inner", 529 "insert", 530 "intersect", 531 "into", 532 "is", 533 "join", 534 "left", 535 "like", 536 "natural", 537 "not", 538 "null", 539 "on", 540 "or", 541 "order", 542 "outer", 543 "prepare", 544 "right", 545 "select", 546 "table", 547 "then", 548 "true", 549 "union", 550 "using", 551 "values", 552 "when", 553 "where", 554 "with", 555 } 556 557 def md5_sql(self, expression: exp.MD5) -> str: 558 this = expression.this 559 560 if not this.type: 561 from sqlglot.optimizer.annotate_types import annotate_types 562 563 this = annotate_types(this) 564 565 if this.is_type(*exp.DataType.TEXT_TYPES): 566 this = exp.Encode(this=this, charset=exp.Literal.string("utf-8")) 567 568 return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this)))) 569 570 def strtounix_sql(self, expression: exp.StrToUnix) -> str: 571 # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one. 
572 # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a 573 # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback, 574 # which seems to be using the same time mapping as Hive, as per: 575 # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html 576 value_as_text = exp.cast(expression.this, exp.DataType.Type.TEXT) 577 parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression)) 578 parse_with_tz = self.func( 579 "PARSE_DATETIME", 580 value_as_text, 581 self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE), 582 ) 583 coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz) 584 return self.func("TO_UNIXTIME", coalesced) 585 586 def bracket_sql(self, expression: exp.Bracket) -> str: 587 if expression.args.get("safe"): 588 return self.func( 589 "ELEMENT_AT", 590 expression.this, 591 seq_get( 592 apply_index_offset( 593 expression.this, 594 expression.expressions, 595 1 - expression.args.get("offset", 0), 596 ), 597 0, 598 ), 599 ) 600 return super().bracket_sql(expression) 601 602 def struct_sql(self, expression: exp.Struct) -> str: 603 from sqlglot.optimizer.annotate_types import annotate_types 604 605 expression = annotate_types(expression) 606 values: t.List[str] = [] 607 schema: t.List[str] = [] 608 unknown_type = False 609 610 for e in expression.expressions: 611 if isinstance(e, exp.PropertyEQ): 612 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 613 unknown_type = True 614 else: 615 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 616 values.append(self.sql(e, "expression")) 617 else: 618 values.append(self.sql(e)) 619 620 size = len(expression.expressions) 621 622 if not size or len(schema) != size: 623 if unknown_type: 624 self.unsupported( 625 "Cannot convert untyped key-value definitions (try annotate_types)." 
626 ) 627 return self.func("ROW", *values) 628 return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))" 629 630 def interval_sql(self, expression: exp.Interval) -> str: 631 if expression.this and expression.text("unit").upper().startswith("WEEK"): 632 return f"({expression.this.name} * INTERVAL '7' DAY)" 633 return super().interval_sql(expression) 634 635 def transaction_sql(self, expression: exp.Transaction) -> str: 636 modes = expression.args.get("modes") 637 modes = f" {', '.join(modes)}" if modes else "" 638 return f"START TRANSACTION{modes}" 639 640 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 641 start = expression.args["start"] 642 end = expression.args["end"] 643 step = expression.args.get("step") 644 645 if isinstance(start, exp.Cast): 646 target_type = start.to 647 elif isinstance(end, exp.Cast): 648 target_type = end.to 649 else: 650 target_type = None 651 652 if target_type and target_type.is_type("timestamp"): 653 if target_type is start.to: 654 end = exp.cast(end, target_type) 655 else: 656 start = exp.cast(start, target_type) 657 658 return self.func("SEQUENCE", start, end, step) 659 660 def offset_limit_modifiers( 661 self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit] 662 ) -> t.List[str]: 663 return [ 664 self.sql(expression, "offset"), 665 self.sql(limit), 666 ] 667 668 def create_sql(self, expression: exp.Create) -> str: 669 """ 670 Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), 671 so we need to remove them 672 """ 673 kind = expression.args["kind"] 674 schema = expression.this 675 if kind == "VIEW" and schema.expressions: 676 expression.this.set("expressions", None) 677 return super().create_sql(expression) 678 679 def delete_sql(self, expression: exp.Delete) -> str: 680 """ 681 Presto only supports DELETE FROM for a single table without an alias, so we need 682 to remove the unnecessary parts. 
If the original DELETE statement contains more 683 than one table to be deleted, we can't safely map it 1-1 to a Presto statement. 684 """ 685 tables = expression.args.get("tables") or [expression.this] 686 if len(tables) > 1: 687 return super().delete_sql(expression) 688 689 table = tables[0] 690 expression.set("this", table) 691 expression.set("tables", None) 692 693 if isinstance(table, exp.Table): 694 table_alias = table.args.get("alias") 695 if table_alias: 696 table_alias.pop() 697 expression = t.cast(exp.Delete, expression.transform(unqualify_columns)) 698 699 return super().delete_sql(expression)
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
Associates this dialect's time formats with their equivalent Python strftime
formats.
Whether the behavior of `a / b` depends on the types of `a` and `b`.
False means `a / b` is always float division.
True means `a / b` is integer division if both `a` and `b` are integers.
Whether the base comes first in the `LOG` function.
Possible values: `True`, `False`, `None` (two arguments are not supported by `LOG`).
Specifies the strategy according to which identifiers should be normalized.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- SUPPORTS_USER_DEFINED_TYPES
- COPY_PARAMS_ARE_CSV
- NORMALIZE_FUNCTIONS
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- DATE_PART_MAPPING
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    """Presto tokenizer: adds unicode string prefixes and Presto-specific keywords."""

    # Presto supports U&'...' unicode string literals; accept both casings of the prefix.
    UNICODE_STRINGS = [
        (prefix + quote, quote)
        for quote in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
        for prefix in ("U&", "u&")
    ]

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "START": TokenType.BEGIN,
        "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
        "ROW": TokenType.STRUCT,
        "IPADDRESS": TokenType.IPADDRESS,
        "IPPREFIX": TokenType.IPPREFIX,
        "TDIGEST": TokenType.TDIGEST,
        "HYPERLOGLOG": TokenType.HLLSKETCH,
    }
    # Presto has no hint comments ("/*+") and no QUALIFY clause.
    KEYWORDS.pop("/*+")
    KEYWORDS.pop("QUALIFY")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    """Presto parser: maps Presto function names onto sqlglot expression nodes."""

    VALUES_FOLLOWED_BY_PAREN = False

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ARBITRARY": exp.AnyValue.from_arg_list,
        "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
        "APPROX_PERCENTILE": _build_approx_percentile,
        "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
        "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
        "BITWISE_OR": binary_from_function(exp.BitwiseOr),
        "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
        "CARDINALITY": exp.ArraySize.from_arg_list,
        "CONTAINS": exp.ArrayContains.from_arg_list,
        # Presto's DATE_ADD/DATE_DIFF take (unit, value, date) -- note the reversed
        # argument order relative to the sqlglot node.
        "DATE_ADD": lambda args: exp.DateAdd(
            this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
        ),
        "DATE_DIFF": lambda args: exp.DateDiff(
            this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
        ),
        "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
        "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
        "DATE_TRUNC": date_trunc_to_time,
        # ELEMENT_AT is a 1-based, NULL-safe subscript.
        "ELEMENT_AT": lambda args: exp.Bracket(
            this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
        ),
        "FROM_HEX": exp.Unhex.from_arg_list,
        "FROM_UNIXTIME": _build_from_unixtime,
        "FROM_UTF8": lambda args: exp.Decode(
            this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
        ),
        "NOW": exp.CurrentTimestamp.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
        ),
        "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            replacement=seq_get(args, 2) or exp.Literal.string(""),
        ),
        "ROW": exp.Struct.from_arg_list,
        "SEQUENCE": exp.GenerateSeries.from_arg_list,
        "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
        "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
        "STRPOS": lambda args: exp.StrPosition(
            this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2)
        ),
        "TO_CHAR": _build_to_char,
        "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
        "TO_UTF8": lambda args: exp.Encode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
        "MD5": exp.MD5Digest.from_arg_list,
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
    }

    FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
    FUNCTION_PARSERS.pop("TRIM")
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    """Generates Presto SQL from a sqlglot syntax tree.

    Configures type mappings, per-expression SQL transforms and reserved
    keywords, and overrides the generator methods whose Presto rendering
    differs structurally from the default.
    """

    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    IS_BOOL_ALLOWED = False
    TZ_TO_WITH_TIME_ZONE = True
    NVL2_SUPPORTED = False
    STRUCT_DELIMITER = ("(", ")")
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_SINGLE_ARG_CONCAT = False
    LIKE_PROPERTY_INSIDE_SCHEMA = True
    MULTI_ARG_DISTINCT = False
    SUPPORTS_TO_NUMBER = False
    HEX_FUNC = "TO_HEX"
    PARSE_JSON_NAME = "JSON_PARSE"
    PAD_FILL_PATTERN_IS_REQUIRED = True

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.INT: "INTEGER",
        exp.DataType.Type.FLOAT: "REAL",
        exp.DataType.Type.BINARY: "VARBINARY",
        exp.DataType.Type.TEXT: "VARCHAR",
        exp.DataType.Type.TIMETZ: "TIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.STRUCT: "ROW",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.DATETIME64: "TIMESTAMP",
        exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("ARBITRARY"),
        exp.ApproxDistinct: lambda self, e: self.func(
            "APPROX_DISTINCT", e.this, e.args.get("accuracy")
        ),
        exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
        exp.ArrayAny: rename_func("ANY_MATCH"),
        exp.ArrayConcat: rename_func("CONCAT"),
        exp.ArrayContains: rename_func("CONTAINS"),
        exp.ArraySize: rename_func("CARDINALITY"),
        exp.ArrayToString: rename_func("ARRAY_JOIN"),
        exp.ArrayUniqueAgg: rename_func("SET_AGG"),
        exp.AtTimeZone: rename_func("AT_TIMEZONE"),
        exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
        exp.BitwiseLeftShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
        ),
        exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
        exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
        exp.BitwiseRightShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
        ),
        exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
        exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
        exp.DateAdd: _date_delta_sql("DATE_ADD"),
        # Presto's DATE_DIFF takes (unit, start, end), hence the argument flip.
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", unit_to_str(e), e.expression, e.this
        ),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateToDi: lambda self,
        e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
        exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
        exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
        exp.DiToDate: lambda self,
        e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
        exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
        exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'",
        exp.First: _first_last_sql,
        exp.FirstValue: _first_last_sql,
        exp.FromTimeZone: lambda self,
        e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.GroupConcat: lambda self, e: self.func(
            "ARRAY_JOIN", self.func("ARRAY_AGG", e.this), e.args.get("separator")
        ),
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.Initcap: _initcap_sql,
        exp.JSONExtract: _jsonextract_sql,
        exp.Last: _first_last_sql,
        exp.LastValue: _first_last_sql,
        exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
        exp.Lateral: _explode_to_unnest_sql,
        exp.Left: left_to_substring_sql,
        exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"),
        exp.LogicalAnd: rename_func("BOOL_AND"),
        exp.LogicalOr: rename_func("BOOL_OR"),
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpExtract: regexp_extract_sql,
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.Schema: _schema_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.explode_to_unnest(1),
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SortArray: _no_sort_array,
        exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True),
        exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
        exp.StrToMap: rename_func("SPLIT_TO_MAP"),
        exp.StrToTime: _str_to_time_sql,
        exp.StructExtract: struct_extract_sql,
        exp.Table: transforms.preprocess([_unnest_sequence]),
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToDate: timestrtotime_sql,
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: lambda self, e: self.func(
            "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
        ),
        exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TimeToUnix: rename_func("TO_UNIXTIME"),
        exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.TsOrDiToDi: lambda self,
        e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixToStr: lambda self,
        e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: lambda self,
        e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
        exp.WithinGroup: transforms.preprocess(
            [transforms.remove_within_group_for_percentiles]
        ),
        exp.Xor: bool_xor_sql,
        exp.MD5Digest: rename_func("MD5"),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
    }

    RESERVED_KEYWORDS = {
        "alter",
        "and",
        "as",
        "between",
        "by",
        "case",
        "cast",
        "constraint",
        "create",
        "cross",
        "current_time",
        "current_timestamp",
        "deallocate",
        "delete",
        "describe",
        "distinct",
        "drop",
        "else",
        "end",
        "escape",
        "except",
        "execute",
        "exists",
        "extract",
        "false",
        "for",
        "from",
        "full",
        "group",
        "having",
        "in",
        "inner",
        "insert",
        "intersect",
        "into",
        "is",
        "join",
        "left",
        "like",
        "natural",
        "not",
        "null",
        "on",
        "or",
        "order",
        "outer",
        "prepare",
        "right",
        "select",
        "table",
        "then",
        "true",
        "union",
        "using",
        "values",
        "when",
        "where",
        "with",
    }

    def md5_sql(self, expression: exp.MD5) -> str:
        """Render MD5 as LOWER(TO_HEX(MD5(...))), encoding text arguments as UTF-8 first."""
        this = expression.this

        if not this.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this)

        if this.is_type(*exp.DataType.TEXT_TYPES):
            # Presto's MD5 takes VARBINARY; Encode renders as TO_UTF8 (see TRANSFORMS).
            this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

        return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

    def strtounix_sql(self, expression: exp.StrToUnix) -> str:
        """Render StrToUnix as TO_UNIXTIME over a parsed timestamp.

        Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
        To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
        timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
        which seems to be using the same time mapping as Hive, as per:
        https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
        """
        value_as_text = exp.cast(expression.this, exp.DataType.Type.TEXT)
        parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))
        parse_with_tz = self.func(
            "PARSE_DATETIME",
            value_as_text,
            self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
        )
        coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
        return self.func("TO_UNIXTIME", coalesced)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        """Render NULL-safe subscripts as ELEMENT_AT; fall back to the default otherwise."""
        if expression.args.get("safe"):
            return self.func(
                "ELEMENT_AT",
                expression.this,
                seq_get(
                    apply_index_offset(
                        expression.this,
                        expression.expressions,
                        # ELEMENT_AT is 1-based; shift only if the source wasn't already.
                        1 - expression.args.get("offset", 0),
                    ),
                    0,
                ),
            )
        return super().bracket_sql(expression)

    def struct_sql(self, expression: exp.Struct) -> str:
        """Render a struct literal as CAST(ROW(...) AS ROW(name type, ...)).

        Falls back to a bare ROW(...) when any field's type cannot be
        determined, since the CAST form requires a complete schema.
        """
        from sqlglot.optimizer.annotate_types import annotate_types

        expression = annotate_types(expression)
        values: t.List[str] = []
        schema: t.List[str] = []
        unknown_type = False

        for e in expression.expressions:
            if isinstance(e, exp.PropertyEQ):
                if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                    unknown_type = True
                else:
                    schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                values.append(self.sql(e, "expression"))
            else:
                values.append(self.sql(e))

        size = len(expression.expressions)

        if not size or len(schema) != size:
            if unknown_type:
                self.unsupported(
                    "Cannot convert untyped key-value definitions (try annotate_types)."
                )
            return self.func("ROW", *values)
        return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

    def interval_sql(self, expression: exp.Interval) -> str:
        """Rewrite WEEK intervals as `n * INTERVAL '7' DAY` (Presto has no WEEK unit here)."""
        if expression.this and expression.text("unit").upper().startswith("WEEK"):
            return f"({expression.this.name} * INTERVAL '7' DAY)"
        return super().interval_sql(expression)

    def transaction_sql(self, expression: exp.Transaction) -> str:
        """Render BEGIN/transaction starts as START TRANSACTION [modes]."""
        modes = expression.args.get("modes")
        modes = f" {', '.join(modes)}" if modes else ""
        return f"START TRANSACTION{modes}"

    def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
        """Render GenerateSeries as SEQUENCE(start, end[, step]).

        When exactly one endpoint carries a timestamp cast, the other endpoint
        is cast to the same type so SEQUENCE gets consistent argument types.
        """
        start = expression.args["start"]
        end = expression.args["end"]
        step = expression.args.get("step")

        if isinstance(start, exp.Cast):
            target_type = start.to
        elif isinstance(end, exp.Cast):
            target_type = end.to
        else:
            target_type = None

        if target_type and target_type.is_type("timestamp"):
            # Bug fix: only `exp.Cast` nodes expose `.to`, so guard the access —
            # previously this raised AttributeError when only `end` was a cast.
            if isinstance(start, exp.Cast) and target_type is start.to:
                end = exp.cast(end, target_type)
            else:
                start = exp.cast(start, target_type)

        return self.func("SEQUENCE", start, end, step)

    def offset_limit_modifiers(
        self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
    ) -> t.List[str]:
        # Presto emits OFFSET before LIMIT.
        return [
            self.sql(expression, "offset"),
            self.sql(limit),
        ]

    def create_sql(self, expression: exp.Create) -> str:
        """
        Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
        so we need to remove them
        """
        kind = expression.args["kind"]
        schema = expression.this
        if kind == "VIEW" and schema.expressions:
            expression.this.set("expressions", None)
        return super().create_sql(expression)

    def delete_sql(self, expression: exp.Delete) -> str:
        """
        Presto only supports DELETE FROM for a single table without an alias, so we need
        to remove the unnecessary parts. If the original DELETE statement contains more
        than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
        """
        tables = expression.args.get("tables") or [expression.this]
        if len(tables) > 1:
            return super().delete_sql(expression)

        table = tables[0]
        expression.set("this", table)
        expression.set("tables", None)

        if isinstance(table, exp.Table):
            table_alias = table.args.get("alias")
            if table_alias:
                table_alias.pop()
                expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

        return super().delete_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
def md5_sql(self, expression: exp.MD5) -> str:
    """Render MD5 as LOWER(TO_HEX(MD5(...))), encoding text arguments as UTF-8 first."""
    arg = expression.this

    if not arg.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        arg = annotate_types(arg)

    if arg.is_type(*exp.DataType.TEXT_TYPES):
        # MD5 needs a binary argument; wrap text in an Encode node (rendered as TO_UTF8).
        arg = exp.Encode(this=arg, charset=exp.Literal.string("utf-8"))

    return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(arg))))
def strtounix_sql(self, expression: exp.StrToUnix) -> str:
    """Render StrToUnix as TO_UNIXTIME over a parsed timestamp.

    `TO_UNIXTIME` requires a `TIMESTAMP`, so the argument is parsed first. We
    attempt `DATE_PARSE`, but since that can fail when a timezone is involved,
    it is wrapped in `TRY` with `PARSE_DATETIME` as the fallback — which seems
    to use the same time mapping as Hive, as per:
    https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
    """
    text_value = exp.cast(expression.this, exp.DataType.Type.TEXT)

    plain_parse = self.func("DATE_PARSE", text_value, self.format_time(expression))
    tz_parse = self.func(
        "PARSE_DATETIME",
        text_value,
        self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
    )
    timestamp = self.func("COALESCE", self.func("TRY", plain_parse), tz_parse)

    return self.func("TO_UNIXTIME", timestamp)
def bracket_sql(self, expression: exp.Bracket) -> str:
    """Render NULL-safe subscripts as ELEMENT_AT; defer to the default otherwise."""
    if not expression.args.get("safe"):
        return super().bracket_sql(expression)

    # ELEMENT_AT is 1-based: shift the index only when the source wasn't already.
    index = seq_get(
        apply_index_offset(
            expression.this,
            expression.expressions,
            1 - expression.args.get("offset", 0),
        ),
        0,
    )
    return self.func("ELEMENT_AT", expression.this, index)
def struct_sql(self, expression: exp.Struct) -> str:
    """Render a Struct as ``CAST(ROW(...) AS ROW(...))`` when every key-value
    pair has a known type; otherwise fall back to a plain ``ROW(...)`` call
    (warning about untyped pairs when any were seen).
    """
    from sqlglot.optimizer.annotate_types import annotate_types

    expression = annotate_types(expression)

    row_values: t.List[str] = []
    row_schema: t.List[str] = []
    saw_unknown = False

    for member in expression.expressions:
        if not isinstance(member, exp.PropertyEQ):
            row_values.append(self.sql(member))
            continue

        if member.type and member.type.is_type(exp.DataType.Type.UNKNOWN):
            saw_unknown = True
        else:
            row_schema.append(f"{self.sql(member, 'this')} {self.sql(member.type)}")
        row_values.append(self.sql(member, "expression"))

    total = len(expression.expressions)

    # Only emit the CAST form when a type was collected for every member.
    if total and len(row_schema) == total:
        return f"CAST(ROW({', '.join(row_values)}) AS ROW({', '.join(row_schema)}))"

    if saw_unknown:
        self.unsupported(
            "Cannot convert untyped key-value definitions (try annotate_types)."
        )
    return self.func("ROW", *row_values)
def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
    """Render GenerateSeries as Presto's ``SEQUENCE(start, end[, step])``.

    If either endpoint is explicitly cast to TIMESTAMP, the other endpoint is
    cast to the same type so SEQUENCE receives matching argument types.
    """
    start = expression.args["start"]
    end = expression.args["end"]
    step = expression.args.get("step")

    if isinstance(start, exp.Cast):
        target_type = start.to
    elif isinstance(end, exp.Cast):
        target_type = end.to
    else:
        target_type = None

    if target_type and target_type.is_type("timestamp"):
        # Bug fix: the original compared `target_type is start.to` without
        # first checking that `start` is a Cast. When only `end` carried the
        # cast, `start` has no `.to` attribute and the comparison raised
        # AttributeError. Guard with isinstance, mirroring the checks above.
        if isinstance(start, exp.Cast) and target_type is start.to:
            end = exp.cast(end, target_type)
        else:
            start = exp.cast(start, target_type)

    return self.func("SEQUENCE", start, end, step)
def create_sql(self, expression: exp.Create) -> str:
    """Strip the column-expression list from CREATE VIEW statements.

    Presto doesn't support CREATE VIEW with expressions (ex: in
    `CREATE VIEW x (cola)`, `(cola)` is the expression), so they are removed
    before delegating to the default generator.
    """
    schema = expression.this
    is_view = expression.args["kind"] == "VIEW"

    if is_view and schema.expressions:
        schema.set("expressions", None)

    return super().create_sql(expression)
Presto doesn't support CREATE VIEW with expressions (ex: in `CREATE VIEW x (cola)`, `(cola)` is the expression), so we need to remove them.
def delete_sql(self, expression: exp.Delete) -> str:
    """Rewrite DELETE into Presto's single-table, alias-free form.

    Presto only supports DELETE FROM for a single table without an alias, so
    the unnecessary parts are removed. If the original DELETE statement
    contains more than one table to be deleted, it can't safely be mapped 1-1
    to a Presto statement, so it is emitted unchanged.
    """
    targets = expression.args.get("tables") or [expression.this]
    if len(targets) > 1:
        return super().delete_sql(expression)

    target = targets[0]
    expression.set("this", target)
    expression.set("tables", None)

    if isinstance(target, exp.Table):
        alias = target.args.get("alias")
        if alias:
            alias.pop()
            # With the alias gone, qualified column references would dangle.
            expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

    return super().delete_sql(expression)
Presto only supports DELETE FROM for a single table without an alias, so we need to remove the unnecessary parts. If the original DELETE statement contains more than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_SET_OP
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTED_JSON_PATH_PARTS
- CAN_IMPLEMENT_ARRAY_ANY
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- STAR_EXCEPT
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- length_sql
- rand_sql
- strtodate_sql
- strtotime_sql
- changes_sql
- pad_sql