sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

# Shape of the option tables consumed by _parse_var_from_options (see usage below).
OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VarMap from a flat [key1, value1, key2, value2, ...] argument list.

    A single star argument (e.g. ``MAP(*)``) produces a StarMap instead.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    # Arguments alternate key, value, key, value, ...
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    """Build a Like node from function-call syntax.

    Note the operand swap: the function form is LIKE(pattern, value).  A third
    argument, when present, is the ESCAPE character.
    """
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a parser for a binary range operator that produces `expr_type`.

    When `reverse_args` is true, the left and right operands are swapped.
    """
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        # The operator may be followed by an ESCAPE clause.
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a logarithm node, honoring the dialect's argument order and LOG defaults."""
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    # Single-argument LOG is the natural log in dialects with LOG_DEFAULTS_TO_LN.
    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    """Build HEX; dialects with lowercase hex output get a LowerHex node."""
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder for JSON extraction functions whose second argument is a JSON path."""
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        # Only JSON_EXTRACT accepts extra path arguments beyond the first two.
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    """Build a Mod node, parenthesizing binary operands to preserve precedence."""
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True) -> exp.Pad:
    """Build an LPAD/RPAD node; `is_left` selects the padding side."""
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


class _Parser(type):
    # Metaclass that precomputes word tries for multi-word SHOW/SET statements, so
    # every Parser subclass gets tries that match its own SHOW_PARSERS/SET_PARSERS.
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    # Function-name -> builder table. Defaults come from exp.FUNCTION_BY_NAME;
    # the entries below override functions whose parsing is dialect-sensitive.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # GLOB's function form is GLOB(pattern, value), hence the swapped operands.
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "MOD": build_mod,
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        # Cast to TEXT, then keep only the leading date portion (10 characters).
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
length=exp.Literal.number(10), 191 ), 192 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 193 "UPPER": build_upper, 194 "VAR_MAP": build_var_map, 195 } 196 197 NO_PAREN_FUNCTIONS = { 198 TokenType.CURRENT_DATE: exp.CurrentDate, 199 TokenType.CURRENT_DATETIME: exp.CurrentDate, 200 TokenType.CURRENT_TIME: exp.CurrentTime, 201 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 202 TokenType.CURRENT_USER: exp.CurrentUser, 203 } 204 205 STRUCT_TYPE_TOKENS = { 206 TokenType.NESTED, 207 TokenType.OBJECT, 208 TokenType.STRUCT, 209 } 210 211 NESTED_TYPE_TOKENS = { 212 TokenType.ARRAY, 213 TokenType.LIST, 214 TokenType.LOWCARDINALITY, 215 TokenType.MAP, 216 TokenType.NULLABLE, 217 *STRUCT_TYPE_TOKENS, 218 } 219 220 ENUM_TYPE_TOKENS = { 221 TokenType.ENUM, 222 TokenType.ENUM8, 223 TokenType.ENUM16, 224 } 225 226 AGGREGATE_TYPE_TOKENS = { 227 TokenType.AGGREGATEFUNCTION, 228 TokenType.SIMPLEAGGREGATEFUNCTION, 229 } 230 231 TYPE_TOKENS = { 232 TokenType.BIT, 233 TokenType.BOOLEAN, 234 TokenType.TINYINT, 235 TokenType.UTINYINT, 236 TokenType.SMALLINT, 237 TokenType.USMALLINT, 238 TokenType.INT, 239 TokenType.UINT, 240 TokenType.BIGINT, 241 TokenType.UBIGINT, 242 TokenType.INT128, 243 TokenType.UINT128, 244 TokenType.INT256, 245 TokenType.UINT256, 246 TokenType.MEDIUMINT, 247 TokenType.UMEDIUMINT, 248 TokenType.FIXEDSTRING, 249 TokenType.FLOAT, 250 TokenType.DOUBLE, 251 TokenType.CHAR, 252 TokenType.NCHAR, 253 TokenType.VARCHAR, 254 TokenType.NVARCHAR, 255 TokenType.BPCHAR, 256 TokenType.TEXT, 257 TokenType.MEDIUMTEXT, 258 TokenType.LONGTEXT, 259 TokenType.MEDIUMBLOB, 260 TokenType.LONGBLOB, 261 TokenType.BINARY, 262 TokenType.VARBINARY, 263 TokenType.JSON, 264 TokenType.JSONB, 265 TokenType.INTERVAL, 266 TokenType.TINYBLOB, 267 TokenType.TINYTEXT, 268 TokenType.TIME, 269 TokenType.TIMETZ, 270 TokenType.TIMESTAMP, 271 TokenType.TIMESTAMP_S, 272 TokenType.TIMESTAMP_MS, 273 TokenType.TIMESTAMP_NS, 274 TokenType.TIMESTAMPTZ, 275 TokenType.TIMESTAMPLTZ, 
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    # Maps a signed integer/decimal type to its unsigned counterpart (MySQL-style).
    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    # Predicates that can wrap a subquery, e.g. ANY (SELECT ...).
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,  # SOME is a synonym for ANY
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    # Objects that live inside a database/schema namespace.
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    # Everything CREATE/DROP can apply to.
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    # END is excluded because it would be ambiguous with BEGIN ... END blocks.
    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    # Identifier tokens minus those that would be ambiguous right after a table.
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    # Tokens that may be followed by a parenthesized argument list.
    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }
    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    # No exponent operator by default; dialects that have one override this.
    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    # Parsers for lambda syntaxes: x -> expr (Lambda) and x => expr (Kwarg).
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    # Operators that can follow a column expression (::, ->, ->>, #>, #>>, ?).
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Entry points used when parsing into a specific expression type.
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Dispatch table keyed by the first token of a statement.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    # Parsers for primary (literal-like) expressions; extends string/numeric tables.
    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            # "except" is a Python keyword, so the kwargs are passed via a dict.
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    # Parsers for range/predicate operators that follow a parsed operand.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    # Keyword (possibly multi-word) -> parser for CREATE/table properties.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Keyword -> parser for column constraints.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self:
self._parse_auto_increment(), 921 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 922 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 923 "CHARACTER SET": lambda self: self.expression( 924 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 925 ), 926 "CHECK": lambda self: self.expression( 927 exp.CheckColumnConstraint, 928 this=self._parse_wrapped(self._parse_assignment), 929 enforced=self._match_text_seq("ENFORCED"), 930 ), 931 "COLLATE": lambda self: self.expression( 932 exp.CollateColumnConstraint, this=self._parse_var(any_token=True) 933 ), 934 "COMMENT": lambda self: self.expression( 935 exp.CommentColumnConstraint, this=self._parse_string() 936 ), 937 "COMPRESS": lambda self: self._parse_compress(), 938 "CLUSTERED": lambda self: self.expression( 939 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 940 ), 941 "NONCLUSTERED": lambda self: self.expression( 942 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 943 ), 944 "DEFAULT": lambda self: self.expression( 945 exp.DefaultColumnConstraint, this=self._parse_bitwise() 946 ), 947 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 948 "EPHEMERAL": lambda self: self.expression( 949 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 950 ), 951 "EXCLUDE": lambda self: self.expression( 952 exp.ExcludeColumnConstraint, this=self._parse_index_params() 953 ), 954 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 955 "FORMAT": lambda self: self.expression( 956 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 957 ), 958 "GENERATED": lambda self: self._parse_generated_as_identity(), 959 "IDENTITY": lambda self: self._parse_auto_increment(), 960 "INLINE": lambda self: self._parse_inline(), 961 "LIKE": lambda self: self._parse_create_like(), 962 "NOT": lambda self: self._parse_not_constraint(), 963 "NULL": lambda self: 
self.expression(exp.NotNullColumnConstraint, allow_null=True), 964 "ON": lambda self: ( 965 self._match(TokenType.UPDATE) 966 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 967 ) 968 or self.expression(exp.OnProperty, this=self._parse_id_var()), 969 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 970 "PERIOD": lambda self: self._parse_period_for_system_time(), 971 "PRIMARY KEY": lambda self: self._parse_primary_key(), 972 "REFERENCES": lambda self: self._parse_references(match=False), 973 "TITLE": lambda self: self.expression( 974 exp.TitleColumnConstraint, this=self._parse_var_or_string() 975 ), 976 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 977 "UNIQUE": lambda self: self._parse_unique(), 978 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 979 "WITH": lambda self: self.expression( 980 exp.Properties, expressions=self._parse_wrapped_properties() 981 ), 982 } 983 984 ALTER_PARSERS = { 985 "ADD": lambda self: self._parse_alter_table_add(), 986 "ALTER": lambda self: self._parse_alter_table_alter(), 987 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 988 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 989 "DROP": lambda self: self._parse_alter_table_drop(), 990 "RENAME": lambda self: self._parse_alter_table_rename(), 991 "SET": lambda self: self._parse_alter_table_set(), 992 } 993 994 ALTER_ALTER_PARSERS = { 995 "DISTKEY": lambda self: self._parse_alter_diststyle(), 996 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 997 "SORTKEY": lambda self: self._parse_alter_sortkey(), 998 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 999 } 1000 1001 SCHEMA_UNNAMED_CONSTRAINTS = { 1002 "CHECK", 1003 "EXCLUDE", 1004 "FOREIGN KEY", 1005 "LIKE", 1006 "PERIOD", 1007 "PRIMARY KEY", 1008 "UNIQUE", 1009 } 1010 1011 NO_PAREN_FUNCTION_PARSERS = { 1012 "ANY": lambda 
        self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    # Token types that can never be a function name
    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Expression classes that represent a key/value pair definition
    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    # Functions with non-standard argument syntax that need a dedicated parser
    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    # Clause-start token -> (modifier key, parser) used to fill a query's modifier dict
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    # Keyword following SET -> parser for the SET item
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    # Tokens that can start the SELECT part of a DDL statement (e.g. CREATE TABLE AS)
    DDL_SELECT_TOKENS = {TokenType.SELECT,
TokenType.WITH, TokenType.L_PAREN} 1096 1097 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1098 1099 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1100 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1101 "ISOLATION": ( 1102 ("LEVEL", "REPEATABLE", "READ"), 1103 ("LEVEL", "READ", "COMMITTED"), 1104 ("LEVEL", "READ", "UNCOMITTED"), 1105 ("LEVEL", "SERIALIZABLE"), 1106 ), 1107 "READ": ("WRITE", "ONLY"), 1108 } 1109 1110 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1111 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1112 ) 1113 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1114 1115 CREATE_SEQUENCE: OPTIONS_TYPE = { 1116 "SCALE": ("EXTEND", "NOEXTEND"), 1117 "SHARD": ("EXTEND", "NOEXTEND"), 1118 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1119 **dict.fromkeys( 1120 ( 1121 "SESSION", 1122 "GLOBAL", 1123 "KEEP", 1124 "NOKEEP", 1125 "ORDER", 1126 "NOORDER", 1127 "NOCACHE", 1128 "CYCLE", 1129 "NOCYCLE", 1130 "NOMINVALUE", 1131 "NOMAXVALUE", 1132 "NOSCALE", 1133 "NOSHARD", 1134 ), 1135 tuple(), 1136 ), 1137 } 1138 1139 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1140 1141 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1142 1143 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1144 1145 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1146 "TYPE": ("EVOLUTION",), 1147 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1148 } 1149 1150 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1151 1152 CLONE_KEYWORDS = {"CLONE", "COPY"} 1153 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1154 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1155 1156 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1157 1158 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1159 1160 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, 
TokenType.USE} 1161 1162 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1163 1164 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1165 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1166 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1167 1168 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1169 1170 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1171 1172 ADD_CONSTRAINT_TOKENS = { 1173 TokenType.CONSTRAINT, 1174 TokenType.FOREIGN_KEY, 1175 TokenType.INDEX, 1176 TokenType.KEY, 1177 TokenType.PRIMARY_KEY, 1178 TokenType.UNIQUE, 1179 } 1180 1181 DISTINCT_TOKENS = {TokenType.DISTINCT} 1182 1183 NULL_TOKENS = {TokenType.NULL} 1184 1185 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1186 1187 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1188 1189 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1190 1191 STRICT_CAST = True 1192 1193 PREFIXED_PIVOT_COLUMNS = False 1194 IDENTIFY_PIVOT_STRINGS = False 1195 1196 LOG_DEFAULTS_TO_LN = False 1197 1198 # Whether ADD is present for each column added by ALTER TABLE 1199 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1200 1201 # Whether the table sample clause expects CSV syntax 1202 TABLESAMPLE_CSV = False 1203 1204 # The default method used for table sampling 1205 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1206 1207 # Whether the SET command needs a delimiter (e.g. 
"=") for assignments 1208 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1209 1210 # Whether the TRIM function expects the characters to trim as its first argument 1211 TRIM_PATTERN_FIRST = False 1212 1213 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1214 STRING_ALIASES = False 1215 1216 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1217 MODIFIERS_ATTACHED_TO_SET_OP = True 1218 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1219 1220 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1221 NO_PAREN_IF_COMMANDS = True 1222 1223 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1224 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1225 1226 # Whether the `:` operator is used to extract a value from a VARIANT column 1227 COLON_IS_VARIANT_EXTRACT = False 1228 1229 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1230 # If this is True and '(' is not found, the keyword will be treated as an identifier 1231 VALUES_FOLLOWED_BY_PAREN = True 1232 1233 # Whether implicit unnesting is supported, e.g. 
# e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        # Imported here to avoid a circular import at module load time
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Resets the per-parse state (SQL text, errors and token cursor)."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # Every candidate type failed; surface all collected errors, chained to the last one
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Splits the token stream on semicolons and runs parse_method once per chunk
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # Keep a commented semicolon as its own chunk so the comments survive
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the chunk wasn't fully consumed -> syntax error
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Attaches (and consumes) any comments buffered from the previous token
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the original SQL spanned by the two tokens, inclusive
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        # True if the previous and current tokens are adjacent in the source text
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        # Moves the token cursor forward and refreshes the curr/next/prev views
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Moves the cursor back to an absolute index (no-op if already there)
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        # Fallback: wrap the remaining statement verbatim in a Command node
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses COMMENT ON <kind> <target> IS <string>
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Entry point for a single statement: dispatch on the first token,
        # falling back to Command for dialect-specific commands
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return \
self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        # Parses DROP <kind> [IF EXISTS] <name> [...options]
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; returns a truthy value only when fully matched
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at different clause positions
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        # Consumes CREATE SEQUENCE options; returns None if nothing was consumed
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if \
self._index == index else seq 1841 1842 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1843 # only used for teradata currently 1844 self._match(TokenType.COMMA) 1845 1846 kwargs = { 1847 "no": self._match_text_seq("NO"), 1848 "dual": self._match_text_seq("DUAL"), 1849 "before": self._match_text_seq("BEFORE"), 1850 "default": self._match_text_seq("DEFAULT"), 1851 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1852 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1853 "after": self._match_text_seq("AFTER"), 1854 "minimum": self._match_texts(("MIN", "MINIMUM")), 1855 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1856 } 1857 1858 if self._match_texts(self.PROPERTY_PARSERS): 1859 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1860 try: 1861 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1862 except TypeError: 1863 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1864 1865 return None 1866 1867 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1868 return self._parse_wrapped_csv(self._parse_property) 1869 1870 def _parse_property(self) -> t.Optional[exp.Expression]: 1871 if self._match_texts(self.PROPERTY_PARSERS): 1872 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1873 1874 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1875 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1876 1877 if self._match_text_seq("COMPOUND", "SORTKEY"): 1878 return self._parse_sortkey(compound=True) 1879 1880 if self._match_text_seq("SQL", "SECURITY"): 1881 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1882 1883 index = self._index 1884 key = self._parse_column() 1885 1886 if not self._match(TokenType.EQ): 1887 self._retreat(index) 1888 return self._parse_sequence_properties() 1889 1890 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1891 
if isinstance(key, exp.Column): 1892 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1893 1894 value = self._parse_bitwise() or self._parse_var(any_token=True) 1895 1896 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1897 if isinstance(value, exp.Column): 1898 value = exp.var(value.name) 1899 1900 return self.expression(exp.Property, this=key, value=value) 1901 1902 def _parse_stored(self) -> exp.FileFormatProperty: 1903 self._match(TokenType.ALIAS) 1904 1905 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1906 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1907 1908 return self.expression( 1909 exp.FileFormatProperty, 1910 this=( 1911 self.expression( 1912 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1913 ) 1914 if input_format or output_format 1915 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1916 ), 1917 ) 1918 1919 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1920 field = self._parse_field() 1921 if isinstance(field, exp.Identifier) and not field.quoted: 1922 field = exp.var(field) 1923 1924 return field 1925 1926 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1927 self._match(TokenType.EQ) 1928 self._match(TokenType.ALIAS) 1929 1930 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1931 1932 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1933 properties = [] 1934 while True: 1935 if before: 1936 prop = self._parse_property_before() 1937 else: 1938 prop = self._parse_property() 1939 if not prop: 1940 break 1941 for p in ensure_list(prop): 1942 properties.append(p) 1943 1944 if properties: 1945 return self.expression(exp.Properties, expressions=properties) 1946 1947 return None 1948 1949 def _parse_fallback(self, no: bool = False) -> 
exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # Disambiguate VOLATILE: look two tokens back (the token before the
        # VOLATILE keyword itself) to decide which construct this is.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        # Preceded by a PRE_VOLATILE_TOKENS member -> table-level VOLATILE
        # property; otherwise treat it as a function stability marker.
        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        # The number is optional (e.g. INFINITE), so the prefix may be empty.
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        # Parses TSQL-style SYSTEM_VERSIONING = ON [(...)] | OFF; `with_` marks
        # whether the clause appeared inside a WITH (...) wrapper.
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        # SYSTEM_VERSIONING = OFF carries no options; return immediately.
        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        # Optional option list: (HISTORY_TABLE = ..., DATA_CONSISTENCY_CHECK = ...,
        # HISTORY_RETENTION_PERIOD = ...), in any order, comma separated.
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        on = 
self._match_text_seq("ON") or not self._match_text_seq("OFF") 2005 prop = self.expression(exp.DataDeletionProperty, on=on) 2006 2007 if self._match(TokenType.L_PAREN): 2008 while self._curr and not self._match(TokenType.R_PAREN): 2009 if self._match_text_seq("FILTER_COLUMN", "="): 2010 prop.set("filter_column", self._parse_column()) 2011 elif self._match_text_seq("RETENTION_PERIOD", "="): 2012 prop.set("retention_period", self._parse_retention_period()) 2013 2014 self._match(TokenType.COMMA) 2015 2016 return prop 2017 2018 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2019 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2020 prop = self._parse_system_versioning_property(with_=True) 2021 self._match_r_paren() 2022 return prop 2023 2024 if self._match(TokenType.L_PAREN, advance=False): 2025 return self._parse_wrapped_properties() 2026 2027 if self._match_text_seq("JOURNAL"): 2028 return self._parse_withjournaltable() 2029 2030 if self._match_texts(self.VIEW_ATTRIBUTES): 2031 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2032 2033 if self._match_text_seq("DATA"): 2034 return self._parse_withdata(no=False) 2035 elif self._match_text_seq("NO", "DATA"): 2036 return self._parse_withdata(no=True) 2037 2038 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2039 return self._parse_serde_properties(with_=True) 2040 2041 if self._match(TokenType.SCHEMA): 2042 return self.expression( 2043 exp.WithSchemaBindingProperty, 2044 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2045 ) 2046 2047 if not self._next: 2048 return None 2049 2050 return self._parse_withisolatedloading() 2051 2052 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2053 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2054 self._match(TokenType.EQ) 2055 2056 user = self._parse_id_var() 2057 self._match(TokenType.PARAMETER) 2058 host = self._parse_id_var() or (self._match(TokenType.MOD) and 
self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        # WITH JOURNAL [TABLE] [=] <table> (Teradata-style journaling target).
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        # [NO] LOG — the keyword itself was already consumed by the caller.
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        # kwargs (no/dual/before/after/local) come from _parse_property_before.
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        # CHECKSUM [=] {ON | OFF | DEFAULT}; `on` stays None when neither
        # ON nor OFF is present.
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        # CLUSTER BY expressions; `wrapped` selects the parenthesized form.
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        # CLUSTERED BY (cols) [SORTED BY (ordered-cols)] INTO <n> BUCKETS
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        # FREESPACE [=] <number> [PERCENT]
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        # With "=" the explicit numeric form is parsed; otherwise the
        # no/default flags (supplied by the caller) describe the property.
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        # [DEFAULT | MINIMUM | MAXIMUM] DATABLOCKSIZE [=] <size> [units];
        # the default/minimum/maximum flags are forwarded by the caller.
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        # BLOCKCOMPRESSION [=] {ALWAYS | MANUAL | NEVER | DEFAULT} [AUTOTEMP (...)]
        # At most one of the four flags ends up truthy since _match_text_seq
        # only consumes on a match.
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = 
self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        # Without the mandatory ISOLATED LOADING keywords this wasn't the
        # property after all — rewind every token consumed above.
        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        # LOCKING [TABLE|VIEW|ROW|DATABASE] [<name>] [FOR|IN] <lock type> [OVERRIDE]
        # Every component is optional; unmatched parts are stored as None.
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects (not ROW) are followed by an object name.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        # Returns [] (not None) when there is no PARTITION BY clause.
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> 
t.Optional[exp.Expression]: 2260 if self._match_text_seq("MINVALUE"): 2261 return exp.var("MINVALUE") 2262 if self._match_text_seq("MAXVALUE"): 2263 return exp.var("MAXVALUE") 2264 return self._parse_bitwise() 2265 2266 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2267 expression = None 2268 from_expressions = None 2269 to_expressions = None 2270 2271 if self._match(TokenType.IN): 2272 this = self._parse_wrapped_csv(self._parse_bitwise) 2273 elif self._match(TokenType.FROM): 2274 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2275 self._match_text_seq("TO") 2276 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2277 elif self._match_text_seq("WITH", "(", "MODULUS"): 2278 this = self._parse_number() 2279 self._match_text_seq(",", "REMAINDER") 2280 expression = self._parse_number() 2281 self._match_r_paren() 2282 else: 2283 self.raise_error("Failed to parse partition bound spec.") 2284 2285 return self.expression( 2286 exp.PartitionBoundSpec, 2287 this=this, 2288 expression=expression, 2289 from_expressions=from_expressions, 2290 to_expressions=to_expressions, 2291 ) 2292 2293 # https://www.postgresql.org/docs/current/sql-createtable.html 2294 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2295 if not self._match_text_seq("OF"): 2296 self._retreat(self._index - 1) 2297 return None 2298 2299 this = self._parse_table(schema=True) 2300 2301 if self._match(TokenType.DEFAULT): 2302 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2303 elif self._match_text_seq("FOR", "VALUES"): 2304 expression = self._parse_partition_bound_spec() 2305 else: 2306 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2307 2308 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2309 2310 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2311 self._match(TokenType.EQ) 2312 return self.expression( 2313 exp.PartitionedByProperty, 
this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # WITH [NO] DATA [AND [NO] STATISTICS]; `statistics` stays None when
        # the AND ... STATISTICS suffix is absent.
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        # CONTAINS SQL — "CONTAINS" was consumed by the property dispatcher.
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        # MODIFIES SQL DATA — "MODIFIES" was consumed by the dispatcher.
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        # After a leading NO: either NO PRIMARY INDEX or NO SQL.
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        # After a leading ON: ON COMMIT {PRESERVE|DELETE} ROWS, otherwise a
        # generic ON <schema> property.
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        # READS SQL DATA — "READS" was consumed by the dispatcher.
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        # DISTKEY ( <identifier> )
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = 
self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            # An INCLUDING/EXCLUDING keyword must be followed by an option
            # name; bail out of the whole LIKE clause if it is missing.
            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        # [COMPOUND] SORTKEY ( <ids> ); `compound` is set by _parse_property.
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        # [DEFAULT] CHARACTER SET [=] <charset>
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        # REMOTE WITH CONNECTION <table parts>
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        # RETURNS {TABLE [<schema> | < ... >] | NULL ON NULL INPUT | <type>}
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # Angle-bracketed struct form: RETURNS TABLE<col type, ...>
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = 
self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2420 if self._match(TokenType.DOT): 2421 style = None 2422 self._retreat(self._index - 2) 2423 this = self._parse_table(schema=True) 2424 properties = self._parse_properties() 2425 expressions = properties.expressions if properties else None 2426 return self.expression( 2427 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2428 ) 2429 2430 def _parse_insert(self) -> exp.Insert: 2431 comments = ensure_list(self._prev_comments) 2432 hint = self._parse_hint() 2433 overwrite = self._match(TokenType.OVERWRITE) 2434 ignore = self._match(TokenType.IGNORE) 2435 local = self._match_text_seq("LOCAL") 2436 alternative = None 2437 is_function = None 2438 2439 if self._match_text_seq("DIRECTORY"): 2440 this: t.Optional[exp.Expression] = self.expression( 2441 exp.Directory, 2442 this=self._parse_var_or_string(), 2443 local=local, 2444 row_format=self._parse_row_format(match_row=True), 2445 ) 2446 else: 2447 if self._match(TokenType.OR): 2448 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2449 2450 self._match(TokenType.INTO) 2451 comments += ensure_list(self._prev_comments) 2452 self._match(TokenType.TABLE) 2453 is_function = self._match(TokenType.FUNCTION) 2454 2455 this = ( 2456 self._parse_table(schema=True, parse_partition=True) 2457 if not is_function 2458 else self._parse_function() 2459 ) 2460 2461 returning = self._parse_returning() 2462 2463 return self.expression( 2464 exp.Insert, 2465 comments=comments, 2466 hint=hint, 2467 is_function=is_function, 2468 this=this, 2469 stored=self._match_text_seq("STORED") and self._parse_stored(), 2470 by_name=self._match_text_seq("BY", "NAME"), 2471 exists=self._parse_exists(), 2472 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2473 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2474 conflict=self._parse_on_conflict(), 2475 
returning=returning or self._parse_returning(), 2476 overwrite=overwrite, 2477 alternative=alternative, 2478 ignore=ignore, 2479 ) 2480 2481 def _parse_kill(self) -> exp.Kill: 2482 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2483 2484 return self.expression( 2485 exp.Kill, 2486 this=self._parse_primary(), 2487 kind=kind, 2488 ) 2489 2490 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2491 conflict = self._match_text_seq("ON", "CONFLICT") 2492 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2493 2494 if not conflict and not duplicate: 2495 return None 2496 2497 conflict_keys = None 2498 constraint = None 2499 2500 if conflict: 2501 if self._match_text_seq("ON", "CONSTRAINT"): 2502 constraint = self._parse_id_var() 2503 elif self._match(TokenType.L_PAREN): 2504 conflict_keys = self._parse_csv(self._parse_id_var) 2505 self._match_r_paren() 2506 2507 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2508 if self._prev.token_type == TokenType.UPDATE: 2509 self._match(TokenType.SET) 2510 expressions = self._parse_csv(self._parse_equality) 2511 else: 2512 expressions = None 2513 2514 return self.expression( 2515 exp.OnConflict, 2516 duplicate=duplicate, 2517 expressions=expressions, 2518 action=action, 2519 conflict_keys=conflict_keys, 2520 constraint=constraint, 2521 ) 2522 2523 def _parse_returning(self) -> t.Optional[exp.Returning]: 2524 if not self._match(TokenType.RETURNING): 2525 return None 2526 return self.expression( 2527 exp.Returning, 2528 expressions=self._parse_csv(self._parse_expression), 2529 into=self._match(TokenType.INTO) and self._parse_table_part(), 2530 ) 2531 2532 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2533 if not self._match(TokenType.FORMAT): 2534 return None 2535 return self._parse_row_format() 2536 2537 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2538 index = 
self._index
        with_ = with_ or self._match_text_seq("WITH")

        # Not followed by SERDEPROPERTIES — undo any WITH we consumed above.
        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # Hive-style ROW FORMAT {SERDE '<class>' | DELIMITED [options]};
        # `match_row` requires the leading ROW FORMAT keywords themselves.
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        # Each DELIMITED option is optional, but they must appear in this
        # fixed order since each is checked exactly once.
        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
        if self._match_text_seq("ESCAPED", "BY"):
            kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        # LOAD DATA [LOCAL] INPATH '<path>' [OVERWRITE] INTO TABLE ...; any
        # other LOAD statement falls through and is kept as a raw Command.
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
exp.LoadData, 2596 this=self._parse_table(schema=True), 2597 local=local, 2598 overwrite=overwrite, 2599 inpath=inpath, 2600 partition=self._parse_partition(), 2601 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2602 serde=self._match_text_seq("SERDE") and self._parse_string(), 2603 ) 2604 return self._parse_as_command(self._prev) 2605 2606 def _parse_delete(self) -> exp.Delete: 2607 # This handles MySQL's "Multiple-Table Syntax" 2608 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2609 tables = None 2610 comments = self._prev_comments 2611 if not self._match(TokenType.FROM, advance=False): 2612 tables = self._parse_csv(self._parse_table) or None 2613 2614 returning = self._parse_returning() 2615 2616 return self.expression( 2617 exp.Delete, 2618 comments=comments, 2619 tables=tables, 2620 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2621 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2622 where=self._parse_where(), 2623 returning=returning or self._parse_returning(), 2624 limit=self._parse_limit(), 2625 ) 2626 2627 def _parse_update(self) -> exp.Update: 2628 comments = self._prev_comments 2629 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2630 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2631 returning = self._parse_returning() 2632 return self.expression( 2633 exp.Update, 2634 comments=comments, 2635 **{ # type: ignore 2636 "this": this, 2637 "expressions": expressions, 2638 "from": self._parse_from(joins=True), 2639 "where": self._parse_where(), 2640 "returning": returning or self._parse_returning(), 2641 "order": self._parse_order(), 2642 "limit": self._parse_limit(), 2643 }, 2644 ) 2645 2646 def _parse_uncache(self) -> exp.Uncache: 2647 if not self._match(TokenType.TABLE): 2648 self.raise_error("Expecting TABLE after UNCACHE") 2649 2650 return self.expression( 2651 exp.Uncache, exists=self._parse_exists(), 
this=self._parse_table(schema=True) 2652 ) 2653 2654 def _parse_cache(self) -> exp.Cache: 2655 lazy = self._match_text_seq("LAZY") 2656 self._match(TokenType.TABLE) 2657 table = self._parse_table(schema=True) 2658 2659 options = [] 2660 if self._match_text_seq("OPTIONS"): 2661 self._match_l_paren() 2662 k = self._parse_string() 2663 self._match(TokenType.EQ) 2664 v = self._parse_string() 2665 options = [k, v] 2666 self._match_r_paren() 2667 2668 self._match(TokenType.ALIAS) 2669 return self.expression( 2670 exp.Cache, 2671 this=table, 2672 lazy=lazy, 2673 options=options, 2674 expression=self._parse_select(nested=True), 2675 ) 2676 2677 def _parse_partition(self) -> t.Optional[exp.Partition]: 2678 if not self._match(TokenType.PARTITION): 2679 return None 2680 2681 return self.expression( 2682 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2683 ) 2684 2685 def _parse_value(self) -> t.Optional[exp.Tuple]: 2686 if self._match(TokenType.L_PAREN): 2687 expressions = self._parse_csv(self._parse_expression) 2688 self._match_r_paren() 2689 return self.expression(exp.Tuple, expressions=expressions) 2690 2691 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
2692 expression = self._parse_expression() 2693 if expression: 2694 return self.expression(exp.Tuple, expressions=[expression]) 2695 return None 2696 2697 def _parse_projections(self) -> t.List[exp.Expression]: 2698 return self._parse_expressions() 2699 2700 def _parse_select( 2701 self, 2702 nested: bool = False, 2703 table: bool = False, 2704 parse_subquery_alias: bool = True, 2705 parse_set_operation: bool = True, 2706 ) -> t.Optional[exp.Expression]: 2707 cte = self._parse_with() 2708 2709 if cte: 2710 this = self._parse_statement() 2711 2712 if not this: 2713 self.raise_error("Failed to parse any statement following CTE") 2714 return cte 2715 2716 if "with" in this.arg_types: 2717 this.set("with", cte) 2718 else: 2719 self.raise_error(f"{this.key} does not support CTE") 2720 this = cte 2721 2722 return this 2723 2724 # duckdb supports leading with FROM x 2725 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2726 2727 if self._match(TokenType.SELECT): 2728 comments = self._prev_comments 2729 2730 hint = self._parse_hint() 2731 all_ = self._match(TokenType.ALL) 2732 distinct = self._match_set(self.DISTINCT_TOKENS) 2733 2734 kind = ( 2735 self._match(TokenType.ALIAS) 2736 and self._match_texts(("STRUCT", "VALUE")) 2737 and self._prev.text.upper() 2738 ) 2739 2740 if distinct: 2741 distinct = self.expression( 2742 exp.Distinct, 2743 on=self._parse_value() if self._match(TokenType.ON) else None, 2744 ) 2745 2746 if all_ and distinct: 2747 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2748 2749 limit = self._parse_limit(top=True) 2750 projections = self._parse_projections() 2751 2752 this = self.expression( 2753 exp.Select, 2754 kind=kind, 2755 hint=hint, 2756 distinct=distinct, 2757 expressions=projections, 2758 limit=limit, 2759 ) 2760 this.comments = comments 2761 2762 into = self._parse_into() 2763 if into: 2764 this.set("into", into) 2765 2766 if not from_: 2767 from_ = self._parse_from() 2768 2769 if 
from_: 2770 this.set("from", from_) 2771 2772 this = self._parse_query_modifiers(this) 2773 elif (table or nested) and self._match(TokenType.L_PAREN): 2774 if self._match(TokenType.PIVOT): 2775 this = self._parse_simplified_pivot() 2776 elif self._match(TokenType.FROM): 2777 this = exp.select("*").from_( 2778 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2779 ) 2780 else: 2781 this = ( 2782 self._parse_table() 2783 if table 2784 else self._parse_select(nested=True, parse_set_operation=False) 2785 ) 2786 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2787 2788 self._match_r_paren() 2789 2790 # We return early here so that the UNION isn't attached to the subquery by the 2791 # following call to _parse_set_operations, but instead becomes the parent node 2792 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2793 elif self._match(TokenType.VALUES, advance=False): 2794 this = self._parse_derived_table_values() 2795 elif from_: 2796 this = exp.select("*").from_(from_.this, copy=False) 2797 else: 2798 this = None 2799 2800 if parse_set_operation: 2801 return self._parse_set_operations(this) 2802 return this 2803 2804 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2805 if not skip_with_token and not self._match(TokenType.WITH): 2806 return None 2807 2808 comments = self._prev_comments 2809 recursive = self._match(TokenType.RECURSIVE) 2810 2811 expressions = [] 2812 while True: 2813 expressions.append(self._parse_cte()) 2814 2815 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2816 break 2817 else: 2818 self._match(TokenType.WITH) 2819 2820 return self.expression( 2821 exp.With, comments=comments, expressions=expressions, recursive=recursive 2822 ) 2823 2824 def _parse_cte(self) -> exp.CTE: 2825 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2826 if not alias or not alias.this: 2827 self.raise_error("Expected CTE to have alias") 2828 2829 
self._match(TokenType.ALIAS) 2830 2831 if self._match_text_seq("NOT", "MATERIALIZED"): 2832 materialized = False 2833 elif self._match_text_seq("MATERIALIZED"): 2834 materialized = True 2835 else: 2836 materialized = None 2837 2838 return self.expression( 2839 exp.CTE, 2840 this=self._parse_wrapped(self._parse_statement), 2841 alias=alias, 2842 materialized=materialized, 2843 ) 2844 2845 def _parse_table_alias( 2846 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2847 ) -> t.Optional[exp.TableAlias]: 2848 any_token = self._match(TokenType.ALIAS) 2849 alias = ( 2850 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2851 or self._parse_string_as_identifier() 2852 ) 2853 2854 index = self._index 2855 if self._match(TokenType.L_PAREN): 2856 columns = self._parse_csv(self._parse_function_parameter) 2857 self._match_r_paren() if columns else self._retreat(index) 2858 else: 2859 columns = None 2860 2861 if not alias and not columns: 2862 return None 2863 2864 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2865 2866 # We bubble up comments from the Identifier to the TableAlias 2867 if isinstance(alias, exp.Identifier): 2868 table_alias.add_comments(alias.pop_comments()) 2869 2870 return table_alias 2871 2872 def _parse_subquery( 2873 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2874 ) -> t.Optional[exp.Subquery]: 2875 if not this: 2876 return None 2877 2878 return self.expression( 2879 exp.Subquery, 2880 this=this, 2881 pivots=self._parse_pivots(), 2882 alias=self._parse_table_alias() if parse_alias else None, 2883 ) 2884 2885 def _implicit_unnests_to_explicit(self, this: E) -> E: 2886 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2887 2888 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2889 for i, join in enumerate(this.args.get("joins") or []): 2890 table = join.this 2891 normalized_table = 
    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing query modifiers (joins, laterals, WHERE/GROUP/LIMIT, etc.)
        onto `this`, driven by the `QUERY_MODIFIER_PARSERS` dispatch table.
        """
        if isinstance(this, (exp.Query, exp.Table)):
            # Joins and laterals come first; iter(..., None) pulls until exhausted.
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # A LIMIT x, y parse may carry an embedded offset;
                            # lift it into a proper exp.Offset node on `this`.
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                # Move LIMIT ... BY expressions over to the Offset
                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        # Some dialects allow an unnest-like column reference in FROM/JOIN position
        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this
for hint in iter( 2950 lambda: self._parse_csv( 2951 lambda: self._parse_function() or self._parse_var(upper=True) 2952 ), 2953 [], 2954 ): 2955 hints.extend(hint) 2956 2957 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2958 self.raise_error("Expected */ after HINT") 2959 2960 return self.expression(exp.Hint, expressions=hints) 2961 2962 return None 2963 2964 def _parse_into(self) -> t.Optional[exp.Into]: 2965 if not self._match(TokenType.INTO): 2966 return None 2967 2968 temp = self._match(TokenType.TEMPORARY) 2969 unlogged = self._match_text_seq("UNLOGGED") 2970 self._match(TokenType.TABLE) 2971 2972 return self.expression( 2973 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2974 ) 2975 2976 def _parse_from( 2977 self, joins: bool = False, skip_from_token: bool = False 2978 ) -> t.Optional[exp.From]: 2979 if not skip_from_token and not self._match(TokenType.FROM): 2980 return None 2981 2982 return self.expression( 2983 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2984 ) 2985 2986 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2987 return self.expression( 2988 exp.MatchRecognizeMeasure, 2989 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2990 this=self._parse_expression(), 2991 ) 2992 2993 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2994 if not self._match(TokenType.MATCH_RECOGNIZE): 2995 return None 2996 2997 self._match_l_paren() 2998 2999 partition = self._parse_partition_by() 3000 order = self._parse_order() 3001 3002 measures = ( 3003 self._parse_csv(self._parse_match_recognize_measure) 3004 if self._match_text_seq("MEASURES") 3005 else None 3006 ) 3007 3008 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3009 rows = exp.var("ONE ROW PER MATCH") 3010 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3011 text = "ALL ROWS PER MATCH" 3012 if self._match_text_seq("SHOW", "EMPTY", 
"MATCHES"): 3013 text += " SHOW EMPTY MATCHES" 3014 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3015 text += " OMIT EMPTY MATCHES" 3016 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3017 text += " WITH UNMATCHED ROWS" 3018 rows = exp.var(text) 3019 else: 3020 rows = None 3021 3022 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3023 text = "AFTER MATCH SKIP" 3024 if self._match_text_seq("PAST", "LAST", "ROW"): 3025 text += " PAST LAST ROW" 3026 elif self._match_text_seq("TO", "NEXT", "ROW"): 3027 text += " TO NEXT ROW" 3028 elif self._match_text_seq("TO", "FIRST"): 3029 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3030 elif self._match_text_seq("TO", "LAST"): 3031 text += f" TO LAST {self._advance_any().text}" # type: ignore 3032 after = exp.var(text) 3033 else: 3034 after = None 3035 3036 if self._match_text_seq("PATTERN"): 3037 self._match_l_paren() 3038 3039 if not self._curr: 3040 self.raise_error("Expecting )", self._curr) 3041 3042 paren = 1 3043 start = self._curr 3044 3045 while self._curr and paren > 0: 3046 if self._curr.token_type == TokenType.L_PAREN: 3047 paren += 1 3048 if self._curr.token_type == TokenType.R_PAREN: 3049 paren -= 1 3050 3051 end = self._prev 3052 self._advance() 3053 3054 if paren > 0: 3055 self.raise_error("Expecting )", self._curr) 3056 3057 pattern = exp.var(self._find_sql(start, end)) 3058 else: 3059 pattern = None 3060 3061 define = ( 3062 self._parse_csv(self._parse_name_as_expression) 3063 if self._match_text_seq("DEFINE") 3064 else None 3065 ) 3066 3067 self._match_r_paren() 3068 3069 return self.expression( 3070 exp.MatchRecognize, 3071 partition_by=partition, 3072 order=order, 3073 measures=measures, 3074 rows=rows, 3075 after=after, 3076 pattern=pattern, 3077 define=define, 3078 alias=self._parse_table_alias(), 3079 ) 3080 3081 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3082 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3083 if not cross_apply and 
    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a single join clause into an `exp.Join`, or return None and
        restore the token position if no join is present.
        """
        # A bare comma is an implicit cross join
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        # STRAIGHT_JOIN acts as the JOIN keyword itself (MySQL)
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            # No JOIN keyword followed the method/side/kind tokens: backtrack
            self._retreat(index)
            kind = None
            method = None
            side = None

        # Third argument presumably disables advancing on a partial match — TODO confirm
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not isinstance(kwargs["this"], exp.Unnest) and not (
            kind and kind.token_type == TokenType.CROSS
        ):
            # Nested-join case: try to parse trailing joins whose ON/USING
            # belongs to this join; otherwise backtrack completely.
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        # Comments on the method/side/kind tokens are hoisted onto the Join
        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3204 return this 3205 3206 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3207 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3208 3209 return this 3210 3211 def _parse_index_params(self) -> exp.IndexParameters: 3212 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3213 3214 if self._match(TokenType.L_PAREN, advance=False): 3215 columns = self._parse_wrapped_csv(self._parse_with_operator) 3216 else: 3217 columns = None 3218 3219 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3220 partition_by = self._parse_partition_by() 3221 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3222 tablespace = ( 3223 self._parse_var(any_token=True) 3224 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3225 else None 3226 ) 3227 where = self._parse_where() 3228 3229 on = self._parse_field() if self._match(TokenType.ON) else None 3230 3231 return self.expression( 3232 exp.IndexParameters, 3233 using=using, 3234 columns=columns, 3235 include=include, 3236 partition_by=partition_by, 3237 where=where, 3238 with_storage=with_storage, 3239 tablespace=tablespace, 3240 on=on, 3241 ) 3242 3243 def _parse_index( 3244 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3245 ) -> t.Optional[exp.Index]: 3246 if index or anonymous: 3247 unique = None 3248 primary = None 3249 amp = None 3250 3251 self._match(TokenType.ON) 3252 self._match(TokenType.TABLE) # hive 3253 table = self._parse_table_parts(schema=True) 3254 else: 3255 unique = self._match(TokenType.UNIQUE) 3256 primary = self._match_text_seq("PRIMARY") 3257 amp = self._match_text_seq("AMP") 3258 3259 if not self._match(TokenType.INDEX): 3260 return None 3261 3262 index = self._parse_id_var() 3263 table = None 3264 3265 params = self._parse_index_params() 3266 3267 return self.expression( 3268 
exp.Index, 3269 this=index, 3270 table=table, 3271 unique=unique, 3272 primary=primary, 3273 amp=amp, 3274 params=params, 3275 ) 3276 3277 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3278 hints: t.List[exp.Expression] = [] 3279 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3280 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3281 hints.append( 3282 self.expression( 3283 exp.WithTableHint, 3284 expressions=self._parse_csv( 3285 lambda: self._parse_function() or self._parse_var(any_token=True) 3286 ), 3287 ) 3288 ) 3289 self._match_r_paren() 3290 else: 3291 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3292 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3293 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3294 3295 self._match_set((TokenType.INDEX, TokenType.KEY)) 3296 if self._match(TokenType.FOR): 3297 hint.set("target", self._advance_any() and self._prev.text.upper()) 3298 3299 hint.set("expressions", self._parse_wrapped_id_vars()) 3300 hints.append(hint) 3301 3302 return hints or None 3303 3304 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3305 return ( 3306 (not schema and self._parse_function(optional_parens=False)) 3307 or self._parse_id_var(any_token=False) 3308 or self._parse_string_as_identifier() 3309 or self._parse_placeholder() 3310 ) 3311 3312 def _parse_table_parts( 3313 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3314 ) -> exp.Table: 3315 catalog = None 3316 db = None 3317 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3318 3319 while self._match(TokenType.DOT): 3320 if catalog: 3321 # This allows nesting the table in arbitrarily many dot expressions if needed 3322 table = self.expression( 3323 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3324 ) 3325 else: 3326 catalog = db 3327 db = table 3328 # "" used for 
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, VALUES, subquery, or a plain
        (possibly qualified) table reference with its alias, hints, pivots,
        sample, version clause and optional trailing joins.
        """
        # Try the alternative table-factor forms first, in fixed priority order
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        # In schema position (e.g. CREATE TABLE) the column definitions follow
        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Dialect decides whether TABLESAMPLE precedes or follows the alias
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        # A sample wraps the table node it was parsed for
        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this
self._match(TokenType.TIMESTAMP_SNAPSHOT): 3464 this = "TIMESTAMP" 3465 elif self._match(TokenType.VERSION_SNAPSHOT): 3466 this = "VERSION" 3467 else: 3468 return None 3469 3470 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3471 kind = self._prev.text.upper() 3472 start = self._parse_bitwise() 3473 self._match_texts(("TO", "AND")) 3474 end = self._parse_bitwise() 3475 expression: t.Optional[exp.Expression] = self.expression( 3476 exp.Tuple, expressions=[start, end] 3477 ) 3478 elif self._match_text_seq("CONTAINED", "IN"): 3479 kind = "CONTAINED IN" 3480 expression = self.expression( 3481 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3482 ) 3483 elif self._match(TokenType.ALL): 3484 kind = "ALL" 3485 expression = None 3486 else: 3487 self._match_text_seq("AS", "OF") 3488 kind = "AS OF" 3489 expression = self._parse_type() 3490 3491 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3492 3493 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3494 if not self._match(TokenType.UNNEST): 3495 return None 3496 3497 expressions = self._parse_wrapped_csv(self._parse_equality) 3498 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3499 3500 alias = self._parse_table_alias() if with_alias else None 3501 3502 if alias: 3503 if self.dialect.UNNEST_COLUMN_ONLY: 3504 if alias.args.get("columns"): 3505 self.raise_error("Unexpected extra column alias in unnest.") 3506 3507 alias.set("columns", [alias.this]) 3508 alias.set("this", None) 3509 3510 columns = alias.args.get("columns") or [] 3511 if offset and len(expressions) < len(columns): 3512 offset = columns.pop() 3513 3514 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3515 self._match(TokenType.ALIAS) 3516 offset = self._parse_id_var( 3517 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3518 ) or exp.to_identifier("offset") 3519 3520 return self.expression(exp.Unnest, expressions=expressions, 
alias=alias, offset=offset) 3521 3522 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3523 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3524 if not is_derived and not self._match_text_seq("VALUES"): 3525 return None 3526 3527 expressions = self._parse_csv(self._parse_value) 3528 alias = self._parse_table_alias() 3529 3530 if is_derived: 3531 self._match_r_paren() 3532 3533 return self.expression( 3534 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3535 ) 3536 3537 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3538 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3539 as_modifier and self._match_text_seq("USING", "SAMPLE") 3540 ): 3541 return None 3542 3543 bucket_numerator = None 3544 bucket_denominator = None 3545 bucket_field = None 3546 percent = None 3547 size = None 3548 seed = None 3549 3550 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3551 matched_l_paren = self._match(TokenType.L_PAREN) 3552 3553 if self.TABLESAMPLE_CSV: 3554 num = None 3555 expressions = self._parse_csv(self._parse_primary) 3556 else: 3557 expressions = None 3558 num = ( 3559 self._parse_factor() 3560 if self._match(TokenType.NUMBER, advance=False) 3561 else self._parse_primary() or self._parse_placeholder() 3562 ) 3563 3564 if self._match_text_seq("BUCKET"): 3565 bucket_numerator = self._parse_number() 3566 self._match_text_seq("OUT", "OF") 3567 bucket_denominator = bucket_denominator = self._parse_number() 3568 self._match(TokenType.ON) 3569 bucket_field = self._parse_field() 3570 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3571 percent = num 3572 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3573 size = num 3574 else: 3575 percent = num 3576 3577 if matched_l_paren: 3578 self._match_r_paren() 3579 3580 if self._match(TokenType.L_PAREN): 3581 method = self._parse_var(upper=True) 3582 seed = 
self._match(TokenType.COMMA) and self._parse_number() 3583 self._match_r_paren() 3584 elif self._match_texts(("SEED", "REPEATABLE")): 3585 seed = self._parse_wrapped(self._parse_number) 3586 3587 if not method and self.DEFAULT_SAMPLING_METHOD: 3588 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3589 3590 return self.expression( 3591 exp.TableSample, 3592 expressions=expressions, 3593 method=method, 3594 bucket_numerator=bucket_numerator, 3595 bucket_denominator=bucket_denominator, 3596 bucket_field=bucket_field, 3597 percent=percent, 3598 size=size, 3599 seed=seed, 3600 ) 3601 3602 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3603 return list(iter(self._parse_pivot, None)) or None 3604 3605 def _parse_joins(self) -> t.Iterator[exp.Join]: 3606 return iter(self._parse_join, None) 3607 3608 # https://duckdb.org/docs/sql/statements/pivot 3609 def _parse_simplified_pivot(self) -> exp.Pivot: 3610 def _parse_on() -> t.Optional[exp.Expression]: 3611 this = self._parse_bitwise() 3612 return self._parse_in(this) if self._match(TokenType.IN) else this 3613 3614 this = self._parse_table() 3615 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3616 using = self._match(TokenType.USING) and self._parse_csv( 3617 lambda: self._parse_alias(self._parse_function()) 3618 ) 3619 group = self._parse_group() 3620 return self.expression( 3621 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3622 ) 3623 3624 def _parse_pivot_in(self) -> exp.In: 3625 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3626 this = self._parse_assignment() 3627 3628 self._match(TokenType.ALIAS) 3629 alias = self._parse_field() 3630 if alias: 3631 return self.expression(exp.PivotAlias, this=this, alias=alias) 3632 3633 return this 3634 3635 value = self._parse_column() 3636 3637 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3638 self.raise_error("Expecting IN (") 3639 3640 aliased_expressions = 
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a PIVOT/UNPIVOT clause into an `exp.Pivot`, or return None
        (restoring the token position) when no pivot follows.
        """
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT keyword without parens: not actually a pivot clause
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        # Only the last pivot in a chain may carry the table alias
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names that the pivot produces, one
            # per (IN-value, aggregation-name) combination
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    # Dialect controls whether the aggregation name prefixes or
                    # suffixes the field value in the generated column name
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
self._parse_wrapped_csv(self._parse_column) 3767 elements["cube"].extend(ensure_list(cube)) 3768 3769 if self._match_text_seq("TOTALS"): 3770 totals = True 3771 elements["totals"] = True # type: ignore 3772 3773 if not (grouping_sets or rollup or cube or totals): 3774 if with_: 3775 self._retreat(index) 3776 break 3777 3778 return self.expression(exp.Group, **elements) # type: ignore 3779 3780 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3781 if not self._match(TokenType.GROUPING_SETS): 3782 return None 3783 3784 return self._parse_wrapped_csv(self._parse_grouping_set) 3785 3786 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3787 if self._match(TokenType.L_PAREN): 3788 grouping_set = self._parse_csv(self._parse_column) 3789 self._match_r_paren() 3790 return self.expression(exp.Tuple, expressions=grouping_set) 3791 3792 return self._parse_column() 3793 3794 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3795 if not skip_having_token and not self._match(TokenType.HAVING): 3796 return None 3797 return self.expression(exp.Having, this=self._parse_assignment()) 3798 3799 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3800 if not self._match(TokenType.QUALIFY): 3801 return None 3802 return self.expression(exp.Qualify, this=self._parse_assignment()) 3803 3804 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3805 if skip_start_token: 3806 start = None 3807 elif self._match(TokenType.START_WITH): 3808 start = self._parse_assignment() 3809 else: 3810 return None 3811 3812 self._match(TokenType.CONNECT_BY) 3813 nocycle = self._match_text_seq("NOCYCLE") 3814 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3815 exp.Prior, this=self._parse_bitwise() 3816 ) 3817 connect = self._parse_assignment() 3818 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3819 3820 if not start and self._match(TokenType.START_WITH): 3821 start = 
    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ORDER BY term, including ASC/DESC, NULLS FIRST/LAST and
        ClickHouse's WITH FILL extension.
        """
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        # NOTE: `desc` is deliberately tri-state: True (explicit DESC),
        # False (explicit ASC, via `asc and False`), or None/falsy (no
        # direction given). The `or (asc and False)` is NOT dead code —
        # presumably the generator distinguishes False from None to emit an
        # explicit ASC; confirm before simplifying.
        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When null ordering is not explicit, infer it from the dialect's
        # default null-ordering semantics relative to the sort direction
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )
self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            # ONLY and WITH TIES are mutually exclusive in the FETCH clause
            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        # Parses OFFSET <count> [ROW|ROWS]; returns `this` unchanged when absent.
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        # ClickHouse LIMIT ... BY <exprs>; falsy when BY is not present.
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        # Parses trailing locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE,
        # each optionally followed by OF <tables> and NOWAIT / WAIT <n> / SKIP LOCKED.
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait encodes three states: True=NOWAIT, expression=WAIT <n>, False=SKIP LOCKED
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Left-associatively folds UNION/EXCEPT/INTERSECT chains onto `this`.
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = 
self._prev.token_type

            if token_type == TokenType.UNION:
                operation: t.Type[exp.SetOperation] = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            # DISTINCT is the default unless ALL is given explicitly
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                # Hoist trailing modifiers (e.g. ORDER BY / LIMIT) from the last
                # operand up to the set operation itself.
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        # Entry point for a (possibly aliased) scalar expression.
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        # Lowest-precedence binary level; := style assignment is right-associative
        # via the recursive call for the right-hand side.
        this = self._parse_disjunction()

        while self._match_set(self.ASSIGNMENT):
            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    # Precedence-climbing chain: disjunction -> conjunction -> equality ->
    # comparison -> range -> bitwise -> term -> factor -> unary.
    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def 
_parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        # Handles range-style predicates (BETWEEN, IN, LIKE, ...) via RANGE_PARSERS,
        # plus ISNULL/NOTNULL shorthands, optional NOT negation, and trailing IS.
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                # The range parser declined; keep the left operand as-is
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Parses what follows IS: [NOT] DISTINCT FROM, or [NOT] NULL/TRUE/FALSE.
        # Retreats (IS may be e.g. an alias elsewhere) when nothing matches.
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS NOT DISTINCT FROM == null-safe equality
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        # Parses the right side of IN: UNNEST(...), a parenthesized/bracketed list
        # or subquery, or (fallback) a bare field.
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: 
self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                # IN (SELECT ...): store as a subquery rather than a value list
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        # BETWEEN <low> AND <high>; the AND here is a separator, not a conjunction.
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Optional ESCAPE '<char>' suffix (e.g. after LIKE).
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        # Parses INTERVAL expressions; when `match_interval` is False the INTERVAL
        # keyword is assumed already consumed (used for chained interval sums below).
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # Not actually an interval (e.g. `interval IS ...` where interval is a
            # column); rewind and let other parsers handle it.
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = 
exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if len(parts) == 1:
                if unit:
                    # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                    self._retreat(self._index - 1)

                # Split "5 day" into value '5' and unit DAY
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            # e.g. INTERVAL '1-2' YEAR TO MONTH
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        # Left-associative bitwise/concat level: BITWISE table operators, ||
        # (dialect-dependent), ?? coalesce, and << / >> built from LT/GT pairs.
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                # a ?? b  ->  COALESCE(a, b)
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, 
expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        # Additive level (+, -, ...) driven by the TERM token table.
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        # Multiplicative level (*, /, DIV, ...) driven by the FACTOR token table.
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                # A word operator (e.g. DIV) with no right operand was likely an
                # identifier, not an operator -- back off and return what we have.
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                # Record dialect division semantics for faithful transpilation
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        # Exponentiation level, only active when the dialect defines EXPONENT tokens.
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        # Unary prefix operators via UNARY_PARSERS, otherwise a typed primary.
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        # Parses a value that may start with a type: INTERVAL literals, CAST-style
        # type constructors, typed literals (e.g. DATE '2020-01-01'), or a column.
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 
# STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                # Typed literal, e.g. DATE '2020-01-01': delegate to a dedicated
                # parser when one is registered, else canonicalize to a CAST.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0)) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        # Parses a single type parameter such as the 38 in DECIMAL(38, 0); bare
        # column names are normalized to uppercase vars (e.g. VARCHAR(MAX)).
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        # Core data type parser. Returns an exp.DataType (or related node), or None
        # after retreating when the upcoming tokens don't form a type.
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            # Not a known type token -- maybe a dialect-specific or user-defined type
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    # Qualified UDT name, e.g. schema.my_type
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            # MAP[<key_type> => <value_type>]
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            # Parenthesized type parameters; what's inside depends on the type kind
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # e.g. ClickHouse AggregateFunction(sum, Int64): first arg is a
                # function or identifier, the rest are types
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            # https://docs.snowflake.com/en/sql-reference/data-types-vector
            if type_token == TokenType.VECTOR and len(expressions) == 2:
                expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # Could still be a function call spelled like a type, e.g. DATE(...)
            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracket style nested types, e.g. ARRAY<INT>, STRUCT<a INT>
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                # Inline constructor values, e.g. ARRAY<INT>[1, 2]
                values = self._parse_csv(self._parse_assignment)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            # WITH/WITHOUT TIME ZONE suffixes select a concrete timestamp/time type
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if 
maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # Looks like a function call (e.g. DATE(x)), not a type: bail out
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            matched_l_bracket = self._match(TokenType.L_BRACKET)
            if not matched_l_bracket and not matched_array:
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if values and not schema:
                # Outside a schema context, T[1, 2] is indexing/constructor syntax,
                # not an array type -- rewind and stop
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            # Let the dialect canonicalize the parsed type (e.g. DECIMAL -> DECIMAL(38, 0))
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_struct_types(self, 
type_required: bool = False) -> t.Optional[exp.Expression]:
        # Parses one STRUCT field: `name type`, `name: type`, or a bare type.
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            # We parsed a name but no type follows; reparse the whole thing as a type
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Optional AT TIME ZONE <expr> suffix.
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        # Parses a column reference plus any trailing operators (dots, brackets, ...).
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            # Oracle-style (+) outer join marker
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            # VALUES not followed by ( is an identifier here, not the VALUES clause
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, 
comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        # Parses Snowflake/Databricks VARIANT extraction `col:a.b::type` into a
        # JSONExtract, keeping any :: casts applied after the whole path.
        casts = []
        json_path = []

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                # Recover the raw path text up to (but excluding) the :: token
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))),
                variant_extract=True,
            )

            while casts:
                # Re-apply the deferred casts, innermost first
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        # The right-hand side of a :: cast is a type.
        return self._parse_types()

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Folds postfix column operators (dots, ::, dialect-specific ops) onto `this`.
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = 
self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift name parts left: column becomes table, table becomes db, etc.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        # Parses primary expressions: literals, leading-dot numbers, and
        # parenthesized expressions/subqueries/tuples.
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are implicitly concatenated
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # .5 -> 0.5
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = 
self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                # Empty parens -> empty tuple
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                # A subquery may be followed by set operations before closing
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        # Tries primary/function (order depends on anonymous_func), then identifier.
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # Core function-call parser: handles no-paren functions, registered
        # function parsers, subquery predicates, and anonymous calls.
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No opening paren: only no-paren builtins (e.g. CURRENT_DATE) qualify
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS (SELECT ...)
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: 
self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                # Builders may optionally accept the dialect; inspect the signature
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Remember the original spelling when not normalizing names
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        # Normalizes key/value-style arguments (aliases, assignments) into
        # PropertyEQ nodes; non key/value expressions pass through unchanged.
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    # Strip the Column wrapper so the key is a plain identifier
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        # A UDF parameter is an identifier with an optional column definition.
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        # Parses a (possibly dotted) UDF name and an optional parameter list.
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4922 ) 4923 4924 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4925 literal = self._parse_primary() 4926 if literal: 4927 return self.expression(exp.Introducer, this=token.text, expression=literal) 4928 4929 return self.expression(exp.Identifier, this=token.text) 4930 4931 def _parse_session_parameter(self) -> exp.SessionParameter: 4932 kind = None 4933 this = self._parse_id_var() or self._parse_primary() 4934 4935 if this and self._match(TokenType.DOT): 4936 kind = this.name 4937 this = self._parse_var() or self._parse_primary() 4938 4939 return self.expression(exp.SessionParameter, this=this, kind=kind) 4940 4941 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 4942 return self._parse_id_var() 4943 4944 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4945 index = self._index 4946 4947 if self._match(TokenType.L_PAREN): 4948 expressions = t.cast( 4949 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 4950 ) 4951 4952 if not self._match(TokenType.R_PAREN): 4953 self._retreat(index) 4954 else: 4955 expressions = [self._parse_lambda_arg()] 4956 4957 if self._match_set(self.LAMBDAS): 4958 return self.LAMBDAS[self._prev.token_type](self, expressions) 4959 4960 self._retreat(index) 4961 4962 this: t.Optional[exp.Expression] 4963 4964 if self._match(TokenType.DISTINCT): 4965 this = self.expression( 4966 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 4967 ) 4968 else: 4969 this = self._parse_select_or_expression(alias=alias) 4970 4971 return self._parse_limit( 4972 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4973 ) 4974 4975 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4976 index = self._index 4977 if not self._match(TokenType.L_PAREN): 4978 return this 4979 4980 # Disambiguate between schema and 
subquery/CTE, e.g. in INSERT INTO table (<expr>), 4981 # expr can be of both types 4982 if self._match_set(self.SELECT_START_TOKENS): 4983 self._retreat(index) 4984 return this 4985 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4986 self._match_r_paren() 4987 return self.expression(exp.Schema, this=this, expressions=args) 4988 4989 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4990 return self._parse_column_def(self._parse_field(any_token=True)) 4991 4992 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4993 # column defs are not really columns, they're identifiers 4994 if isinstance(this, exp.Column): 4995 this = this.this 4996 4997 kind = self._parse_types(schema=True) 4998 4999 if self._match_text_seq("FOR", "ORDINALITY"): 5000 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5001 5002 constraints: t.List[exp.Expression] = [] 5003 5004 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5005 ("ALIAS", "MATERIALIZED") 5006 ): 5007 persisted = self._prev.text.upper() == "MATERIALIZED" 5008 constraints.append( 5009 self.expression( 5010 exp.ComputedColumnConstraint, 5011 this=self._parse_assignment(), 5012 persisted=persisted or self._match_text_seq("PERSISTED"), 5013 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5014 ) 5015 ) 5016 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5017 self._match(TokenType.ALIAS) 5018 constraints.append( 5019 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 5020 ) 5021 5022 while True: 5023 constraint = self._parse_column_constraint() 5024 if not constraint: 5025 break 5026 constraints.append(constraint) 5027 5028 if not kind and not constraints: 5029 return this 5030 5031 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5032 5033 def _parse_auto_increment( 5034 self, 5035 ) -> 
    # NOTE(review): chunk begins mid-signature — this is the tail of
    # `_parse_auto_increment`'s return annotation.
    exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            # AUTO_INCREMENT(start, increment)
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            # START <expr> [INCREMENT <expr>]
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        # Only `AUTO REFRESH <value>` forms a property; otherwise back off one token.
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        # COMPRESS may take a single expression or a wrapped list of them.
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS ... column constraints:
        IDENTITY(...) sequences, ROW START/END (system versioning), and
        generated-expression columns."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # System-versioned temporal columns: GENERATED ... AS ROW START/END [HIDDEN].
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (<expr>): a generated-expression column.
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare IDENTITY(start, increment) shorthand.
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        # INLINE LENGTH <n> constraint.
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        # Dispatch for constraints introduced by NOT: NULL, CASESPECIFIC, FOR REPLICATION.
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        # Optional `CONSTRAINT <name>` prefix before the constraint kind.
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            # Unnamed constraint; restrict to schema-level unnamed kinds.
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            # Constraints may also look like function calls, e.g. CHECK(...).
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>,
        DEFERRABLE, MATCH FULL, ...) as plain strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    # NOTE(review): chunk begins mid-method, inside
                    # `_parse_key_constraint_options`'s ON-action dispatch.
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        # schema=True lets the referenced column list be captured inside `this`.
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (<cols>) REFERENCES ... [ON DELETE/UPDATE <action>]."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-token action, e.g. CASCADE / RESTRICT.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        # PERIOD FOR SYSTEM_TIME (<start>, <end>); back off one token when absent.
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            # Column-level constraint: no column list follows.
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse `[...]` (array literal / subscript) and `{...}` (struct) suffixes,
        recursing at the end to support chained subscripts like a[1][2]."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = self.expression(exp.Array, expressions=expressions)
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return self.expression(constructor_type, expressions=expressions)

            # Adjust subscripts for the dialect's array index base.
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [<operand>] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            # `END` may have been mis-consumed as an INTERVAL unit; undo that.
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args =
            # NOTE(review): chunk begins mid-statement inside `_parse_if`'s
            # parenthesized-form branch.
            self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A leading bare IF is treated as a raw command in some dialects.
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            # IF <condition> THEN <true> [ELSE <false>] END
            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        # NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; back off when not matched.
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        # EXTRACT(<part> FROM <expr>) or the comma-separated variant.
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        # GAP_FILL(TABLE <table>, <args>...)
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(<expr> AS <type> [FORMAT <fmt>]).

        `strict` selects exp.Cast vs exp.TryCast; `safe` is forwarded to the
        STR_TO_DATE / STR_TO_TIME rewrites produced for FORMAT casts.
        """
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(x, 'type') variant.
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # Rewrite FORMAT casts into STR_TO_DATE / STR_TO_TIME with the
                # format translated via the dialect's time-format mappings.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unrecognized type name: treat it as a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            # STRING_AGG(DISTINCT <expr>[, <separator> ...])
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        index =
        # NOTE(review): chunk begins mid-statement inside `_parse_string_agg`
        # (`index = self._index`).
        self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            # CONVERT(<expr> USING <charset>) — charset-conversion form.
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            # Two-argument form: DECODE(bin, charset).
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # Explicit NULL search value: compare with IS NULL.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: match on equality, or on both sides being
                # NULL (DECODE treats NULL as equal to NULL).
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # An odd number of trailing args means the last one is the default.
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        # [KEY] <key> {:|VALUE separator} [VALUE] <value>
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Wraps `this` when followed by a `FORMAT JSON` suffix.
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)
    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse the body of JSON_OBJECT(...) / JSON_OBJECTAGG(...), including
        NULL/ABSENT handling, [WITH|WITHOUT] UNIQUE KEYS, RETURNING and ENCODING."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            # NESTED [PATH] '<path>' COLUMNS (...) — no name/type of its own.
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        # COLUMNS (<json column defs>)
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        # MySQL full-text search: MATCH (cols) AGAINST ('expr' [modifier]).
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One entry of the WITH (...) column list: <name> <type> '<path>' [AS JSON].
            this = self._parse_field(any_token=True)
            kind =
            # NOTE(review): chunk begins mid-statement inside `_parse_open_json`'s
            # local column-def helper (`kind = self._parse_types()`).
            self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            # OPENJSON(...) WITH (<column defs>)
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style calls; `haystack_first` controls the
        argument order of the comma-separated form."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(<substr> IN <string>) form.
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        # ML.PREDICT(MODEL <model>, TABLE <table>[, <params struct>])
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                # FOR without FROM implies a start position of 1.
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            # LEADING / TRAILING / BOTH prefix.
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # With FROM (or when the dialect puts the pattern first), the first
            # parsed value is the trim pattern, not the target string — swap.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        # WINDOW <name> AS (...), <name> AS (...), ...
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # HAVING MAX <col> / HAVING MIN <col> (e.g. Teradata qualified aggregates).
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
return self.expression( 5812 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5813 ) 5814 5815 return this 5816 5817 def _parse_window( 5818 self, this: t.Optional[exp.Expression], alias: bool = False 5819 ) -> t.Optional[exp.Expression]: 5820 func = this 5821 comments = func.comments if isinstance(func, exp.Expression) else None 5822 5823 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5824 self._match(TokenType.WHERE) 5825 this = self.expression( 5826 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5827 ) 5828 self._match_r_paren() 5829 5830 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5831 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5832 if self._match_text_seq("WITHIN", "GROUP"): 5833 order = self._parse_wrapped(self._parse_order) 5834 this = self.expression(exp.WithinGroup, this=this, expression=order) 5835 5836 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5837 # Some dialects choose to implement and some do not. 5838 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5839 5840 # There is some code above in _parse_lambda that handles 5841 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5842 5843 # The below changes handle 5844 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            # If IGNORE/RESPECT NULLS was parsed *inside* the function call,
            # hoist it so it wraps the aggregate instead.
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER keyword follows: this expression has no window suffix.
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            # The comments now live on the Window node; drop them from the function.
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # `OVER window_name` — a reference to a named window, no spec.
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        # Oracle KEEP (DENSE_RANK FIRST|LAST ...) support.
        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY list and ORDER BY clause of a window spec."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame boundary: UNBOUNDED / CURRENT ROW / <expr>, plus an
        optional side keyword (e.g. PRECEDING / FOLLOWING, per WINDOW_SIDES)."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an alias for `this` (`[AS] name` or `(a, b, ...)` multi-alias).

        With `explicit=True`, only aliases introduced by the AS keyword are
        accepted; otherwise `this` is returned unchanged.
        """
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like token into an Identifier node.

        Falls back to consuming (almost) any token when `any_token` is set,
        otherwise only the tokens in `tokens` (default: ID_VAR_TOKENS).
        """
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal via STRING_PARSERS, else try a placeholder."""
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal as a quoted identifier (dialects with string aliases)."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal via NUMERIC_PARSERS, else try a placeholder."""
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse an explicitly quoted identifier token, else try a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any token / one of `tokens`) into a Var node,
        optionally upper-casing its text."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        """Consume and return the current token unless it is reserved
        (reserved tokens are consumed too when `ignore_reserved` is set)."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a Var from any token."""
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression, falling back to a Var from any token."""
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, else try a placeholder."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse TRUE/FALSE literals, else try a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token, else try a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse the body of a parameter marker (identifier, primary or var)."""
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token (e.g. `?`, `:name`); rewinds on failure."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The sub-parser declined: back up over the token we consumed.
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        """Parse `<keyword> (a, b, ...)` or `<keyword> expr` (e.g. EXCEPT/REPLACE
        after `SELECT *`), returning the list of expressions or None."""
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list using `parse_method`; None results are skipped."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach any comments found next to the separator to the previous item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a chain of binary operators: parse an operand, then while an
        operator token from `expressions` follows, build the mapped expression."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized `sep`-separated list; parens may be optional."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside `( ... )`; raise unless `optional` and no paren."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated expression list."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement or a (possibly set-operated) expression."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT body of a DDL statement (e.g. CTAS), including set
        operations and query modifiers."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] [mode [, mode ...]]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        # Each mode is a run of VAR tokens (e.g. "READ ONLY"), comma-separated.
        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO SAVEPOINT x] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword itself was already consumed by the caller.
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        """Parse REFRESH [TABLE] <name-or-string>."""
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse DROP [COLUMN] ..., defaulting the dropped kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse one or more PARTITION (...) specs for ALTER TABLE ... DROP."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the ADD action(s) of ALTER TABLE: constraints or column defs."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            # Single leading ADD followed by a (possibly parenthesized) column list.
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE ... ALTER [COLUMN] action."""
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        # Fallback: ALTER COLUMN x [SET DATA] TYPE <type> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        """Parse Redshift's ALTER DISTSTYLE { ALL | EVEN | AUTO | KEY DISTKEY col }."""
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        """Parse Redshift's ALTER [COMPOUND] SORTKEY { (cols...) | AUTO | NONE }."""
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the DROP action(s) of ALTER TABLE: partitions or columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        """Parse ALTER TABLE ... RENAME [COLUMN old TO new | TO new_table]."""
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        """Parse the many dialect-specific forms of ALTER TABLE ... SET ..."""
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            # Hive-style fallback: SET [SERDE ...] <properties>
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE; falls back to a raw Command for unsupported forms."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            # Only produce a structured node if every token was consumed;
            # otherwise replay the whole statement as an opaque Command.
            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO <target> USING <source> ON <condition> WHEN ..."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED [BY SOURCE|TARGET] ... THEN clauses of MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is True for BY SOURCE, False for BY TARGET or no BY clause.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via SHOW_PARSERS, else fall back to a Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET item of the form `name = value` / `name TO value`."""
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            # Bare identifiers on the right-hand side are treated as values.
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via SET_PARSERS, else as a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET/UNSET statement; falls back to a Command when tokens remain."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Parse a (possibly multi-word) option keyword described by `options`.

        `options` maps a leading keyword to its allowed continuations; the full
        matched phrase is returned as a Var. On failure either raises (default)
        or rewinds and returns None.
        """
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched (or the leading keyword itself was unknown).
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement and wrap it as an opaque Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a ClickHouse-style dictionary property: `this(kind(key value ...))`."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse `this(MIN x MAX y)` or `this(MAX y)` (MIN defaults to 0)."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parse `this FOR x IN iterator [IF condition]`; rewinds if no IN follows."""
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parse a heredoc string: a HEREDOC_STRING token or `$tag$ ... $tag$`."""
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            # We parsed a named tag ($name$); require the closing $.
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        # Scan forward until the full opening tag sequence repeats.
        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Find the longest keyword-phrase parser by walking `trie`; rewinds on failure."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Consume the current token if it has `token_type`; returns True or None."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Consume the current token if its type is in `types`; returns True or None."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Consume the next two tokens if they match the given pair; returns True or None."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a `(`, attaching its comments to `expression`; raise otherwise."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a `)`, attaching its comments to `expression`; raise otherwise."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Consume the current token if its upper-cased text is in `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        """Consume an exact sequence of keyword texts; rewinds fully on any mismatch."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Rewrite columns in a lambda body that reference the lambda's own
        parameters (given by `expressions`), optionally casting them when the
        parameter carries a type (`to` arg)."""
        if not node:
            return node

        # Map parameter name -> declared type (or False when untyped).
        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                # Replace the outermost Dot chain the column participates in.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # Not inside a Dot chain: replace the column itself
                    # (or the whole node when the body is just the column).
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        """Parse TRUNCATE [TABLE|DATABASE] with its optional modifiers, or the
        TRUNCATE(...) function; falls back to a Command when tokens remain."""
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        """Parse an ordered expression optionally followed by `WITH <operator>`."""
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse `[=] ( option [, option ...] )` as a list of properties."""
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
                # so we parse it separately to use _parse_field()
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        """Parse the option list of a COPY statement (`opt [= value] ...`)."""
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        """Parse credential-related clauses of COPY (storage integration,
        credentials, encryption, IAM role, region)."""
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
creds = ( 6877 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6878 ) 6879 expr.set("credentials", creds) 6880 if self._match_text_seq("ENCRYPTION"): 6881 expr.set("encryption", self._parse_wrapped_options()) 6882 if self._match_text_seq("IAM_ROLE"): 6883 expr.set("iam_role", self._parse_field()) 6884 if self._match_text_seq("REGION"): 6885 expr.set("region", self._parse_field()) 6886 6887 return expr 6888 6889 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6890 return self._parse_field() 6891 6892 def _parse_copy(self) -> exp.Copy | exp.Command: 6893 start = self._prev 6894 6895 self._match(TokenType.INTO) 6896 6897 this = ( 6898 self._parse_select(nested=True, parse_subquery_alias=False) 6899 if self._match(TokenType.L_PAREN, advance=False) 6900 else self._parse_table(schema=True) 6901 ) 6902 6903 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6904 6905 files = self._parse_csv(self._parse_file_location) 6906 credentials = self._parse_credentials() 6907 6908 self._match_text_seq("WITH") 6909 6910 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6911 6912 # Fallback case 6913 if self._curr: 6914 return self._parse_as_command(start) 6915 6916 return self.expression( 6917 exp.Copy, 6918 this=this, 6919 kind=kind, 6920 credentials=credentials, 6921 files=files, 6922 params=params, 6923 )
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a StarMap for a lone star argument, otherwise a VarMap.

    The argument list alternates keys and values: [k0, v0, k1, v1, ...].
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Split the flat [key, value, key, value, ...] list into parallel sequences
    keys = []
    values = []
    index = 0
    while index < len(args):
        keys.append(args[index])
        values.append(args[index + 1])
        index += 2

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a parser callback that builds a binary range expression of *expr_type*.

    The callback parses the right-hand operand with ``_parse_bitwise``, builds
    the node, and then lets ``_parse_escape`` attach an optional ESCAPE clause.
    When *reverse_args* is true, the two operands are swapped before the node
    is constructed.
    """

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        lhs = this
        rhs = self._parse_bitwise()

        if reverse_args:
            lhs, rhs = rhs, lhs

        node = self.expression(expr_type, this=lhs, expression=rhs)
        return self._parse_escape(node)

    return _parse_binary_range
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a Log (or Ln) node from a LOG(...) argument list.

    With two arguments the default order is (base, expression); dialects
    without LOG_BASE_FIRST pass them the other way around. With one argument,
    dialects whose parser sets LOG_DEFAULTS_TO_LN treat LOG(x) as LN(x).
    """
    base = seq_get(args, 0)
    value = seq_get(args, 1)

    if not value:
        # Single-argument form
        func = exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log
        return func(this=base)

    if not dialect.LOG_BASE_FIRST:
        base, value = value, base

    return exp.Log(this=base, expression=value)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder that constructs *expr_type* with a dialect-converted JSON path.

    The builder converts the second argument through ``dialect.to_json_path``.
    Any arguments beyond the first two are attached as "expressions", but only
    when *expr_type* is exp.JSONExtract.
    """

    def _builder(args: t.List, dialect: Dialect) -> E:
        node = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )

        extra_args = args[2:]
        if extra_args and expr_type is exp.JSONExtract:
            node.set("expressions", extra_args)

        return node

    return _builder
def build_mod(args: t.List) -> exp.Mod:
    """Build a Mod node, parenthesizing binary operands.

    Wrapping keeps precedence intact when MOD(...) is later rendered with the
    "%" operator, e.g. MOD(a + 1, 7) -> (a + 1) % 7.
    """

    def _wrap(operand: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Only binary nodes need explicit parens
        return exp.Paren(this=operand) if isinstance(operand, exp.Binary) else operand

    return exp.Mod(this=_wrap(seq_get(args, 0)), expression=_wrap(seq_get(args, 1)))
131class Parser(metaclass=_Parser): 132 """ 133 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 134 135 Args: 136 error_level: The desired error level. 137 Default: ErrorLevel.IMMEDIATE 138 error_message_context: The amount of context to capture from a query string when displaying 139 the error message (in number of characters). 140 Default: 100 141 max_errors: Maximum number of error messages to include in a raised ParseError. 142 This is only relevant if error_level is ErrorLevel.RAISE. 143 Default: 3 144 """ 145 146 FUNCTIONS: t.Dict[str, t.Callable] = { 147 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 148 "CONCAT": lambda args, dialect: exp.Concat( 149 expressions=args, 150 safe=not dialect.STRICT_STRING_CONCAT, 151 coalesce=dialect.CONCAT_COALESCE, 152 ), 153 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 154 expressions=args, 155 safe=not dialect.STRICT_STRING_CONCAT, 156 coalesce=dialect.CONCAT_COALESCE, 157 ), 158 "DATE_TO_DATE_STR": lambda args: exp.Cast( 159 this=seq_get(args, 0), 160 to=exp.DataType(this=exp.DataType.Type.TEXT), 161 ), 162 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 163 "HEX": build_hex, 164 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 165 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 166 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 167 "LIKE": build_like, 168 "LOG": build_logarithm, 169 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 170 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 171 "LOWER": build_lower, 172 "LPAD": lambda args: build_pad(args), 173 "LEFTPAD": lambda args: build_pad(args), 174 "MOD": build_mod, 175 "RPAD": lambda args: build_pad(args, is_left=False), 176 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 177 "SCOPE_RESOLUTION": 
lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 178 if len(args) != 2 179 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 180 "TIME_TO_TIME_STR": lambda args: exp.Cast( 181 this=seq_get(args, 0), 182 to=exp.DataType(this=exp.DataType.Type.TEXT), 183 ), 184 "TO_HEX": build_hex, 185 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 186 this=exp.Cast( 187 this=seq_get(args, 0), 188 to=exp.DataType(this=exp.DataType.Type.TEXT), 189 ), 190 start=exp.Literal.number(1), 191 length=exp.Literal.number(10), 192 ), 193 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 194 "UPPER": build_upper, 195 "VAR_MAP": build_var_map, 196 } 197 198 NO_PAREN_FUNCTIONS = { 199 TokenType.CURRENT_DATE: exp.CurrentDate, 200 TokenType.CURRENT_DATETIME: exp.CurrentDate, 201 TokenType.CURRENT_TIME: exp.CurrentTime, 202 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 203 TokenType.CURRENT_USER: exp.CurrentUser, 204 } 205 206 STRUCT_TYPE_TOKENS = { 207 TokenType.NESTED, 208 TokenType.OBJECT, 209 TokenType.STRUCT, 210 } 211 212 NESTED_TYPE_TOKENS = { 213 TokenType.ARRAY, 214 TokenType.LIST, 215 TokenType.LOWCARDINALITY, 216 TokenType.MAP, 217 TokenType.NULLABLE, 218 *STRUCT_TYPE_TOKENS, 219 } 220 221 ENUM_TYPE_TOKENS = { 222 TokenType.ENUM, 223 TokenType.ENUM8, 224 TokenType.ENUM16, 225 } 226 227 AGGREGATE_TYPE_TOKENS = { 228 TokenType.AGGREGATEFUNCTION, 229 TokenType.SIMPLEAGGREGATEFUNCTION, 230 } 231 232 TYPE_TOKENS = { 233 TokenType.BIT, 234 TokenType.BOOLEAN, 235 TokenType.TINYINT, 236 TokenType.UTINYINT, 237 TokenType.SMALLINT, 238 TokenType.USMALLINT, 239 TokenType.INT, 240 TokenType.UINT, 241 TokenType.BIGINT, 242 TokenType.UBIGINT, 243 TokenType.INT128, 244 TokenType.UINT128, 245 TokenType.INT256, 246 TokenType.UINT256, 247 TokenType.MEDIUMINT, 248 TokenType.UMEDIUMINT, 249 TokenType.FIXEDSTRING, 250 TokenType.FLOAT, 251 TokenType.DOUBLE, 252 TokenType.CHAR, 253 TokenType.NCHAR, 254 TokenType.VARCHAR, 255 
TokenType.NVARCHAR, 256 TokenType.BPCHAR, 257 TokenType.TEXT, 258 TokenType.MEDIUMTEXT, 259 TokenType.LONGTEXT, 260 TokenType.MEDIUMBLOB, 261 TokenType.LONGBLOB, 262 TokenType.BINARY, 263 TokenType.VARBINARY, 264 TokenType.JSON, 265 TokenType.JSONB, 266 TokenType.INTERVAL, 267 TokenType.TINYBLOB, 268 TokenType.TINYTEXT, 269 TokenType.TIME, 270 TokenType.TIMETZ, 271 TokenType.TIMESTAMP, 272 TokenType.TIMESTAMP_S, 273 TokenType.TIMESTAMP_MS, 274 TokenType.TIMESTAMP_NS, 275 TokenType.TIMESTAMPTZ, 276 TokenType.TIMESTAMPLTZ, 277 TokenType.TIMESTAMPNTZ, 278 TokenType.DATETIME, 279 TokenType.DATETIME64, 280 TokenType.DATE, 281 TokenType.DATE32, 282 TokenType.INT4RANGE, 283 TokenType.INT4MULTIRANGE, 284 TokenType.INT8RANGE, 285 TokenType.INT8MULTIRANGE, 286 TokenType.NUMRANGE, 287 TokenType.NUMMULTIRANGE, 288 TokenType.TSRANGE, 289 TokenType.TSMULTIRANGE, 290 TokenType.TSTZRANGE, 291 TokenType.TSTZMULTIRANGE, 292 TokenType.DATERANGE, 293 TokenType.DATEMULTIRANGE, 294 TokenType.DECIMAL, 295 TokenType.UDECIMAL, 296 TokenType.BIGDECIMAL, 297 TokenType.UUID, 298 TokenType.GEOGRAPHY, 299 TokenType.GEOMETRY, 300 TokenType.HLLSKETCH, 301 TokenType.HSTORE, 302 TokenType.PSEUDO_TYPE, 303 TokenType.SUPER, 304 TokenType.SERIAL, 305 TokenType.SMALLSERIAL, 306 TokenType.BIGSERIAL, 307 TokenType.XML, 308 TokenType.YEAR, 309 TokenType.UNIQUEIDENTIFIER, 310 TokenType.USERDEFINED, 311 TokenType.MONEY, 312 TokenType.SMALLMONEY, 313 TokenType.ROWVERSION, 314 TokenType.IMAGE, 315 TokenType.VARIANT, 316 TokenType.VECTOR, 317 TokenType.OBJECT, 318 TokenType.OBJECT_IDENTIFIER, 319 TokenType.INET, 320 TokenType.IPADDRESS, 321 TokenType.IPPREFIX, 322 TokenType.IPV4, 323 TokenType.IPV6, 324 TokenType.UNKNOWN, 325 TokenType.NULL, 326 TokenType.NAME, 327 TokenType.TDIGEST, 328 *ENUM_TYPE_TOKENS, 329 *NESTED_TYPE_TOKENS, 330 *AGGREGATE_TYPE_TOKENS, 331 } 332 333 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 334 TokenType.BIGINT: TokenType.UBIGINT, 335 TokenType.INT: TokenType.UINT, 336 TokenType.MEDIUMINT: 
TokenType.UMEDIUMINT, 337 TokenType.SMALLINT: TokenType.USMALLINT, 338 TokenType.TINYINT: TokenType.UTINYINT, 339 TokenType.DECIMAL: TokenType.UDECIMAL, 340 } 341 342 SUBQUERY_PREDICATES = { 343 TokenType.ANY: exp.Any, 344 TokenType.ALL: exp.All, 345 TokenType.EXISTS: exp.Exists, 346 TokenType.SOME: exp.Any, 347 } 348 349 RESERVED_TOKENS = { 350 *Tokenizer.SINGLE_TOKENS.values(), 351 TokenType.SELECT, 352 } - {TokenType.IDENTIFIER} 353 354 DB_CREATABLES = { 355 TokenType.DATABASE, 356 TokenType.DICTIONARY, 357 TokenType.MODEL, 358 TokenType.SCHEMA, 359 TokenType.SEQUENCE, 360 TokenType.STORAGE_INTEGRATION, 361 TokenType.TABLE, 362 TokenType.TAG, 363 TokenType.VIEW, 364 TokenType.WAREHOUSE, 365 TokenType.STREAMLIT, 366 } 367 368 CREATABLES = { 369 TokenType.COLUMN, 370 TokenType.CONSTRAINT, 371 TokenType.FOREIGN_KEY, 372 TokenType.FUNCTION, 373 TokenType.INDEX, 374 TokenType.PROCEDURE, 375 *DB_CREATABLES, 376 } 377 378 # Tokens that can represent identifiers 379 ID_VAR_TOKENS = { 380 TokenType.VAR, 381 TokenType.ANTI, 382 TokenType.APPLY, 383 TokenType.ASC, 384 TokenType.ASOF, 385 TokenType.AUTO_INCREMENT, 386 TokenType.BEGIN, 387 TokenType.BPCHAR, 388 TokenType.CACHE, 389 TokenType.CASE, 390 TokenType.COLLATE, 391 TokenType.COMMAND, 392 TokenType.COMMENT, 393 TokenType.COMMIT, 394 TokenType.CONSTRAINT, 395 TokenType.COPY, 396 TokenType.DEFAULT, 397 TokenType.DELETE, 398 TokenType.DESC, 399 TokenType.DESCRIBE, 400 TokenType.DICTIONARY, 401 TokenType.DIV, 402 TokenType.END, 403 TokenType.EXECUTE, 404 TokenType.ESCAPE, 405 TokenType.FALSE, 406 TokenType.FIRST, 407 TokenType.FILTER, 408 TokenType.FINAL, 409 TokenType.FORMAT, 410 TokenType.FULL, 411 TokenType.IDENTIFIER, 412 TokenType.IS, 413 TokenType.ISNULL, 414 TokenType.INTERVAL, 415 TokenType.KEEP, 416 TokenType.KILL, 417 TokenType.LEFT, 418 TokenType.LOAD, 419 TokenType.MERGE, 420 TokenType.NATURAL, 421 TokenType.NEXT, 422 TokenType.OFFSET, 423 TokenType.OPERATOR, 424 TokenType.ORDINALITY, 425 TokenType.OVERLAPS, 
426 TokenType.OVERWRITE, 427 TokenType.PARTITION, 428 TokenType.PERCENT, 429 TokenType.PIVOT, 430 TokenType.PRAGMA, 431 TokenType.RANGE, 432 TokenType.RECURSIVE, 433 TokenType.REFERENCES, 434 TokenType.REFRESH, 435 TokenType.REPLACE, 436 TokenType.RIGHT, 437 TokenType.ROLLUP, 438 TokenType.ROW, 439 TokenType.ROWS, 440 TokenType.SEMI, 441 TokenType.SET, 442 TokenType.SETTINGS, 443 TokenType.SHOW, 444 TokenType.TEMPORARY, 445 TokenType.TOP, 446 TokenType.TRUE, 447 TokenType.TRUNCATE, 448 TokenType.UNIQUE, 449 TokenType.UNNEST, 450 TokenType.UNPIVOT, 451 TokenType.UPDATE, 452 TokenType.USE, 453 TokenType.VOLATILE, 454 TokenType.WINDOW, 455 *CREATABLES, 456 *SUBQUERY_PREDICATES, 457 *TYPE_TOKENS, 458 *NO_PAREN_FUNCTIONS, 459 } 460 461 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 462 463 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 464 TokenType.ANTI, 465 TokenType.APPLY, 466 TokenType.ASOF, 467 TokenType.FULL, 468 TokenType.LEFT, 469 TokenType.LOCK, 470 TokenType.NATURAL, 471 TokenType.OFFSET, 472 TokenType.RIGHT, 473 TokenType.SEMI, 474 TokenType.WINDOW, 475 } 476 477 ALIAS_TOKENS = ID_VAR_TOKENS 478 479 ARRAY_CONSTRUCTORS = { 480 "ARRAY": exp.Array, 481 "LIST": exp.List, 482 } 483 484 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 485 486 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 487 488 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 489 490 FUNC_TOKENS = { 491 TokenType.COLLATE, 492 TokenType.COMMAND, 493 TokenType.CURRENT_DATE, 494 TokenType.CURRENT_DATETIME, 495 TokenType.CURRENT_TIMESTAMP, 496 TokenType.CURRENT_TIME, 497 TokenType.CURRENT_USER, 498 TokenType.FILTER, 499 TokenType.FIRST, 500 TokenType.FORMAT, 501 TokenType.GLOB, 502 TokenType.IDENTIFIER, 503 TokenType.INDEX, 504 TokenType.ISNULL, 505 TokenType.ILIKE, 506 TokenType.INSERT, 507 TokenType.LIKE, 508 TokenType.MERGE, 509 TokenType.OFFSET, 510 TokenType.PRIMARY_KEY, 511 TokenType.RANGE, 512 TokenType.REPLACE, 513 TokenType.RLIKE, 514 TokenType.ROW, 515 TokenType.UNNEST, 516 
TokenType.VAR, 517 TokenType.LEFT, 518 TokenType.RIGHT, 519 TokenType.SEQUENCE, 520 TokenType.DATE, 521 TokenType.DATETIME, 522 TokenType.TABLE, 523 TokenType.TIMESTAMP, 524 TokenType.TIMESTAMPTZ, 525 TokenType.TRUNCATE, 526 TokenType.WINDOW, 527 TokenType.XOR, 528 *TYPE_TOKENS, 529 *SUBQUERY_PREDICATES, 530 } 531 532 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 533 TokenType.AND: exp.And, 534 } 535 536 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 537 TokenType.COLON_EQ: exp.PropertyEQ, 538 } 539 540 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 541 TokenType.OR: exp.Or, 542 } 543 544 EQUALITY = { 545 TokenType.EQ: exp.EQ, 546 TokenType.NEQ: exp.NEQ, 547 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 548 } 549 550 COMPARISON = { 551 TokenType.GT: exp.GT, 552 TokenType.GTE: exp.GTE, 553 TokenType.LT: exp.LT, 554 TokenType.LTE: exp.LTE, 555 } 556 557 BITWISE = { 558 TokenType.AMP: exp.BitwiseAnd, 559 TokenType.CARET: exp.BitwiseXor, 560 TokenType.PIPE: exp.BitwiseOr, 561 } 562 563 TERM = { 564 TokenType.DASH: exp.Sub, 565 TokenType.PLUS: exp.Add, 566 TokenType.MOD: exp.Mod, 567 TokenType.COLLATE: exp.Collate, 568 } 569 570 FACTOR = { 571 TokenType.DIV: exp.IntDiv, 572 TokenType.LR_ARROW: exp.Distance, 573 TokenType.SLASH: exp.Div, 574 TokenType.STAR: exp.Mul, 575 } 576 577 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 578 579 TIMES = { 580 TokenType.TIME, 581 TokenType.TIMETZ, 582 } 583 584 TIMESTAMPS = { 585 TokenType.TIMESTAMP, 586 TokenType.TIMESTAMPTZ, 587 TokenType.TIMESTAMPLTZ, 588 *TIMES, 589 } 590 591 SET_OPERATIONS = { 592 TokenType.UNION, 593 TokenType.INTERSECT, 594 TokenType.EXCEPT, 595 } 596 597 JOIN_METHODS = { 598 TokenType.ASOF, 599 TokenType.NATURAL, 600 TokenType.POSITIONAL, 601 } 602 603 JOIN_SIDES = { 604 TokenType.LEFT, 605 TokenType.RIGHT, 606 TokenType.FULL, 607 } 608 609 JOIN_KINDS = { 610 TokenType.ANTI, 611 TokenType.CROSS, 612 TokenType.INNER, 613 TokenType.OUTER, 614 TokenType.SEMI, 615 
TokenType.STRAIGHT_JOIN, 616 } 617 618 JOIN_HINTS: t.Set[str] = set() 619 620 LAMBDAS = { 621 TokenType.ARROW: lambda self, expressions: self.expression( 622 exp.Lambda, 623 this=self._replace_lambda( 624 self._parse_assignment(), 625 expressions, 626 ), 627 expressions=expressions, 628 ), 629 TokenType.FARROW: lambda self, expressions: self.expression( 630 exp.Kwarg, 631 this=exp.var(expressions[0].name), 632 expression=self._parse_assignment(), 633 ), 634 } 635 636 COLUMN_OPERATORS = { 637 TokenType.DOT: None, 638 TokenType.DCOLON: lambda self, this, to: self.expression( 639 exp.Cast if self.STRICT_CAST else exp.TryCast, 640 this=this, 641 to=to, 642 ), 643 TokenType.ARROW: lambda self, this, path: self.expression( 644 exp.JSONExtract, 645 this=this, 646 expression=self.dialect.to_json_path(path), 647 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 648 ), 649 TokenType.DARROW: lambda self, this, path: self.expression( 650 exp.JSONExtractScalar, 651 this=this, 652 expression=self.dialect.to_json_path(path), 653 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 654 ), 655 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 656 exp.JSONBExtract, 657 this=this, 658 expression=path, 659 ), 660 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 661 exp.JSONBExtractScalar, 662 this=this, 663 expression=path, 664 ), 665 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 666 exp.JSONBContains, 667 this=this, 668 expression=key, 669 ), 670 } 671 672 EXPRESSION_PARSERS = { 673 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 674 exp.Column: lambda self: self._parse_column(), 675 exp.Condition: lambda self: self._parse_assignment(), 676 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 677 exp.Expression: lambda self: self._parse_expression(), 678 exp.From: lambda self: self._parse_from(joins=True), 679 exp.Group: lambda self: self._parse_group(), 680 exp.Having: lambda self: 
self._parse_having(), 681 exp.Identifier: lambda self: self._parse_id_var(), 682 exp.Join: lambda self: self._parse_join(), 683 exp.Lambda: lambda self: self._parse_lambda(), 684 exp.Lateral: lambda self: self._parse_lateral(), 685 exp.Limit: lambda self: self._parse_limit(), 686 exp.Offset: lambda self: self._parse_offset(), 687 exp.Order: lambda self: self._parse_order(), 688 exp.Ordered: lambda self: self._parse_ordered(), 689 exp.Properties: lambda self: self._parse_properties(), 690 exp.Qualify: lambda self: self._parse_qualify(), 691 exp.Returning: lambda self: self._parse_returning(), 692 exp.Select: lambda self: self._parse_select(), 693 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 694 exp.Table: lambda self: self._parse_table_parts(), 695 exp.TableAlias: lambda self: self._parse_table_alias(), 696 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 697 exp.Where: lambda self: self._parse_where(), 698 exp.Window: lambda self: self._parse_named_window(), 699 exp.With: lambda self: self._parse_with(), 700 "JOIN_TYPE": lambda self: self._parse_join_parts(), 701 } 702 703 STATEMENT_PARSERS = { 704 TokenType.ALTER: lambda self: self._parse_alter(), 705 TokenType.BEGIN: lambda self: self._parse_transaction(), 706 TokenType.CACHE: lambda self: self._parse_cache(), 707 TokenType.COMMENT: lambda self: self._parse_comment(), 708 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 709 TokenType.COPY: lambda self: self._parse_copy(), 710 TokenType.CREATE: lambda self: self._parse_create(), 711 TokenType.DELETE: lambda self: self._parse_delete(), 712 TokenType.DESC: lambda self: self._parse_describe(), 713 TokenType.DESCRIBE: lambda self: self._parse_describe(), 714 TokenType.DROP: lambda self: self._parse_drop(), 715 TokenType.INSERT: lambda self: self._parse_insert(), 716 TokenType.KILL: lambda self: self._parse_kill(), 717 TokenType.LOAD: lambda self: self._parse_load(), 718 TokenType.MERGE: lambda self: 
self._parse_merge(), 719 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 720 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 721 TokenType.REFRESH: lambda self: self._parse_refresh(), 722 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 723 TokenType.SET: lambda self: self._parse_set(), 724 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 725 TokenType.UNCACHE: lambda self: self._parse_uncache(), 726 TokenType.UPDATE: lambda self: self._parse_update(), 727 TokenType.USE: lambda self: self.expression( 728 exp.Use, 729 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 730 this=self._parse_table(schema=False), 731 ), 732 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 733 } 734 735 UNARY_PARSERS = { 736 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 737 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 738 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 739 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 740 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 741 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 742 } 743 744 STRING_PARSERS = { 745 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 746 exp.RawString, this=token.text 747 ), 748 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 749 exp.National, this=token.text 750 ), 751 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 752 TokenType.STRING: lambda self, token: self.expression( 753 exp.Literal, this=token.text, is_string=True 754 ), 755 TokenType.UNICODE_STRING: lambda self, token: self.expression( 756 exp.UnicodeString, 757 this=token.text, 758 escape=self._match_text_seq("UESCAPE") and 
self._parse_string(), 759 ), 760 } 761 762 NUMERIC_PARSERS = { 763 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 764 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 765 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 766 TokenType.NUMBER: lambda self, token: self.expression( 767 exp.Literal, this=token.text, is_string=False 768 ), 769 } 770 771 PRIMARY_PARSERS = { 772 **STRING_PARSERS, 773 **NUMERIC_PARSERS, 774 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 775 TokenType.NULL: lambda self, _: self.expression(exp.Null), 776 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 777 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 778 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 779 TokenType.STAR: lambda self, _: self.expression( 780 exp.Star, 781 **{ 782 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 783 "replace": self._parse_star_op("REPLACE"), 784 "rename": self._parse_star_op("RENAME"), 785 }, 786 ), 787 } 788 789 PLACEHOLDER_PARSERS = { 790 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 791 TokenType.PARAMETER: lambda self: self._parse_parameter(), 792 TokenType.COLON: lambda self: ( 793 self.expression(exp.Placeholder, this=self._prev.text) 794 if self._match_set(self.ID_VAR_TOKENS) 795 else None 796 ), 797 } 798 799 RANGE_PARSERS = { 800 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 801 TokenType.GLOB: binary_range_parser(exp.Glob), 802 TokenType.ILIKE: binary_range_parser(exp.ILike), 803 TokenType.IN: lambda self, this: self._parse_in(this), 804 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 805 TokenType.IS: lambda self, this: self._parse_is(this), 806 TokenType.LIKE: binary_range_parser(exp.Like), 807 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 808 
TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 809 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 810 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 811 } 812 813 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 814 "ALLOWED_VALUES": lambda self: self.expression( 815 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 816 ), 817 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 818 "AUTO": lambda self: self._parse_auto_property(), 819 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 820 "BACKUP": lambda self: self.expression( 821 exp.BackupProperty, this=self._parse_var(any_token=True) 822 ), 823 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 824 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 825 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 826 "CHECKSUM": lambda self: self._parse_checksum(), 827 "CLUSTER BY": lambda self: self._parse_cluster(), 828 "CLUSTERED": lambda self: self._parse_clustered_by(), 829 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 830 exp.CollateProperty, **kwargs 831 ), 832 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 833 "CONTAINS": lambda self: self._parse_contains_property(), 834 "COPY": lambda self: self._parse_copy_property(), 835 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 836 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 837 "DEFINER": lambda self: self._parse_definer(), 838 "DETERMINISTIC": lambda self: self.expression( 839 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 840 ), 841 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 842 "DISTKEY": lambda self: self._parse_distkey(), 843 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 844 "ENGINE": lambda self: 
self._parse_property_assignment(exp.EngineProperty), 845 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 846 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 847 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 848 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 849 "FREESPACE": lambda self: self._parse_freespace(), 850 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 851 "HEAP": lambda self: self.expression(exp.HeapProperty), 852 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 853 "IMMUTABLE": lambda self: self.expression( 854 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 855 ), 856 "INHERITS": lambda self: self.expression( 857 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 858 ), 859 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 860 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 861 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 862 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 863 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 864 "LIKE": lambda self: self._parse_create_like(), 865 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 866 "LOCK": lambda self: self._parse_locking(), 867 "LOCKING": lambda self: self._parse_locking(), 868 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 869 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 870 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 871 "MODIFIES": lambda self: self._parse_modifies_property(), 872 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 873 "NO": lambda self: self._parse_no_property(), 874 "ON": lambda self: self._parse_on_property(), 875 "ORDER BY": lambda self: 
self._parse_order(skip_order_token=True), 876 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 877 "PARTITION": lambda self: self._parse_partitioned_of(), 878 "PARTITION BY": lambda self: self._parse_partitioned_by(), 879 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 880 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 881 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 882 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 883 "READS": lambda self: self._parse_reads_property(), 884 "REMOTE": lambda self: self._parse_remote_with_connection(), 885 "RETURNS": lambda self: self._parse_returns(), 886 "STRICT": lambda self: self.expression(exp.StrictProperty), 887 "ROW": lambda self: self._parse_row(), 888 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 889 "SAMPLE": lambda self: self.expression( 890 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 891 ), 892 "SECURE": lambda self: self.expression(exp.SecureProperty), 893 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 894 "SETTINGS": lambda self: self.expression( 895 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 896 ), 897 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 898 "SORTKEY": lambda self: self._parse_sortkey(), 899 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 900 "STABLE": lambda self: self.expression( 901 exp.StabilityProperty, this=exp.Literal.string("STABLE") 902 ), 903 "STORED": lambda self: self._parse_stored(), 904 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 905 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 906 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 907 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 908 "TO": lambda self: self._parse_to_table(), 909 "TRANSIENT": 
        lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Maps a constraint keyword (already consumed) to a callable that parses the
    # remainder of the column/table constraint.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var(any_token=True)
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <function> is a constraint; a bare ON <id> is treated as a property
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    # ALTER TABLE <action> dispatch.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
    }

    # ALTER TABLE ... ALTER <action> dispatch (Redshift-style dist/sort keys).
    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    # Constraints that may appear in a schema definition without a leading name.
    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    # Function-like keywords that are parsed without a parenthesized argument list,
    # e.g. CASE ... END.
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    # Token types that cannot serve as a function name.
    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    # Functions whose argument list needs bespoke parsing (not a plain CSV of
    # expressions), keyed by upper-cased function name.
    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    # Maps the token that introduces a query modifier to a callable that returns
    # a (modifier_key, parsed_expression) pair. Note FETCH shares the "limit" key
    # with LIMIT, and USING shares "sample" with TABLESAMPLE.
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    # SET <scope> dispatch.
    SET_PARSERS = {
        "GLOBAL":
lambda self: self._parse_set_item_assignment("GLOBAL"), 1083 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1084 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1085 "TRANSACTION": lambda self: self._parse_set_transaction(), 1086 } 1087 1088 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1089 1090 TYPE_LITERAL_PARSERS = { 1091 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1092 } 1093 1094 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1095 1096 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1097 1098 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1099 1100 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1101 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1102 "ISOLATION": ( 1103 ("LEVEL", "REPEATABLE", "READ"), 1104 ("LEVEL", "READ", "COMMITTED"), 1105 ("LEVEL", "READ", "UNCOMITTED"), 1106 ("LEVEL", "SERIALIZABLE"), 1107 ), 1108 "READ": ("WRITE", "ONLY"), 1109 } 1110 1111 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1112 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1113 ) 1114 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1115 1116 CREATE_SEQUENCE: OPTIONS_TYPE = { 1117 "SCALE": ("EXTEND", "NOEXTEND"), 1118 "SHARD": ("EXTEND", "NOEXTEND"), 1119 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1120 **dict.fromkeys( 1121 ( 1122 "SESSION", 1123 "GLOBAL", 1124 "KEEP", 1125 "NOKEEP", 1126 "ORDER", 1127 "NOORDER", 1128 "NOCACHE", 1129 "CYCLE", 1130 "NOCYCLE", 1131 "NOMINVALUE", 1132 "NOMAXVALUE", 1133 "NOSCALE", 1134 "NOSHARD", 1135 ), 1136 tuple(), 1137 ), 1138 } 1139 1140 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1141 1142 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1143 1144 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1145 1146 
    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    # INSERT OR <alternative> keywords (SQLite-style).
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    # ROWS is excluded so that `WINDOW w AS (... ) ROWS ...` isn't taken as an alias.
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    # Identifiers allowed after FETCH; ROW/ROWS/PERCENT are part of the clause itself.
    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    # Tokens that can follow ALTER TABLE ... ADD when adding a constraint.
    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    # Whether CAST/CONVERT are parsed as strict (error-raising) casts by default
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g.
    # SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        # Deferred import to avoid a circular dependency with sqlglot.dialects
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        # Clears all parsing state so the same instance can be reused across calls.
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
1300 """ 1301 return self._parse( 1302 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1303 ) 1304 1305 def parse_into( 1306 self, 1307 expression_types: exp.IntoType, 1308 raw_tokens: t.List[Token], 1309 sql: t.Optional[str] = None, 1310 ) -> t.List[t.Optional[exp.Expression]]: 1311 """ 1312 Parses a list of tokens into a given Expression type. If a collection of Expression 1313 types is given instead, this method will try to parse the token list into each one 1314 of them, stopping at the first for which the parsing succeeds. 1315 1316 Args: 1317 expression_types: The expression type(s) to try and parse the token list into. 1318 raw_tokens: The list of tokens. 1319 sql: The original SQL string, used to produce helpful debug messages. 1320 1321 Returns: 1322 The target Expression. 1323 """ 1324 errors = [] 1325 for expression_type in ensure_list(expression_types): 1326 parser = self.EXPRESSION_PARSERS.get(expression_type) 1327 if not parser: 1328 raise TypeError(f"No parser registered for {expression_type}") 1329 1330 try: 1331 return self._parse(parser, raw_tokens, sql) 1332 except ParseError as e: 1333 e.errors[0]["into_expression"] = expression_type 1334 errors.append(e) 1335 1336 raise ParseError( 1337 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1338 errors=merge_errors(errors), 1339 ) from errors[-1] 1340 1341 def _parse( 1342 self, 1343 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1344 raw_tokens: t.List[Token], 1345 sql: t.Optional[str] = None, 1346 ) -> t.List[t.Optional[exp.Expression]]: 1347 self.reset() 1348 self.sql = sql or "" 1349 1350 total = len(raw_tokens) 1351 chunks: t.List[t.List[Token]] = [[]] 1352 1353 for i, token in enumerate(raw_tokens): 1354 if token.token_type == TokenType.SEMICOLON: 1355 if token.comments: 1356 chunks.append([token]) 1357 1358 if i < total - 1: 1359 chunks.append([]) 1360 else: 1361 chunks[-1].append(token) 1362 1363 expressions = [] 1364 1365 for 
tokens in chunks: 1366 self._index = -1 1367 self._tokens = tokens 1368 self._advance() 1369 1370 expressions.append(parse_method(self)) 1371 1372 if self._index < len(self._tokens): 1373 self.raise_error("Invalid expression / Unexpected token") 1374 1375 self.check_errors() 1376 1377 return expressions 1378 1379 def check_errors(self) -> None: 1380 """Logs or raises any found errors, depending on the chosen error level setting.""" 1381 if self.error_level == ErrorLevel.WARN: 1382 for error in self.errors: 1383 logger.error(str(error)) 1384 elif self.error_level == ErrorLevel.RAISE and self.errors: 1385 raise ParseError( 1386 concat_messages(self.errors, self.max_errors), 1387 errors=merge_errors(self.errors), 1388 ) 1389 1390 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1391 """ 1392 Appends an error in the list of recorded errors or raises it, depending on the chosen 1393 error level setting. 1394 """ 1395 token = token or self._curr or self._prev or Token.string("") 1396 start = token.start 1397 end = token.end + 1 1398 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1399 highlight = self.sql[start:end] 1400 end_context = self.sql[end : end + self.error_message_context] 1401 1402 error = ParseError.new( 1403 f"{message}. Line {token.line}, Col: {token.col}.\n" 1404 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1405 description=message, 1406 line=token.line, 1407 col=token.col, 1408 start_context=start_context, 1409 highlight=highlight, 1410 end_context=end_context, 1411 ) 1412 1413 if self.error_level == ErrorLevel.IMMEDIATE: 1414 raise error 1415 1416 self.errors.append(error) 1417 1418 def expression( 1419 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1420 ) -> E: 1421 """ 1422 Creates a new, validated Expression. 1423 1424 Args: 1425 exp_class: The expression class to instantiate. 1426 comments: An optional list of comments to attach to the expression. 
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Explicit comments win; otherwise attach any comments buffered from the tokenizer
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Attaches the buffered previous-token comments, then clears the buffer so
        # they are attached at most once.
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the original SQL spanning the two tokens, inclusive.
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        # True when the previous and current tokens are adjacent (no whitespace between).
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor `times` tokens forward and refreshes the cached
        # current/next/previous token references and buffered comments.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Moves the cursor back (or forward) to an absolute token index.
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks,
        # and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        # Fallback used when a statement can't be parsed into a proper AST: the
        # remainder is captured verbatim as a Command node.
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        # Force IMMEDIATE so failures surface as ParseError here instead of accumulating
        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses COMMENT [IF EXISTS] ON [MATERIALIZED] <kind> <target> IS <string>.
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        # Parses ClickHouse's MergeTree TTL clause, including per-expression actions
        # (DELETE / RECOMPRESS / TO DISK / TO VOLUME) and optional WHERE/GROUP BY.
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Entry point for a single statement within a chunk: try the statement
        # dispatch table, then dialect commands, then a bare expression/select.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        # Parses DROP [TEMPORARY] [MATERIALIZED] <kind> [IF EXISTS] <name> ...
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the full sequence matched.
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Properties can appear at several locations; accumulate them all.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")
        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        # Leftover tokens (other than a closing paren / comma) mean unsupported syntax
        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        # Parses CREATE SEQUENCE options; returns None when nothing was consumed.
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Optional modifier keywords that precede the property name
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifiers that actually matched
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The parser doesn't accept these modifiers
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        # Parses a parenthesized, comma-separated property list.
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        # Parses a single property: first via the dispatch table, then the special
        # compound forms, and finally a generic `key = value` assignment.
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
if isinstance(key, exp.Column): 1893 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1894 1895 value = self._parse_bitwise() or self._parse_var(any_token=True) 1896 1897 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1898 if isinstance(value, exp.Column): 1899 value = exp.var(value.name) 1900 1901 return self.expression(exp.Property, this=key, value=value) 1902 1903 def _parse_stored(self) -> exp.FileFormatProperty: 1904 self._match(TokenType.ALIAS) 1905 1906 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1907 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1908 1909 return self.expression( 1910 exp.FileFormatProperty, 1911 this=( 1912 self.expression( 1913 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1914 ) 1915 if input_format or output_format 1916 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1917 ), 1918 ) 1919 1920 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1921 field = self._parse_field() 1922 if isinstance(field, exp.Identifier) and not field.quoted: 1923 field = exp.var(field) 1924 1925 return field 1926 1927 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1928 self._match(TokenType.EQ) 1929 self._match(TokenType.ALIAS) 1930 1931 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1932 1933 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1934 properties = [] 1935 while True: 1936 if before: 1937 prop = self._parse_property_before() 1938 else: 1939 prop = self._parse_property() 1940 if not prop: 1941 break 1942 for p in ensure_list(prop): 1943 properties.append(p) 1944 1945 if properties: 1946 return self.expression(exp.Properties, expressions=properties) 1947 1948 return None 1949 1950 def _parse_fallback(self, no: bool = False) -> 
exp.FallbackProperty:
        # Parse Teradata `[NO] FALLBACK [PROTECTION]`.
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # VOLATILE is ambiguous: the token two positions back decides whether it is
        # a table property (exp.VolatileProperty) or a stability marker.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        # Parse TSQL `SYSTEM_VERSIONING = {ON [(...)] | OFF}`.
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            # Optional parenthesized sub-options, in any order, comma separated.
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        # Parse `DATA_DELETION = {ON [(...)] | OFF}`.
        self._match(TokenType.EQ)
        # Defaults to ON when neither ON nor OFF is present.
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        # Dispatch the many forms that can follow WITH in a properties clause.
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and
self._prev.text)

        if not user or not host:
            return None

        # DEFINER is stored as a single `user@host` string.
        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        # Parse `WITH JOURNAL [TABLE] [=] <table>`.
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        # `[NO] LOG` — the keyword itself was consumed by the caller.
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        # Journal modifiers (no/dual/before/after/local) arrive via **kwargs.
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        # Parse `CHECKSUM [=] {ON | OFF} [DEFAULT]`.
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        # Parse a CLUSTER BY list; `wrapped` means the list is parenthesized.
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        # Parse `CLUSTERED BY (cols) [SORTED BY (cols)] INTO <n> BUCKETS`.
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        # Parse `COPY GRANTS`; COPY was already consumed, so rewind it on a miss.
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        # Parse `FREESPACE [=] <number> [PERCENT]`.
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        # `= <n> [PERCENT]` form takes precedence; otherwise keep the NO/DEFAULT flags.
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        # Parse `DATABLOCKSIZE [=] <size> [BYTES | KBYTES | KILOBYTES]`.
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        # Parse `BLOCKCOMPRESSION [=] {ALWAYS | MANUAL | NEVER | DEFAULT} [AUTOTEMP (...)]`.
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no =
self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        # `ISOLATED LOADING` is mandatory; rewind everything on a miss.
        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        # Parse Teradata-style LOCKING: kind, optional object, FOR/IN, lock type, OVERRIDE.
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locks have no named target; the other kinds are followed by one.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        # Parse an optional PARTITION BY list; empty list when absent.
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are keywords inside bound lists, not expressions.
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        # Three mutually exclusive bound forms: IN (...), FROM (...) TO (...),
        # and WITH (MODULUS n, REMAINDER m).
        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        # `PARTITION OF parent ...`; PARTITION was already consumed, rewind on miss.
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # Parse the optional `AND [NO] STATISTICS` suffix of WITH [NO] DATA.
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        # `CONTAINS SQL` — CONTAINS was consumed by the caller.
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        # `MODIFIES SQL DATA` — MODIFIES was consumed by the caller.
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        # Properties introduced by NO: `NO PRIMARY INDEX` or `NO SQL`.
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        # Properties introduced by ON: COMMIT behavior, else a generic ON <schema>.
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        # `READS SQL DATA` — READS was consumed by the caller.
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        # Parse `DISTKEY (column)`.
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table =
self._parse_table(schema=True)

        # Optional INCLUDING/EXCLUDING option list after `LIKE <table>`.
        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        # Parse `[COMPOUND] SORTKEY (cols)`.
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        # Parse `CHARACTER SET [=] <charset>`.
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        # Parse `REMOTE WITH CONNECTION <table parts>`.
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        # Parse a RETURNS clause: either TABLE [<...>] / TABLE (schema), the
        # `NULL ON NULL INPUT` marker, or a plain type.
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # `TABLE<...>` struct-style column list.
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # What looked like a style keyword was actually part of a dotted table
            # name; undo the style match.
            style = None
            self._retreat(self._index - 2)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, style=style, kind=kind, expressions=expressions
        )

    def _parse_insert(self) -> exp.Insert:
        # Parse an INSERT statement (INSERT itself was consumed by the caller).
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # Hive-style `INSERT ... DIRECTORY 'path'`.
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. `INSERT OR REPLACE` — capture the alternative keyword.
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # RETURNING may appear before or after the source expression.
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        # Parse `KILL [CONNECTION | QUERY] <id>`.
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        # Handles both `ON CONFLICT ...` and MySQL's `ON DUPLICATE KEY ...`.
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            # Conflict target: a named constraint or a column list.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            # `DO UPDATE [SET] a = b, ...`
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        # Parse `RETURNING <exprs> [INTO <target>]`.
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # ROW was consumed by the caller; FORMAT must follow.
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index =
self._index
        with_ = with_ or self._match_text_seq("WITH")

        # SERDEPROPERTIES is mandatory; rewind the optional WITH on a miss.
        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # Parse Hive `ROW FORMAT {SERDE '...' | DELIMITED ...}`.
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each DELIMITED sub-clause is optional, in this fixed order.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        # Parse Hive `LOAD DATA [LOCAL] INPATH ... [OVERWRITE] INTO TABLE ...`;
        # anything else after LOAD is kept as a raw command.
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            # RETURNING may appear before or after WHERE.
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        # Parse `UPDATE <table> SET ... [FROM ...] [WHERE ...] [RETURNING ...] ...`.
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        # Parse `UNCACHE TABLE [IF EXISTS] <table>`.
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        # Parse `CACHE [LAZY] TABLE <t> [OPTIONS ('k' = 'v')] [AS] <select>`.
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        # Parse `PARTITION (k = v, ...)`.
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment)
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        # Parse one VALUES row, parenthesized or bare.
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            # A bare value still becomes a single-element Tuple (one-column row).
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        # Parse the SELECT projection list.
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        # Parse a query: optional CTEs, then SELECT / parenthesized query / VALUES /
        # a leading FROM (duckdb), then query modifiers and set operations.
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # BigQuery `SELECT AS STRUCT/VALUE`.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # `top=True` covers TSQL's SELECT TOP n.
            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            # Bare `FROM x` with no SELECT → implicit `SELECT * FROM x`.
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        # Parse a WITH clause and its comma-separated CTE list.
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a repeated WITH between CTEs.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        # Parse `alias [(cols)] AS [NOT] [MATERIALIZED] (statement)`.
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # Parse `[AS] alias [(col, ...)]`; returns None when neither part is present.
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # An empty parenthesized list means the paren wasn't a column list; rewind.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        # Wrap a parsed query in a Subquery with optional pivots and alias.
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        # Track the names already introduced by FROM/JOINs; a join against one of
        # them is an implicit unnest of a column, not a table reference.
        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table =
table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        # Attach trailing query modifiers (joins, laterals, WHERE/GROUP/LIMIT/...)
        # to an already-parsed query or table expression.
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # LIMIT may carry an embedded offset (e.g. LIMIT x, y);
                            # hoist it into a proper Offset node.
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                # Move LIMIT BY expressions onto the Offset node.
                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        # Parse an optimizer hint comment `/*+ ... */`.
        if self._match(TokenType.HINT):
            hints = []
            # Keep consuming comma-separated hint lists until an empty batch.
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(upper=True)
                ),
                [],
            ):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        # Parse `INTO [TEMPORARY | UNLOGGED] [TABLE] <table>`.
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        # Parse a FROM clause; `skip_from_token` when FROM was already consumed.
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        # Parse one MEASURES item: `[FINAL | RUNNING] <expr>`.
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        # Parse a MATCH_RECOGNIZE clause.
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        # ROWS PER MATCH options are stored as a single textual Var.
        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        # AFTER MATCH SKIP options, likewise stored as text.
        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is a regex-like mini-language; capture its raw SQL text
            # by scanning to the matching closing paren.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and
self._match_pair(TokenType.OUTER, TokenType.APPLY): 3085 cross_apply = False 3086 3087 if cross_apply is not None: 3088 this = self._parse_select(table=True) 3089 view = None 3090 outer = None 3091 elif self._match(TokenType.LATERAL): 3092 this = self._parse_select(table=True) 3093 view = self._match(TokenType.VIEW) 3094 outer = self._match(TokenType.OUTER) 3095 else: 3096 return None 3097 3098 if not this: 3099 this = ( 3100 self._parse_unnest() 3101 or self._parse_function() 3102 or self._parse_id_var(any_token=False) 3103 ) 3104 3105 while self._match(TokenType.DOT): 3106 this = exp.Dot( 3107 this=this, 3108 expression=self._parse_function() or self._parse_id_var(any_token=False), 3109 ) 3110 3111 if view: 3112 table = self._parse_id_var(any_token=False) 3113 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3114 table_alias: t.Optional[exp.TableAlias] = self.expression( 3115 exp.TableAlias, this=table, columns=columns 3116 ) 3117 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3118 # We move the alias from the lateral's child node to the lateral itself 3119 table_alias = this.args["alias"].pop() 3120 else: 3121 table_alias = self._parse_table_alias() 3122 3123 return self.expression( 3124 exp.Lateral, 3125 this=this, 3126 view=view, 3127 outer=outer, 3128 alias=table_alias, 3129 cross_apply=cross_apply, 3130 ) 3131 3132 def _parse_join_parts( 3133 self, 3134 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3135 return ( 3136 self._match_set(self.JOIN_METHODS) and self._prev, 3137 self._match_set(self.JOIN_SIDES) and self._prev, 3138 self._match_set(self.JOIN_KINDS) and self._prev, 3139 ) 3140 3141 def _parse_join( 3142 self, skip_join_token: bool = False, parse_bracket: bool = False 3143 ) -> t.Optional[exp.Join]: 3144 if self._match(TokenType.COMMA): 3145 return self.expression(exp.Join, this=self._parse_table()) 3146 3147 index = self._index 3148 method, side, kind = 
self._parse_join_parts() 3149 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3150 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3151 3152 if not skip_join_token and not join: 3153 self._retreat(index) 3154 kind = None 3155 method = None 3156 side = None 3157 3158 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3159 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3160 3161 if not skip_join_token and not join and not outer_apply and not cross_apply: 3162 return None 3163 3164 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3165 3166 if method: 3167 kwargs["method"] = method.text 3168 if side: 3169 kwargs["side"] = side.text 3170 if kind: 3171 kwargs["kind"] = kind.text 3172 if hint: 3173 kwargs["hint"] = hint 3174 3175 if self._match(TokenType.MATCH_CONDITION): 3176 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3177 3178 if self._match(TokenType.ON): 3179 kwargs["on"] = self._parse_assignment() 3180 elif self._match(TokenType.USING): 3181 kwargs["using"] = self._parse_wrapped_id_vars() 3182 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3183 kind and kind.token_type == TokenType.CROSS 3184 ): 3185 index = self._index 3186 joins: t.Optional[list] = list(self._parse_joins()) 3187 3188 if joins and self._match(TokenType.ON): 3189 kwargs["on"] = self._parse_assignment() 3190 elif joins and self._match(TokenType.USING): 3191 kwargs["using"] = self._parse_wrapped_id_vars() 3192 else: 3193 joins = None 3194 self._retreat(index) 3195 3196 kwargs["this"].set("joins", joins if joins else None) 3197 3198 comments = [c for token in (method, side, kind) if token for c in token.comments] 3199 return self.expression(exp.Join, comments=comments, **kwargs) 3200 3201 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3202 this = self._parse_assignment() 3203 3204 if 
    def _parse_index_params(self) -> exp.IndexParameters:
        """Parse the parameter clauses that can follow an index definition.

        Handles, in order: ``USING <method>``, a parenthesized column list,
        ``INCLUDE (...)``, ``PARTITION BY``, ``WITH (<storage options>)``,
        ``USING INDEX TABLESPACE <name>``, ``WHERE`` and ``ON``. The match
        order is significant — each clause is only recognized in sequence.
        """
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        # Only a parenthesized list is treated as the index column list.
        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )
exp.Index, 3270 this=index, 3271 table=table, 3272 unique=unique, 3273 primary=primary, 3274 amp=amp, 3275 params=params, 3276 ) 3277 3278 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3279 hints: t.List[exp.Expression] = [] 3280 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3281 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3282 hints.append( 3283 self.expression( 3284 exp.WithTableHint, 3285 expressions=self._parse_csv( 3286 lambda: self._parse_function() or self._parse_var(any_token=True) 3287 ), 3288 ) 3289 ) 3290 self._match_r_paren() 3291 else: 3292 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3293 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3294 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3295 3296 self._match_set((TokenType.INDEX, TokenType.KEY)) 3297 if self._match(TokenType.FOR): 3298 hint.set("target", self._advance_any() and self._prev.text.upper()) 3299 3300 hint.set("expressions", self._parse_wrapped_id_vars()) 3301 hints.append(hint) 3302 3303 return hints or None 3304 3305 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3306 return ( 3307 (not schema and self._parse_function(optional_parens=False)) 3308 or self._parse_id_var(any_token=False) 3309 or self._parse_string_as_identifier() 3310 or self._parse_placeholder() 3311 ) 3312 3313 def _parse_table_parts( 3314 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3315 ) -> exp.Table: 3316 catalog = None 3317 db = None 3318 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3319 3320 while self._match(TokenType.DOT): 3321 if catalog: 3322 # This allows nesting the table in arbitrarily many dot expressions if needed 3323 table = self.expression( 3324 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3325 ) 3326 else: 3327 catalog = db 3328 db = table 3329 # "" used for 
tsql FROM a..b case 3330 table = self._parse_table_part(schema=schema) or "" 3331 3332 if ( 3333 wildcard 3334 and self._is_connected() 3335 and (isinstance(table, exp.Identifier) or not table) 3336 and self._match(TokenType.STAR) 3337 ): 3338 if isinstance(table, exp.Identifier): 3339 table.args["this"] += "*" 3340 else: 3341 table = exp.Identifier(this="*") 3342 3343 # We bubble up comments from the Identifier to the Table 3344 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3345 3346 if is_db_reference: 3347 catalog = db 3348 db = table 3349 table = None 3350 3351 if not table and not is_db_reference: 3352 self.raise_error(f"Expected table name but got {self._curr}") 3353 if not db and is_db_reference: 3354 self.raise_error(f"Expected database name but got {self._curr}") 3355 3356 return self.expression( 3357 exp.Table, 3358 comments=comments, 3359 this=table, 3360 db=db, 3361 catalog=catalog, 3362 pivots=self._parse_pivots(), 3363 ) 3364 3365 def _parse_table( 3366 self, 3367 schema: bool = False, 3368 joins: bool = False, 3369 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3370 parse_bracket: bool = False, 3371 is_db_reference: bool = False, 3372 parse_partition: bool = False, 3373 ) -> t.Optional[exp.Expression]: 3374 lateral = self._parse_lateral() 3375 if lateral: 3376 return lateral 3377 3378 unnest = self._parse_unnest() 3379 if unnest: 3380 return unnest 3381 3382 values = self._parse_derived_table_values() 3383 if values: 3384 return values 3385 3386 subquery = self._parse_select(table=True) 3387 if subquery: 3388 if not subquery.args.get("pivots"): 3389 subquery.set("pivots", self._parse_pivots()) 3390 return subquery 3391 3392 bracket = parse_bracket and self._parse_bracket(None) 3393 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3394 3395 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3396 self._parse_table 3397 ) 3398 rows_from = 
self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3399 3400 only = self._match(TokenType.ONLY) 3401 3402 this = t.cast( 3403 exp.Expression, 3404 bracket 3405 or rows_from 3406 or self._parse_bracket( 3407 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3408 ), 3409 ) 3410 3411 if only: 3412 this.set("only", only) 3413 3414 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3415 self._match_text_seq("*") 3416 3417 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3418 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3419 this.set("partition", self._parse_partition()) 3420 3421 if schema: 3422 return self._parse_schema(this=this) 3423 3424 version = self._parse_version() 3425 3426 if version: 3427 this.set("version", version) 3428 3429 if self.dialect.ALIAS_POST_TABLESAMPLE: 3430 table_sample = self._parse_table_sample() 3431 3432 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3433 if alias: 3434 this.set("alias", alias) 3435 3436 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3437 return self.expression( 3438 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3439 ) 3440 3441 this.set("hints", self._parse_table_hints()) 3442 3443 if not this.args.get("pivots"): 3444 this.set("pivots", self._parse_pivots()) 3445 3446 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3447 table_sample = self._parse_table_sample() 3448 3449 if table_sample: 3450 table_sample.set("this", this) 3451 this = table_sample 3452 3453 if joins: 3454 for join in self._parse_joins(): 3455 this.append("joins", join) 3456 3457 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3458 this.set("ordinality", True) 3459 this.set("alias", self._parse_table_alias()) 3460 3461 return this 3462 3463 def _parse_version(self) -> t.Optional[exp.Version]: 3464 if 
self._match(TokenType.TIMESTAMP_SNAPSHOT): 3465 this = "TIMESTAMP" 3466 elif self._match(TokenType.VERSION_SNAPSHOT): 3467 this = "VERSION" 3468 else: 3469 return None 3470 3471 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3472 kind = self._prev.text.upper() 3473 start = self._parse_bitwise() 3474 self._match_texts(("TO", "AND")) 3475 end = self._parse_bitwise() 3476 expression: t.Optional[exp.Expression] = self.expression( 3477 exp.Tuple, expressions=[start, end] 3478 ) 3479 elif self._match_text_seq("CONTAINED", "IN"): 3480 kind = "CONTAINED IN" 3481 expression = self.expression( 3482 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3483 ) 3484 elif self._match(TokenType.ALL): 3485 kind = "ALL" 3486 expression = None 3487 else: 3488 self._match_text_seq("AS", "OF") 3489 kind = "AS OF" 3490 expression = self._parse_type() 3491 3492 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3493 3494 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3495 if not self._match(TokenType.UNNEST): 3496 return None 3497 3498 expressions = self._parse_wrapped_csv(self._parse_equality) 3499 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3500 3501 alias = self._parse_table_alias() if with_alias else None 3502 3503 if alias: 3504 if self.dialect.UNNEST_COLUMN_ONLY: 3505 if alias.args.get("columns"): 3506 self.raise_error("Unexpected extra column alias in unnest.") 3507 3508 alias.set("columns", [alias.this]) 3509 alias.set("this", None) 3510 3511 columns = alias.args.get("columns") or [] 3512 if offset and len(expressions) < len(columns): 3513 offset = columns.pop() 3514 3515 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3516 self._match(TokenType.ALIAS) 3517 offset = self._parse_id_var( 3518 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3519 ) or exp.to_identifier("offset") 3520 3521 return self.expression(exp.Unnest, expressions=expressions, 
alias=alias, offset=offset) 3522 3523 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3524 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3525 if not is_derived and not self._match_text_seq("VALUES"): 3526 return None 3527 3528 expressions = self._parse_csv(self._parse_value) 3529 alias = self._parse_table_alias() 3530 3531 if is_derived: 3532 self._match_r_paren() 3533 3534 return self.expression( 3535 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3536 ) 3537 3538 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3539 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3540 as_modifier and self._match_text_seq("USING", "SAMPLE") 3541 ): 3542 return None 3543 3544 bucket_numerator = None 3545 bucket_denominator = None 3546 bucket_field = None 3547 percent = None 3548 size = None 3549 seed = None 3550 3551 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3552 matched_l_paren = self._match(TokenType.L_PAREN) 3553 3554 if self.TABLESAMPLE_CSV: 3555 num = None 3556 expressions = self._parse_csv(self._parse_primary) 3557 else: 3558 expressions = None 3559 num = ( 3560 self._parse_factor() 3561 if self._match(TokenType.NUMBER, advance=False) 3562 else self._parse_primary() or self._parse_placeholder() 3563 ) 3564 3565 if self._match_text_seq("BUCKET"): 3566 bucket_numerator = self._parse_number() 3567 self._match_text_seq("OUT", "OF") 3568 bucket_denominator = bucket_denominator = self._parse_number() 3569 self._match(TokenType.ON) 3570 bucket_field = self._parse_field() 3571 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3572 percent = num 3573 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3574 size = num 3575 else: 3576 percent = num 3577 3578 if matched_l_paren: 3579 self._match_r_paren() 3580 3581 if self._match(TokenType.L_PAREN): 3582 method = self._parse_var(upper=True) 3583 seed = 
self._match(TokenType.COMMA) and self._parse_number() 3584 self._match_r_paren() 3585 elif self._match_texts(("SEED", "REPEATABLE")): 3586 seed = self._parse_wrapped(self._parse_number) 3587 3588 if not method and self.DEFAULT_SAMPLING_METHOD: 3589 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3590 3591 return self.expression( 3592 exp.TableSample, 3593 expressions=expressions, 3594 method=method, 3595 bucket_numerator=bucket_numerator, 3596 bucket_denominator=bucket_denominator, 3597 bucket_field=bucket_field, 3598 percent=percent, 3599 size=size, 3600 seed=seed, 3601 ) 3602 3603 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3604 return list(iter(self._parse_pivot, None)) or None 3605 3606 def _parse_joins(self) -> t.Iterator[exp.Join]: 3607 return iter(self._parse_join, None) 3608 3609 # https://duckdb.org/docs/sql/statements/pivot 3610 def _parse_simplified_pivot(self) -> exp.Pivot: 3611 def _parse_on() -> t.Optional[exp.Expression]: 3612 this = self._parse_bitwise() 3613 return self._parse_in(this) if self._match(TokenType.IN) else this 3614 3615 this = self._parse_table() 3616 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3617 using = self._match(TokenType.USING) and self._parse_csv( 3618 lambda: self._parse_alias(self._parse_function()) 3619 ) 3620 group = self._parse_group() 3621 return self.expression( 3622 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3623 ) 3624 3625 def _parse_pivot_in(self) -> exp.In: 3626 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3627 this = self._parse_assignment() 3628 3629 self._match(TokenType.ALIAS) 3630 alias = self._parse_field() 3631 if alias: 3632 return self.expression(exp.PivotAlias, this=this, alias=alias) 3633 3634 return this 3635 3636 value = self._parse_column() 3637 3638 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3639 self.raise_error("Expecting IN (") 3640 3641 aliased_expressions = 
self._parse_csv(_parse_aliased_expression) 3642 3643 self._match_r_paren() 3644 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3645 3646 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3647 index = self._index 3648 include_nulls = None 3649 3650 if self._match(TokenType.PIVOT): 3651 unpivot = False 3652 elif self._match(TokenType.UNPIVOT): 3653 unpivot = True 3654 3655 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3656 if self._match_text_seq("INCLUDE", "NULLS"): 3657 include_nulls = True 3658 elif self._match_text_seq("EXCLUDE", "NULLS"): 3659 include_nulls = False 3660 else: 3661 return None 3662 3663 expressions = [] 3664 3665 if not self._match(TokenType.L_PAREN): 3666 self._retreat(index) 3667 return None 3668 3669 if unpivot: 3670 expressions = self._parse_csv(self._parse_column) 3671 else: 3672 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3673 3674 if not expressions: 3675 self.raise_error("Failed to parse PIVOT's aggregation list") 3676 3677 if not self._match(TokenType.FOR): 3678 self.raise_error("Expecting FOR") 3679 3680 field = self._parse_pivot_in() 3681 3682 self._match_r_paren() 3683 3684 pivot = self.expression( 3685 exp.Pivot, 3686 expressions=expressions, 3687 field=field, 3688 unpivot=unpivot, 3689 include_nulls=include_nulls, 3690 ) 3691 3692 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3693 pivot.set("alias", self._parse_table_alias()) 3694 3695 if not unpivot: 3696 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3697 3698 columns: t.List[exp.Expression] = [] 3699 for fld in pivot.args["field"].expressions: 3700 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3701 for name in names: 3702 if self.PREFIXED_PIVOT_COLUMNS: 3703 name = f"{name}_{field_name}" if name else field_name 3704 else: 3705 name = f"{field_name}_{name}" if name else 
field_name 3706 3707 columns.append(exp.to_identifier(name)) 3708 3709 pivot.set("columns", columns) 3710 3711 return pivot 3712 3713 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3714 return [agg.alias for agg in aggregations] 3715 3716 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3717 if not skip_where_token and not self._match(TokenType.PREWHERE): 3718 return None 3719 3720 return self.expression( 3721 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3722 ) 3723 3724 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3725 if not skip_where_token and not self._match(TokenType.WHERE): 3726 return None 3727 3728 return self.expression( 3729 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3730 ) 3731 3732 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3733 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3734 return None 3735 3736 elements: t.Dict[str, t.Any] = defaultdict(list) 3737 3738 if self._match(TokenType.ALL): 3739 elements["all"] = True 3740 elif self._match(TokenType.DISTINCT): 3741 elements["all"] = False 3742 3743 while True: 3744 expressions = self._parse_csv( 3745 lambda: None 3746 if self._match(TokenType.ROLLUP, advance=False) 3747 else self._parse_assignment() 3748 ) 3749 if expressions: 3750 elements["expressions"].extend(expressions) 3751 3752 grouping_sets = self._parse_grouping_sets() 3753 if grouping_sets: 3754 elements["grouping_sets"].extend(grouping_sets) 3755 3756 rollup = None 3757 cube = None 3758 totals = None 3759 3760 index = self._index 3761 with_ = self._match(TokenType.WITH) 3762 if self._match(TokenType.ROLLUP): 3763 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3764 elements["rollup"].extend(ensure_list(rollup)) 3765 3766 if self._match(TokenType.CUBE): 3767 cube = with_ or 
self._parse_wrapped_csv(self._parse_column) 3768 elements["cube"].extend(ensure_list(cube)) 3769 3770 if self._match_text_seq("TOTALS"): 3771 totals = True 3772 elements["totals"] = True # type: ignore 3773 3774 if not (grouping_sets or rollup or cube or totals): 3775 if with_: 3776 self._retreat(index) 3777 break 3778 3779 return self.expression(exp.Group, **elements) # type: ignore 3780 3781 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3782 if not self._match(TokenType.GROUPING_SETS): 3783 return None 3784 3785 return self._parse_wrapped_csv(self._parse_grouping_set) 3786 3787 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3788 if self._match(TokenType.L_PAREN): 3789 grouping_set = self._parse_csv(self._parse_column) 3790 self._match_r_paren() 3791 return self.expression(exp.Tuple, expressions=grouping_set) 3792 3793 return self._parse_column() 3794 3795 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3796 if not skip_having_token and not self._match(TokenType.HAVING): 3797 return None 3798 return self.expression(exp.Having, this=self._parse_assignment()) 3799 3800 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3801 if not self._match(TokenType.QUALIFY): 3802 return None 3803 return self.expression(exp.Qualify, this=self._parse_assignment()) 3804 3805 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3806 if skip_start_token: 3807 start = None 3808 elif self._match(TokenType.START_WITH): 3809 start = self._parse_assignment() 3810 else: 3811 return None 3812 3813 self._match(TokenType.CONNECT_BY) 3814 nocycle = self._match_text_seq("NOCYCLE") 3815 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3816 exp.Prior, this=self._parse_bitwise() 3817 ) 3818 connect = self._parse_assignment() 3819 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3820 3821 if not start and self._match(TokenType.START_WITH): 3822 start = 
self._parse_assignment() 3823 3824 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3825 3826 def _parse_name_as_expression(self) -> exp.Alias: 3827 return self.expression( 3828 exp.Alias, 3829 alias=self._parse_id_var(any_token=True), 3830 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3831 ) 3832 3833 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3834 if self._match_text_seq("INTERPOLATE"): 3835 return self._parse_wrapped_csv(self._parse_name_as_expression) 3836 return None 3837 3838 def _parse_order( 3839 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3840 ) -> t.Optional[exp.Expression]: 3841 siblings = None 3842 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3843 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3844 return this 3845 3846 siblings = True 3847 3848 return self.expression( 3849 exp.Order, 3850 this=this, 3851 expressions=self._parse_csv(self._parse_ordered), 3852 interpolate=self._parse_interpolate(), 3853 siblings=siblings, 3854 ) 3855 3856 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3857 if not self._match(token): 3858 return None 3859 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3860 3861 def _parse_ordered( 3862 self, parse_method: t.Optional[t.Callable] = None 3863 ) -> t.Optional[exp.Ordered]: 3864 this = parse_method() if parse_method else self._parse_assignment() 3865 if not this: 3866 return None 3867 3868 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 3869 this = exp.var("ALL") 3870 3871 asc = self._match(TokenType.ASC) 3872 desc = self._match(TokenType.DESC) or (asc and False) 3873 3874 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3875 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3876 3877 nulls_first = is_nulls_first or False 3878 explicitly_null_ordered = is_nulls_first or is_nulls_last 3879 3880 
if ( 3881 not explicitly_null_ordered 3882 and ( 3883 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3884 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3885 ) 3886 and self.dialect.NULL_ORDERING != "nulls_are_last" 3887 ): 3888 nulls_first = True 3889 3890 if self._match_text_seq("WITH", "FILL"): 3891 with_fill = self.expression( 3892 exp.WithFill, 3893 **{ # type: ignore 3894 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3895 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3896 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3897 }, 3898 ) 3899 else: 3900 with_fill = None 3901 3902 return self.expression( 3903 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3904 ) 3905 3906 def _parse_limit( 3907 self, 3908 this: t.Optional[exp.Expression] = None, 3909 top: bool = False, 3910 skip_limit_token: bool = False, 3911 ) -> t.Optional[exp.Expression]: 3912 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3913 comments = self._prev_comments 3914 if top: 3915 limit_paren = self._match(TokenType.L_PAREN) 3916 expression = self._parse_term() if limit_paren else self._parse_number() 3917 3918 if limit_paren: 3919 self._match_r_paren() 3920 else: 3921 expression = self._parse_term() 3922 3923 if self._match(TokenType.COMMA): 3924 offset = expression 3925 expression = self._parse_term() 3926 else: 3927 offset = None 3928 3929 limit_exp = self.expression( 3930 exp.Limit, 3931 this=this, 3932 expression=expression, 3933 offset=offset, 3934 comments=comments, 3935 expressions=self._parse_limit_by(), 3936 ) 3937 3938 return limit_exp 3939 3940 if self._match(TokenType.FETCH): 3941 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3942 direction = self._prev.text.upper() if direction else "FIRST" 3943 3944 count = self._parse_field(tokens=self.FETCH_TOKENS) 3945 percent = self._match(TokenType.PERCENT) 3946 3947 
self._match_set((TokenType.ROW, TokenType.ROWS)) 3948 3949 only = self._match_text_seq("ONLY") 3950 with_ties = self._match_text_seq("WITH", "TIES") 3951 3952 if only and with_ties: 3953 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3954 3955 return self.expression( 3956 exp.Fetch, 3957 direction=direction, 3958 count=count, 3959 percent=percent, 3960 with_ties=with_ties, 3961 ) 3962 3963 return this 3964 3965 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3966 if not self._match(TokenType.OFFSET): 3967 return this 3968 3969 count = self._parse_term() 3970 self._match_set((TokenType.ROW, TokenType.ROWS)) 3971 3972 return self.expression( 3973 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3974 ) 3975 3976 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3977 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3978 3979 def _parse_locks(self) -> t.List[exp.Lock]: 3980 locks = [] 3981 while True: 3982 if self._match_text_seq("FOR", "UPDATE"): 3983 update = True 3984 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3985 "LOCK", "IN", "SHARE", "MODE" 3986 ): 3987 update = False 3988 else: 3989 break 3990 3991 expressions = None 3992 if self._match_text_seq("OF"): 3993 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3994 3995 wait: t.Optional[bool | exp.Expression] = None 3996 if self._match_text_seq("NOWAIT"): 3997 wait = True 3998 elif self._match_text_seq("WAIT"): 3999 wait = self._parse_primary() 4000 elif self._match_text_seq("SKIP", "LOCKED"): 4001 wait = False 4002 4003 locks.append( 4004 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4005 ) 4006 4007 return locks 4008 4009 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4010 while this and self._match_set(self.SET_OPERATIONS): 4011 token_type = 
self._prev.token_type 4012 4013 if token_type == TokenType.UNION: 4014 operation: t.Type[exp.SetOperation] = exp.Union 4015 elif token_type == TokenType.EXCEPT: 4016 operation = exp.Except 4017 else: 4018 operation = exp.Intersect 4019 4020 comments = self._prev.comments 4021 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 4022 by_name = self._match_text_seq("BY", "NAME") 4023 expression = self._parse_select(nested=True, parse_set_operation=False) 4024 4025 this = self.expression( 4026 operation, 4027 comments=comments, 4028 this=this, 4029 distinct=distinct, 4030 by_name=by_name, 4031 expression=expression, 4032 ) 4033 4034 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4035 expression = this.expression 4036 4037 if expression: 4038 for arg in self.SET_OP_MODIFIERS: 4039 expr = expression.args.get(arg) 4040 if expr: 4041 this.set(arg, expr.pop()) 4042 4043 return this 4044 4045 def _parse_expression(self) -> t.Optional[exp.Expression]: 4046 return self._parse_alias(self._parse_assignment()) 4047 4048 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4049 this = self._parse_disjunction() 4050 4051 while self._match_set(self.ASSIGNMENT): 4052 this = self.expression( 4053 self.ASSIGNMENT[self._prev.token_type], 4054 this=this, 4055 comments=self._prev_comments, 4056 expression=self._parse_assignment(), 4057 ) 4058 4059 return this 4060 4061 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4062 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4063 4064 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4065 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4066 4067 def _parse_equality(self) -> t.Optional[exp.Expression]: 4068 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4069 4070 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4071 return self._parse_tokens(self._parse_range, self.COMPARISON) 4072 4073 def 
    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates: BETWEEN, IN, LIKE, IS [NOT], ISNULL/NOTNULL, etc."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                # Range parser backed out; NOTE: a consumed NOT is not un-consumed here
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate (the IS token was already consumed)."""
        # Remember position of the IS token so we can back out entirely
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS [NOT] DISTINCT FROM --> null-safe (in)equality
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of an IN predicate: UNNEST, a (sub)query/list, or a field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse `BETWEEN <low> AND <high>` (the BETWEEN token was already consumed)."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an ESCAPE clause if one follows (used with LIKE)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        """Parse an INTERVAL expression into the canonical `INTERVAL '<val>' <unit>` form.

        Args:
            match_interval: if False, the INTERVAL keyword is optional (used when
                parsing the continuation of a sum of intervals).
        """
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # Bare "IS" here means we actually saw an IS predicate, not an interval value
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if len(parts) == 1:
                if unit:
                    # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise-level operators, including ||, ??, and shift operators."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                # "??" null-coalescing operator
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (+, -, ...)."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators (*, /, DIV, ...)."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                # A word-form operator (e.g. DIV) with no right operand was actually
                # an identifier -- back out and return what we had
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                # Record dialect division semantics for faithful transpilation
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse exponentiation-level operators."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary prefix operators, falling through to typed/primary expressions."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an interval, a cast-style type constructor, or fall back to a column.

        Args:
            parse_interval: whether to try parsing an INTERVAL expression first.
            fallback_to_identifier: if no type matches, parse a bare identifier
                instead of a full column.
        """
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                # e.g. DATE '2024-01-01' -- either a dedicated literal parser or a Cast
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse a single type parameter, e.g. the `38` in DECIMAL(38, 0) or `10 CHAR`."""
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            # A bare column here is really a keyword-like var (e.g. MAX in VARCHAR(MAX))
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested) data type, e.g. INT, ARRAY<STRING>, MAP[...], DECIMAL(38, 0).

        Args:
            check_func: if True, peek for a trailing string literal to distinguish a
                parenthesized type from a function call before committing.
            schema: if True, we're inside a schema/column definition, which permits
                sized ARRAY types like INT[3].
            allow_identifiers: if True, plain identifiers may be re-tokenized as
                types (also enables user-defined types).

        Returns:
            An exp.DataType (or related expression), or None after backtracking.
        """
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                # Re-tokenize the identifier to see whether it's actually a type keyword
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # e.g. ClickHouse AggregateFunction(<func>, <types>...)
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            # https://docs.snowflake.com/en/sql-reference/data-types-vector
            if type_token == TokenType.VECTOR and len(expressions) == 2:
                expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # A parenthesized type could still be a function call -- checked further below
            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                # BQ inline constructor values, e.g. ARRAY<INT>[1, 2]
                values = self._parse_csv(self._parse_assignment)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            # A following string literal means this was a function call, not a type
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            matched_l_bracket = self._match(TokenType.L_BRACKET)
            if not matched_l_bracket and not matched_array:
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if values and not schema:
                # Outside a schema this is index access / an array literal, not a sized type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this
    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one field of a STRUCT type definition, e.g. the `a INT` in STRUCT<a INT>.

        Args:
            type_required: if True, backtrack and parse a bare type when no
                name-plus-type pair is found.
        """
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        # Optional `name: type` separator (e.g. ClickHouse / DuckDB struct syntax)
        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AT TIME ZONE if that clause follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference together with any trailing column operators."""
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            # Oracle-style (+) outer-join marker
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse a bare column reference, wrapping identifiers in exp.Column."""
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            # VALUES used as a plain identifier (not the VALUES (...) construct)
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse Snowflake/Databricks `col:path.to.field::type` variant extraction."""
        casts = []
        json_path = []

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                # NOTE(review): genexp variable `t` shadows the module's `typing as t`
                # alias inside this expression only; harmless but worth renaming someday
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Recover the raw SQL text of the path segment
                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))),
                variant_extract=True,
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        """Parse the type operand of a `::` cast."""
        return self._parse_types()
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing column operators: dots, `::` casts, brackets, JSON extraction, etc."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers left: the previous column name becomes the table, etc.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literal, adjacent-string concat, or parenthesized form."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate ('a' 'b' -> 'ab' semantics via Concat)
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimal, e.g. .25
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary, a function call, or an identifier.

        The order of attempts flips depending on `anonymous_func`, since anonymous
        function parsing is greedier than primary parsing.
        """
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)
    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, optionally wrapped in ODBC `{fn ...}` escape syntax."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call proper.

        Args:
            functions: name -> builder overrides (defaults to self.FUNCTIONS).
            anonymous: if True, always build an exp.Anonymous instead of a known function.
            optional_parens: allow paren-less functions (e.g. CURRENT_DATE).
            any_token: allow any non-reserved token as a function name.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...)
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                # Normalize aliased args into PropertyEQ key/value pairs
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Preserve the original casing of the function name
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Convert key/value-style args (aliases, equalities) into exp.PropertyEQ nodes."""
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    # Unwrap the Column so only the bare identifier remains as the key
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a user-defined function signature."""
        return self._parse_column_def(self._parse_id_var())
    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly-qualified UDF name and its optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'abc'); falls back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified (e.g. @@GLOBAL.x)."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        """Parse a single lambda argument (a bare identifier)."""
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. `(x, y) -> x + y`), else fall back to a regular expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all: back out and parse a normal expression
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint list into an exp.Schema."""
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse a single field definition within a schema."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type, and trailing constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            # Computed column, e.g. ClickHouse `x ALIAS expr` / `x MATERIALIZED expr`
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_assignment(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            # Just a bare identifier -- not a column def
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5037 start = None 5038 increment = None 5039 5040 if self._match(TokenType.L_PAREN, advance=False): 5041 args = self._parse_wrapped_csv(self._parse_bitwise) 5042 start = seq_get(args, 0) 5043 increment = seq_get(args, 1) 5044 elif self._match_text_seq("START"): 5045 start = self._parse_bitwise() 5046 self._match_text_seq("INCREMENT") 5047 increment = self._parse_bitwise() 5048 5049 if start and increment: 5050 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5051 5052 return exp.AutoIncrementColumnConstraint() 5053 5054 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5055 if not self._match_text_seq("REFRESH"): 5056 self._retreat(self._index - 1) 5057 return None 5058 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5059 5060 def _parse_compress(self) -> exp.CompressColumnConstraint: 5061 if self._match(TokenType.L_PAREN, advance=False): 5062 return self.expression( 5063 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5064 ) 5065 5066 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5067 5068 def _parse_generated_as_identity( 5069 self, 5070 ) -> ( 5071 exp.GeneratedAsIdentityColumnConstraint 5072 | exp.ComputedColumnConstraint 5073 | exp.GeneratedAsRowColumnConstraint 5074 ): 5075 if self._match_text_seq("BY", "DEFAULT"): 5076 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5077 this = self.expression( 5078 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5079 ) 5080 else: 5081 self._match_text_seq("ALWAYS") 5082 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5083 5084 self._match(TokenType.ALIAS) 5085 5086 if self._match_text_seq("ROW"): 5087 start = self._match_text_seq("START") 5088 if not start: 5089 self._match(TokenType.END) 5090 hidden = self._match_text_seq("HIDDEN") 5091 
return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5092 5093 identity = self._match_text_seq("IDENTITY") 5094 5095 if self._match(TokenType.L_PAREN): 5096 if self._match(TokenType.START_WITH): 5097 this.set("start", self._parse_bitwise()) 5098 if self._match_text_seq("INCREMENT", "BY"): 5099 this.set("increment", self._parse_bitwise()) 5100 if self._match_text_seq("MINVALUE"): 5101 this.set("minvalue", self._parse_bitwise()) 5102 if self._match_text_seq("MAXVALUE"): 5103 this.set("maxvalue", self._parse_bitwise()) 5104 5105 if self._match_text_seq("CYCLE"): 5106 this.set("cycle", True) 5107 elif self._match_text_seq("NO", "CYCLE"): 5108 this.set("cycle", False) 5109 5110 if not identity: 5111 this.set("expression", self._parse_range()) 5112 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5113 args = self._parse_csv(self._parse_bitwise) 5114 this.set("start", seq_get(args, 0)) 5115 this.set("increment", seq_get(args, 1)) 5116 5117 self._match_r_paren() 5118 5119 return this 5120 5121 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5122 self._match_text_seq("LENGTH") 5123 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5124 5125 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5126 if self._match_text_seq("NULL"): 5127 return self.expression(exp.NotNullColumnConstraint) 5128 if self._match_text_seq("CASESPECIFIC"): 5129 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5130 if self._match_text_seq("FOR", "REPLICATION"): 5131 return self.expression(exp.NotForReplicationColumnConstraint) 5132 return None 5133 5134 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5135 if self._match(TokenType.CONSTRAINT): 5136 this = self._parse_id_var() 5137 else: 5138 this = None 5139 5140 if self._match_texts(self.CONSTRAINT_PARSERS): 5141 return self.expression( 5142 exp.ColumnConstraint, 5143 this=this, 5144 
kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5145 ) 5146 5147 return this 5148 5149 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5150 if not self._match(TokenType.CONSTRAINT): 5151 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5152 5153 return self.expression( 5154 exp.Constraint, 5155 this=self._parse_id_var(), 5156 expressions=self._parse_unnamed_constraints(), 5157 ) 5158 5159 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5160 constraints = [] 5161 while True: 5162 constraint = self._parse_unnamed_constraint() or self._parse_function() 5163 if not constraint: 5164 break 5165 constraints.append(constraint) 5166 5167 return constraints 5168 5169 def _parse_unnamed_constraint( 5170 self, constraints: t.Optional[t.Collection[str]] = None 5171 ) -> t.Optional[exp.Expression]: 5172 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5173 constraints or self.CONSTRAINT_PARSERS 5174 ): 5175 return None 5176 5177 constraint = self._prev.text.upper() 5178 if constraint not in self.CONSTRAINT_PARSERS: 5179 self.raise_error(f"No parser found for schema constraint {constraint}.") 5180 5181 return self.CONSTRAINT_PARSERS[constraint](self) 5182 5183 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5184 self._match_text_seq("KEY") 5185 return self.expression( 5186 exp.UniqueColumnConstraint, 5187 this=self._parse_schema(self._parse_id_var(any_token=False)), 5188 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5189 on_conflict=self._parse_on_conflict(), 5190 ) 5191 5192 def _parse_key_constraint_options(self) -> t.List[str]: 5193 options = [] 5194 while True: 5195 if not self._curr: 5196 break 5197 5198 if self._match(TokenType.ON): 5199 action = None 5200 on = self._advance_any() and self._prev.text 5201 5202 if self._match_text_seq("NO", "ACTION"): 5203 action = "NO ACTION" 5204 elif self._match_text_seq("CASCADE"): 5205 
action = "CASCADE" 5206 elif self._match_text_seq("RESTRICT"): 5207 action = "RESTRICT" 5208 elif self._match_pair(TokenType.SET, TokenType.NULL): 5209 action = "SET NULL" 5210 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5211 action = "SET DEFAULT" 5212 else: 5213 self.raise_error("Invalid key constraint") 5214 5215 options.append(f"ON {on} {action}") 5216 elif self._match_text_seq("NOT", "ENFORCED"): 5217 options.append("NOT ENFORCED") 5218 elif self._match_text_seq("DEFERRABLE"): 5219 options.append("DEFERRABLE") 5220 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5221 options.append("INITIALLY DEFERRED") 5222 elif self._match_text_seq("NORELY"): 5223 options.append("NORELY") 5224 elif self._match_text_seq("MATCH", "FULL"): 5225 options.append("MATCH FULL") 5226 else: 5227 break 5228 5229 return options 5230 5231 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5232 if match and not self._match(TokenType.REFERENCES): 5233 return None 5234 5235 expressions = None 5236 this = self._parse_table(schema=True) 5237 options = self._parse_key_constraint_options() 5238 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5239 5240 def _parse_foreign_key(self) -> exp.ForeignKey: 5241 expressions = self._parse_wrapped_id_vars() 5242 reference = self._parse_references() 5243 options = {} 5244 5245 while self._match(TokenType.ON): 5246 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5247 self.raise_error("Expected DELETE or UPDATE") 5248 5249 kind = self._prev.text.lower() 5250 5251 if self._match_text_seq("NO", "ACTION"): 5252 action = "NO ACTION" 5253 elif self._match(TokenType.SET): 5254 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5255 action = "SET " + self._prev.text.upper() 5256 else: 5257 self._advance() 5258 action = self._prev.text.upper() 5259 5260 options[kind] = action 5261 5262 return self.expression( 5263 exp.ForeignKey, 5264 expressions=expressions, 5265 
reference=reference, 5266 **options, # type: ignore 5267 ) 5268 5269 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5270 return self._parse_field() 5271 5272 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5273 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5274 self._retreat(self._index - 1) 5275 return None 5276 5277 id_vars = self._parse_wrapped_id_vars() 5278 return self.expression( 5279 exp.PeriodForSystemTimeConstraint, 5280 this=seq_get(id_vars, 0), 5281 expression=seq_get(id_vars, 1), 5282 ) 5283 5284 def _parse_primary_key( 5285 self, wrapped_optional: bool = False, in_props: bool = False 5286 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5287 desc = ( 5288 self._match_set((TokenType.ASC, TokenType.DESC)) 5289 and self._prev.token_type == TokenType.DESC 5290 ) 5291 5292 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5293 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5294 5295 expressions = self._parse_wrapped_csv( 5296 self._parse_primary_key_part, optional=wrapped_optional 5297 ) 5298 options = self._parse_key_constraint_options() 5299 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5300 5301 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5302 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5303 5304 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5305 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5306 return this 5307 5308 bracket_kind = self._prev.token_type 5309 expressions = self._parse_csv( 5310 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5311 ) 5312 5313 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5314 self.raise_error("Expected ]") 5315 elif bracket_kind == TokenType.L_BRACE and not 
self._match(TokenType.R_BRACE): 5316 self.raise_error("Expected }") 5317 5318 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5319 if bracket_kind == TokenType.L_BRACE: 5320 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5321 elif not this: 5322 this = self.expression(exp.Array, expressions=expressions) 5323 else: 5324 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5325 if constructor_type: 5326 return self.expression(constructor_type, expressions=expressions) 5327 5328 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5329 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5330 5331 self._add_comments(this) 5332 return self._parse_bracket(this) 5333 5334 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5335 if self._match(TokenType.COLON): 5336 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5337 return this 5338 5339 def _parse_case(self) -> t.Optional[exp.Expression]: 5340 ifs = [] 5341 default = None 5342 5343 comments = self._prev_comments 5344 expression = self._parse_assignment() 5345 5346 while self._match(TokenType.WHEN): 5347 this = self._parse_assignment() 5348 self._match(TokenType.THEN) 5349 then = self._parse_assignment() 5350 ifs.append(self.expression(exp.If, this=this, true=then)) 5351 5352 if self._match(TokenType.ELSE): 5353 default = self._parse_assignment() 5354 5355 if not self._match(TokenType.END): 5356 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5357 default = exp.column("interval") 5358 else: 5359 self.raise_error("Expected END after CASE", self._prev) 5360 5361 return self.expression( 5362 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5363 ) 5364 5365 def _parse_if(self) -> t.Optional[exp.Expression]: 5366 if self._match(TokenType.L_PAREN): 5367 args = 
self._parse_csv(self._parse_assignment) 5368 this = self.validate_expression(exp.If.from_arg_list(args), args) 5369 self._match_r_paren() 5370 else: 5371 index = self._index - 1 5372 5373 if self.NO_PAREN_IF_COMMANDS and index == 0: 5374 return self._parse_as_command(self._prev) 5375 5376 condition = self._parse_assignment() 5377 5378 if not condition: 5379 self._retreat(index) 5380 return None 5381 5382 self._match(TokenType.THEN) 5383 true = self._parse_assignment() 5384 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5385 self._match(TokenType.END) 5386 this = self.expression(exp.If, this=condition, true=true, false=false) 5387 5388 return this 5389 5390 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5391 if not self._match_text_seq("VALUE", "FOR"): 5392 self._retreat(self._index - 1) 5393 return None 5394 5395 return self.expression( 5396 exp.NextValueFor, 5397 this=self._parse_column(), 5398 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5399 ) 5400 5401 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5402 this = self._parse_function() or self._parse_var_or_string(upper=True) 5403 5404 if self._match(TokenType.FROM): 5405 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5406 5407 if not self._match(TokenType.COMMA): 5408 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5409 5410 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5411 5412 def _parse_gap_fill(self) -> exp.GapFill: 5413 self._match(TokenType.TABLE) 5414 this = self._parse_table() 5415 5416 self._match(TokenType.COMMA) 5417 args = [this, *self._parse_csv(self._parse_lambda)] 5418 5419 gap_fill = exp.GapFill.from_arg_list(args) 5420 return self.validate_expression(gap_fill, args) 5421 5422 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5423 this = self._parse_assignment() 5424 5425 if not 
self._match(TokenType.ALIAS): 5426 if self._match(TokenType.COMMA): 5427 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5428 5429 self.raise_error("Expected AS after CAST") 5430 5431 fmt = None 5432 to = self._parse_types() 5433 5434 if self._match(TokenType.FORMAT): 5435 fmt_string = self._parse_string() 5436 fmt = self._parse_at_time_zone(fmt_string) 5437 5438 if not to: 5439 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5440 if to.this in exp.DataType.TEMPORAL_TYPES: 5441 this = self.expression( 5442 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5443 this=this, 5444 format=exp.Literal.string( 5445 format_time( 5446 fmt_string.this if fmt_string else "", 5447 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5448 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5449 ) 5450 ), 5451 safe=safe, 5452 ) 5453 5454 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5455 this.set("zone", fmt.args["zone"]) 5456 return this 5457 elif not to: 5458 self.raise_error("Expected TYPE after CAST") 5459 elif isinstance(to, exp.Identifier): 5460 to = exp.DataType.build(to.name, udt=True) 5461 elif to.this == exp.DataType.Type.CHAR: 5462 if self._match(TokenType.CHARACTER_SET): 5463 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5464 5465 return self.expression( 5466 exp.Cast if strict else exp.TryCast, 5467 this=this, 5468 to=to, 5469 format=fmt, 5470 safe=safe, 5471 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5472 ) 5473 5474 def _parse_string_agg(self) -> exp.Expression: 5475 if self._match(TokenType.DISTINCT): 5476 args: t.List[t.Optional[exp.Expression]] = [ 5477 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5478 ] 5479 if self._match(TokenType.COMMA): 5480 args.extend(self._parse_csv(self._parse_assignment)) 5481 else: 5482 args = self._parse_csv(self._parse_assignment) # type: ignore 5483 5484 index = 
self._index 5485 if not self._match(TokenType.R_PAREN) and args: 5486 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5487 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5488 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5489 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5490 5491 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5492 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5493 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5494 if not self._match_text_seq("WITHIN", "GROUP"): 5495 self._retreat(index) 5496 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5497 5498 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5499 order = self._parse_order(this=seq_get(args, 0)) 5500 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5501 5502 def _parse_convert( 5503 self, strict: bool, safe: t.Optional[bool] = None 5504 ) -> t.Optional[exp.Expression]: 5505 this = self._parse_bitwise() 5506 5507 if self._match(TokenType.USING): 5508 to: t.Optional[exp.Expression] = self.expression( 5509 exp.CharacterSet, this=self._parse_var() 5510 ) 5511 elif self._match(TokenType.COMMA): 5512 to = self._parse_types() 5513 else: 5514 to = None 5515 5516 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5517 5518 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5519 """ 5520 There are generally two variants of the DECODE function: 5521 5522 - DECODE(bin, charset) 5523 - DECODE(expression, search, result [, search, result] ... 
[, default]) 5524 5525 The second variant will always be parsed into a CASE expression. Note that NULL 5526 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5527 instead of relying on pattern matching. 5528 """ 5529 args = self._parse_csv(self._parse_assignment) 5530 5531 if len(args) < 3: 5532 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5533 5534 expression, *expressions = args 5535 if not expression: 5536 return None 5537 5538 ifs = [] 5539 for search, result in zip(expressions[::2], expressions[1::2]): 5540 if not search or not result: 5541 return None 5542 5543 if isinstance(search, exp.Literal): 5544 ifs.append( 5545 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5546 ) 5547 elif isinstance(search, exp.Null): 5548 ifs.append( 5549 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5550 ) 5551 else: 5552 cond = exp.or_( 5553 exp.EQ(this=expression.copy(), expression=search), 5554 exp.and_( 5555 exp.Is(this=expression.copy(), expression=exp.Null()), 5556 exp.Is(this=search.copy(), expression=exp.Null()), 5557 copy=False, 5558 ), 5559 copy=False, 5560 ) 5561 ifs.append(exp.If(this=cond, true=result)) 5562 5563 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5564 5565 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5566 self._match_text_seq("KEY") 5567 key = self._parse_column() 5568 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5569 self._match_text_seq("VALUE") 5570 value = self._parse_bitwise() 5571 5572 if not key and not value: 5573 return None 5574 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5575 5576 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5577 if not this or not self._match_text_seq("FORMAT", "JSON"): 5578 return this 5579 5580 return self.expression(exp.FormatJson, this=this) 5581 
    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT(...) / JSON_OBJECTAGG(...) arguments: key-value
        pairs (or *), NULL/ABSENT ON NULL, [WITH|WITHOUT] UNIQUE KEYS,
        RETURNING <type> and ENCODING <var>."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        # NESTED columns carry a nested schema instead of a name/type
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        # COLUMNS (<json column defs>)
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        # JSON_TABLE(doc [, path] [ERROR|NULL ON ERROR] [ERROR|NULL ON EMPTY] COLUMNS(...))
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        # MySQL full-text search: MATCH (cols) AGAINST ('expr' [modifier])
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # name type ['path'] [AS JSON]
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        # POSITION(needle IN haystack) or comma form; `haystack_first` flips the
        # comma-form argument order for dialects like Spark
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        # ML.PREDICT(MODEL <model>, TABLE <table> [, params])
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        # e.g. BROADCAST(t1, t2) hint-style functions
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                # FOR without FROM defaults the start position to 1
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM str) puts the pattern first; some dialects also do
            # this in the comma form (TRIM_PATTERN_FIRST)
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        # WINDOW w1 AS (...), w2 AS (...)
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        # Wraps `this` in IgnoreNulls / RespectNulls when the modifier follows
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Teradata-style HAVING MAX/MIN aggregate qualifier
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            # NOTE(review): `max` shadows the builtin; anything other than MIN
            # (including a failed match) is treated as MAX
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window-function suffix of `this`: FILTER (...), WITHIN GROUP,
        IGNORE/RESPECT NULLS and OVER (...) with partition/order/frame spec.
        With `alias=True`, parses a named WINDOW definition instead."""
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist the modifier from inside the aggregate to wrap it
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> reference (no parenthesized spec)
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        # One endpoint of a window frame: UNBOUNDED / CURRENT ROW / <expr>,
        # plus an optional PRECEDING/FOLLOWING side
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an [AS] alias (or AS (a, b, ...) alias list) for `this`;
        `explicit=True` requires the AS keyword."""
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        # A quoted identifier, or (when allowed) any token / a token from `tokens`
        # promoted to an identifier
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        # 'foo' used in identifier position becomes a quoted identifier
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        # Note: signature continues with `upper: bool = False` in the original
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and
(ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6028 self._advance() 6029 return self._prev 6030 return None 6031 6032 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6033 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6034 6035 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6036 return self._parse_primary() or self._parse_var(any_token=True) 6037 6038 def _parse_null(self) -> t.Optional[exp.Expression]: 6039 if self._match_set(self.NULL_TOKENS): 6040 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6041 return self._parse_placeholder() 6042 6043 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6044 if self._match(TokenType.TRUE): 6045 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6046 if self._match(TokenType.FALSE): 6047 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6048 return self._parse_placeholder() 6049 6050 def _parse_star(self) -> t.Optional[exp.Expression]: 6051 if self._match(TokenType.STAR): 6052 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6053 return self._parse_placeholder() 6054 6055 def _parse_parameter(self) -> exp.Parameter: 6056 this = self._parse_identifier() or self._parse_primary_or_var() 6057 return self.expression(exp.Parameter, this=this) 6058 6059 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6060 if self._match_set(self.PLACEHOLDER_PARSERS): 6061 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6062 if placeholder: 6063 return placeholder 6064 self._advance(-1) 6065 return None 6066 6067 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6068 if not self._match_texts(keywords): 6069 return None 6070 if self._match(TokenType.L_PAREN, advance=False): 6071 return self._parse_wrapped_csv(self._parse_expression) 6072 6073 expression = self._parse_expression() 6074 return [expression] if expression else None 
    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list of items using `parse_method`,
        dropping items that parse to None.
        """
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach any comments found on the separator to the previous item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of binary operators: operands come
        from `parse_method`, operator tokens map to node types via `expressions`.
        """
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized `sep`-separated list using `parse_method`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; the parentheses may be
        omitted when `optional` is True, otherwise their absence is an error.
        """
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT, or failing that, a (possibly set-operated) expression."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE AS)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [kind] [TRANSACTION | WORK] [modes...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            # A single transaction mode may span multiple VAR tokens.
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION | WORK] [TO SAVEPOINT x]
        [AND [NO] CHAIN]. The COMMIT/ROLLBACK keyword was already consumed.
        """
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        """Parse REFRESH [TABLE] <string | table>."""
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse DROP [COLUMN] ..., defaulting the drop kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse the partition list of ALTER TABLE ... DROP PARTITION."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the ADD action of ALTER TABLE: constraints or columns."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            # Single ADD keyword followed by a (possibly parenthesized) column list.
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        """Parse the ALTER action of ALTER TABLE (column alterations by default)."""
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        # Fall-through: [SET DATA] TYPE <dtype> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        """Parse ALTER TABLE ... ALTER DISTSTYLE {ALL | EVEN | AUTO | KEY DISTKEY col}."""
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        """Parse ALTER TABLE ... ALTER [COMPOUND] SORTKEY {(cols) | AUTO | NONE}."""
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the DROP action of ALTER TABLE: partitions or columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        """Parse RENAME [COLUMN old TO new | TO new_table]."""
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        """Parse the SET action of ALTER TABLE across several dialect-specific
        forms (property lists, LOGGED/UNLOGGED, LOCATION, file formats, tags, ...).
        """
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE; falls back to a raw Command when the statement
        cannot be fully parsed (tokens remain after the known actions).
        """
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            # Only build an AlterTable node if everything was consumed.
            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE [INTO] target USING source ON condition WHEN ..."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED [BY TARGET|SOURCE] [AND cond] THEN
        clauses of a MERGE statement.
        """
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source: False for BY TARGET, True for BY SOURCE, None if absent.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via the registered SHOW parsers, falling
        back to a raw Command.
        """
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a single `name = value` (or `name TO value`) item of a SET
        statement; rewinds and returns None if it doesn't look like one.
        """
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via the registered SET parsers, defaulting to a
        plain assignment.
        """
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; falls back to a raw Command if tokens remain."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Parse a (possibly multi-word) option keyword described by `options`,
        a mapping of first word -> allowed keyword continuations. Either raises
        on an unknown option or rewinds and returns None, per `raise_unmatched`.
        """
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched (for-else): the option is unknown unless
            # it was registered with an empty continuation sequence.
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement verbatim into a raw Command node,
        splitting off the leading keyword (`start`) as `this`.
        """
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property `this (KIND(key value ...))`
        (e.g. ClickHouse dictionary LAYOUT/SOURCE clauses).
        """
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse `this (MIN x MAX y)` or `this (MAX y)`; MIN defaults to 0."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parse `expr FOR x IN iterator [IF cond]`; rewinds (including the
        already-consumed FOR) and returns None if IN doesn't follow.
        """
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parse a heredoc string, either as a single HEREDOC_STRING token or
        as a `$tag$ ... $tag$` sequence scanned token by token.
        """
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Find the parser whose (possibly multi-word) keyword matches the
        upcoming tokens, using `trie` for longest-prefix matching. Rewinds and
        returns None on failure.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        # Match the current token against `token_type`, advancing on success
        # (unless advance=False) and attaching pending comments to `expression`.
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        # Like _match, but succeeds if the current token type is in `types`.
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        # Match two consecutive token types, advancing past both on success.
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        # Require an opening parenthesis; error if absent.
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        # Require a closing parenthesis; error if absent.
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        # Match the current token's upper-cased text against a set of strings.
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        # Match a sequence of upper-cased token texts; rewinds fully on any
        # mismatch (and on success too, when advance=False).
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Rewrite columns in `node` that refer to lambda parameters
        (`expressions`), replacing them with identifiers/dots and applying the
        parameter's cast type when one was declared.
        """
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                # Replace the outermost Dot the column participates in;
                # the while-else runs only when the column isn't inside a Dot.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        """Parse TRUNCATE [TABLE | DATABASE] ...; distinguishes the
        TRUNCATE(number, decimals) function call, and falls back to a raw
        Command when unparsed tokens remain.
        """
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        """Parse an ordered expression optionally followed by `WITH operator`."""
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse `[=] (prop [,] prop ...)` option lists (e.g. Snowflake
        FILE_FORMAT definitions).
        """
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
                # so we parse it separately to use _parse_field()
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        """Parse the option list of a COPY statement (`opt [= | AS] value ...`)."""
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        """Parse the credentials-related clauses of a COPY statement
        (STORAGE_INTEGRATION, CREDENTIALS, ENCRYPTION, IAM_ROLE, REGION).
        """
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        """Parse a file location in a COPY statement."""
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        """Parse COPY [INTO] target {FROM | TO} files ...; falls back to a raw
        Command when unparsed tokens remain.
        """
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        # kind is True for COPY ... FROM, False for COPY ... TO.
        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1262 def __init__( 1263 self, 1264 error_level: t.Optional[ErrorLevel] = None, 1265 error_message_context: int = 100, 1266 max_errors: int = 3, 1267 dialect: DialectType = None, 1268 ): 1269 from sqlglot.dialects import Dialect 1270 1271 self.error_level = error_level or ErrorLevel.IMMEDIATE 1272 self.error_message_context = error_message_context 1273 self.max_errors = max_errors 1274 self.dialect = Dialect.get_or_raise(dialect) 1275 self.reset()
1287 def parse( 1288 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1289 ) -> t.List[t.Optional[exp.Expression]]: 1290 """ 1291 Parses a list of tokens and returns a list of syntax trees, one tree 1292 per parsed SQL statement. 1293 1294 Args: 1295 raw_tokens: The list of tokens. 1296 sql: The original SQL string, used to produce helpful debug messages. 1297 1298 Returns: 1299 The list of the produced syntax trees. 1300 """ 1301 return self._parse( 1302 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1303 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1305 def parse_into( 1306 self, 1307 expression_types: exp.IntoType, 1308 raw_tokens: t.List[Token], 1309 sql: t.Optional[str] = None, 1310 ) -> t.List[t.Optional[exp.Expression]]: 1311 """ 1312 Parses a list of tokens into a given Expression type. If a collection of Expression 1313 types is given instead, this method will try to parse the token list into each one 1314 of them, stopping at the first for which the parsing succeeds. 1315 1316 Args: 1317 expression_types: The expression type(s) to try and parse the token list into. 1318 raw_tokens: The list of tokens. 1319 sql: The original SQL string, used to produce helpful debug messages. 1320 1321 Returns: 1322 The target Expression. 1323 """ 1324 errors = [] 1325 for expression_type in ensure_list(expression_types): 1326 parser = self.EXPRESSION_PARSERS.get(expression_type) 1327 if not parser: 1328 raise TypeError(f"No parser registered for {expression_type}") 1329 1330 try: 1331 return self._parse(parser, raw_tokens, sql) 1332 except ParseError as e: 1333 e.errors[0]["into_expression"] = expression_type 1334 errors.append(e) 1335 1336 raise ParseError( 1337 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1338 errors=merge_errors(errors), 1339 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1379 def check_errors(self) -> None: 1380 """Logs or raises any found errors, depending on the chosen error level setting.""" 1381 if self.error_level == ErrorLevel.WARN: 1382 for error in self.errors: 1383 logger.error(str(error)) 1384 elif self.error_level == ErrorLevel.RAISE and self.errors: 1385 raise ParseError( 1386 concat_messages(self.errors, self.max_errors), 1387 errors=merge_errors(self.errors), 1388 )
Logs or raises any found errors, depending on the chosen error level setting.
1390 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1391 """ 1392 Appends an error in the list of recorded errors or raises it, depending on the chosen 1393 error level setting. 1394 """ 1395 token = token or self._curr or self._prev or Token.string("") 1396 start = token.start 1397 end = token.end + 1 1398 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1399 highlight = self.sql[start:end] 1400 end_context = self.sql[end : end + self.error_message_context] 1401 1402 error = ParseError.new( 1403 f"{message}. Line {token.line}, Col: {token.col}.\n" 1404 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1405 description=message, 1406 line=token.line, 1407 col=token.col, 1408 start_context=start_context, 1409 highlight=highlight, 1410 end_context=end_context, 1411 ) 1412 1413 if self.error_level == ErrorLevel.IMMEDIATE: 1414 raise error 1415 1416 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1418 def expression( 1419 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1420 ) -> E: 1421 """ 1422 Creates a new, validated Expression. 1423 1424 Args: 1425 exp_class: The expression class to instantiate. 1426 comments: An optional list of comments to attach to the expression. 1427 kwargs: The arguments to set for the expression along with their respective values. 1428 1429 Returns: 1430 The target expression. 1431 """ 1432 instance = exp_class(**kwargs) 1433 instance.add_comments(comments) if comments else self._add_comments(instance) 1434 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1441 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1442 """ 1443 Validates an Expression, making sure that all its mandatory arguments are set. 1444 1445 Args: 1446 expression: The expression to validate. 1447 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1448 1449 Returns: 1450 The validated expression. 1451 """ 1452 if self.error_level != ErrorLevel.IGNORE: 1453 for error_message in expression.error_messages(args): 1454 self.raise_error(error_message) 1455 1456 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.