# Module: sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18 T = t.TypeVar("T") 19 20logger = logging.getLogger("sqlglot") 21 22OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 23 24 25def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 26 if len(args) == 1 and args[0].is_star: 27 return exp.StarMap(this=args[0]) 28 29 keys = [] 30 values = [] 31 for i in range(0, len(args), 2): 32 keys.append(args[i]) 33 values.append(args[i + 1]) 34 35 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 36 37 38def build_like(args: t.List) -> exp.Escape | exp.Like: 39 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 40 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 41 42 43def binary_range_parser( 44 expr_type: t.Type[exp.Expression], reverse_args: bool = False 45) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 46 def _parse_binary_range( 47 self: Parser, this: t.Optional[exp.Expression] 48 ) -> t.Optional[exp.Expression]: 49 expression = self._parse_bitwise() 50 if reverse_args: 51 this, expression = expression, this 52 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 53 54 return _parse_binary_range 55 56 57def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 58 # Default argument order is base, expression 59 this = seq_get(args, 0) 60 expression = seq_get(args, 1) 61 
62 if expression: 63 if not dialect.LOG_BASE_FIRST: 64 this, expression = expression, this 65 return exp.Log(this=this, expression=expression) 66 67 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 68 69 70def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex: 71 arg = seq_get(args, 0) 72 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) 73 74 75def build_lower(args: t.List) -> exp.Lower | exp.Hex: 76 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation 77 arg = seq_get(args, 0) 78 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) 79 80 81def build_upper(args: t.List) -> exp.Upper | exp.Hex: 82 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation 83 arg = seq_get(args, 0) 84 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) 85 86 87def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 88 def _builder(args: t.List, dialect: Dialect) -> E: 89 expression = expr_type( 90 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 91 ) 92 if len(args) > 2 and expr_type is exp.JSONExtract: 93 expression.set("expressions", args[2:]) 94 95 return expression 96 97 return _builder 98 99 100def build_mod(args: t.List) -> exp.Mod: 101 this = seq_get(args, 0) 102 expression = seq_get(args, 1) 103 104 # Wrap the operands if they are binary nodes, e.g. 
MOD(a + 1, 7) -> (a + 1) % 7 105 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 106 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 107 108 return exp.Mod(this=this, expression=expression) 109 110 111def build_pad(args: t.List, is_left: bool = True): 112 return exp.Pad( 113 this=seq_get(args, 0), 114 expression=seq_get(args, 1), 115 fill_pattern=seq_get(args, 2), 116 is_left=is_left, 117 ) 118 119 120def build_array_constructor( 121 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 122) -> exp.Expression: 123 array_exp = exp_class(expressions=args) 124 125 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 126 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 127 128 return array_exp 129 130 131def build_convert_timezone( 132 args: t.List, default_source_tz: t.Optional[str] = None 133) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 134 if len(args) == 2: 135 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 136 return exp.ConvertTimezone( 137 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 138 ) 139 140 return exp.ConvertTimezone.from_arg_list(args) 141 142 143def build_trim(args: t.List, is_left: bool = True): 144 return exp.Trim( 145 this=seq_get(args, 0), 146 expression=seq_get(args, 1), 147 position="LEADING" if is_left else "TRAILING", 148 ) 149 150 151def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce: 152 return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl) 153 154 155class _Parser(type): 156 def __new__(cls, clsname, bases, attrs): 157 klass = super().__new__(cls, clsname, bases, attrs) 158 159 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 160 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 161 162 return klass 163 164 165class Parser(metaclass=_Parser): 
166 """ 167 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 168 169 Args: 170 error_level: The desired error level. 171 Default: ErrorLevel.IMMEDIATE 172 error_message_context: The amount of context to capture from a query string when displaying 173 the error message (in number of characters). 174 Default: 100 175 max_errors: Maximum number of error messages to include in a raised ParseError. 176 This is only relevant if error_level is ErrorLevel.RAISE. 177 Default: 3 178 """ 179 180 FUNCTIONS: t.Dict[str, t.Callable] = { 181 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 182 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 183 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 184 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 185 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 186 ), 187 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 188 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 189 ), 190 "CHAR": lambda args: exp.Chr(expressions=args), 191 "CHR": lambda args: exp.Chr(expressions=args), 192 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 193 "CONCAT": lambda args, dialect: exp.Concat( 194 expressions=args, 195 safe=not dialect.STRICT_STRING_CONCAT, 196 coalesce=dialect.CONCAT_COALESCE, 197 ), 198 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 199 expressions=args, 200 safe=not dialect.STRICT_STRING_CONCAT, 201 coalesce=dialect.CONCAT_COALESCE, 202 ), 203 "CONVERT_TIMEZONE": build_convert_timezone, 204 "DATE_TO_DATE_STR": lambda args: exp.Cast( 205 this=seq_get(args, 0), 206 to=exp.DataType(this=exp.DataType.Type.TEXT), 207 ), 208 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 209 start=seq_get(args, 0), 210 end=seq_get(args, 1), 211 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 212 
), 213 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 214 "HEX": build_hex, 215 "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)), 216 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 217 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 218 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 219 "LIKE": build_like, 220 "LOG": build_logarithm, 221 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 222 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 223 "LOWER": build_lower, 224 "LPAD": lambda args: build_pad(args), 225 "LEFTPAD": lambda args: build_pad(args), 226 "LTRIM": lambda args: build_trim(args), 227 "MOD": build_mod, 228 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 229 "RPAD": lambda args: build_pad(args, is_left=False), 230 "RTRIM": lambda args: build_trim(args, is_left=False), 231 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 232 if len(args) != 2 233 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 234 "TIME_TO_TIME_STR": lambda args: exp.Cast( 235 this=seq_get(args, 0), 236 to=exp.DataType(this=exp.DataType.Type.TEXT), 237 ), 238 "TO_HEX": build_hex, 239 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 240 this=exp.Cast( 241 this=seq_get(args, 0), 242 to=exp.DataType(this=exp.DataType.Type.TEXT), 243 ), 244 start=exp.Literal.number(1), 245 length=exp.Literal.number(10), 246 ), 247 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 248 "UPPER": build_upper, 249 "VAR_MAP": build_var_map, 250 } 251 252 NO_PAREN_FUNCTIONS = { 253 TokenType.CURRENT_DATE: exp.CurrentDate, 254 TokenType.CURRENT_DATETIME: exp.CurrentDate, 255 TokenType.CURRENT_TIME: exp.CurrentTime, 256 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 257 
TokenType.CURRENT_USER: exp.CurrentUser, 258 } 259 260 STRUCT_TYPE_TOKENS = { 261 TokenType.NESTED, 262 TokenType.OBJECT, 263 TokenType.STRUCT, 264 TokenType.UNION, 265 } 266 267 NESTED_TYPE_TOKENS = { 268 TokenType.ARRAY, 269 TokenType.LIST, 270 TokenType.LOWCARDINALITY, 271 TokenType.MAP, 272 TokenType.NULLABLE, 273 TokenType.RANGE, 274 *STRUCT_TYPE_TOKENS, 275 } 276 277 ENUM_TYPE_TOKENS = { 278 TokenType.ENUM, 279 TokenType.ENUM8, 280 TokenType.ENUM16, 281 } 282 283 AGGREGATE_TYPE_TOKENS = { 284 TokenType.AGGREGATEFUNCTION, 285 TokenType.SIMPLEAGGREGATEFUNCTION, 286 } 287 288 TYPE_TOKENS = { 289 TokenType.BIT, 290 TokenType.BOOLEAN, 291 TokenType.TINYINT, 292 TokenType.UTINYINT, 293 TokenType.SMALLINT, 294 TokenType.USMALLINT, 295 TokenType.INT, 296 TokenType.UINT, 297 TokenType.BIGINT, 298 TokenType.UBIGINT, 299 TokenType.INT128, 300 TokenType.UINT128, 301 TokenType.INT256, 302 TokenType.UINT256, 303 TokenType.MEDIUMINT, 304 TokenType.UMEDIUMINT, 305 TokenType.FIXEDSTRING, 306 TokenType.FLOAT, 307 TokenType.DOUBLE, 308 TokenType.CHAR, 309 TokenType.NCHAR, 310 TokenType.VARCHAR, 311 TokenType.NVARCHAR, 312 TokenType.BPCHAR, 313 TokenType.TEXT, 314 TokenType.MEDIUMTEXT, 315 TokenType.LONGTEXT, 316 TokenType.MEDIUMBLOB, 317 TokenType.LONGBLOB, 318 TokenType.BINARY, 319 TokenType.VARBINARY, 320 TokenType.JSON, 321 TokenType.JSONB, 322 TokenType.INTERVAL, 323 TokenType.TINYBLOB, 324 TokenType.TINYTEXT, 325 TokenType.TIME, 326 TokenType.TIMETZ, 327 TokenType.TIMESTAMP, 328 TokenType.TIMESTAMP_S, 329 TokenType.TIMESTAMP_MS, 330 TokenType.TIMESTAMP_NS, 331 TokenType.TIMESTAMPTZ, 332 TokenType.TIMESTAMPLTZ, 333 TokenType.TIMESTAMPNTZ, 334 TokenType.DATETIME, 335 TokenType.DATETIME64, 336 TokenType.DATE, 337 TokenType.DATE32, 338 TokenType.INT4RANGE, 339 TokenType.INT4MULTIRANGE, 340 TokenType.INT8RANGE, 341 TokenType.INT8MULTIRANGE, 342 TokenType.NUMRANGE, 343 TokenType.NUMMULTIRANGE, 344 TokenType.TSRANGE, 345 TokenType.TSMULTIRANGE, 346 TokenType.TSTZRANGE, 347 
TokenType.TSTZMULTIRANGE, 348 TokenType.DATERANGE, 349 TokenType.DATEMULTIRANGE, 350 TokenType.DECIMAL, 351 TokenType.DECIMAL32, 352 TokenType.DECIMAL64, 353 TokenType.DECIMAL128, 354 TokenType.UDECIMAL, 355 TokenType.BIGDECIMAL, 356 TokenType.UUID, 357 TokenType.GEOGRAPHY, 358 TokenType.GEOMETRY, 359 TokenType.HLLSKETCH, 360 TokenType.HSTORE, 361 TokenType.PSEUDO_TYPE, 362 TokenType.SUPER, 363 TokenType.SERIAL, 364 TokenType.SMALLSERIAL, 365 TokenType.BIGSERIAL, 366 TokenType.XML, 367 TokenType.YEAR, 368 TokenType.UNIQUEIDENTIFIER, 369 TokenType.USERDEFINED, 370 TokenType.MONEY, 371 TokenType.SMALLMONEY, 372 TokenType.ROWVERSION, 373 TokenType.IMAGE, 374 TokenType.VARIANT, 375 TokenType.VECTOR, 376 TokenType.OBJECT, 377 TokenType.OBJECT_IDENTIFIER, 378 TokenType.INET, 379 TokenType.IPADDRESS, 380 TokenType.IPPREFIX, 381 TokenType.IPV4, 382 TokenType.IPV6, 383 TokenType.UNKNOWN, 384 TokenType.NULL, 385 TokenType.NAME, 386 TokenType.TDIGEST, 387 *ENUM_TYPE_TOKENS, 388 *NESTED_TYPE_TOKENS, 389 *AGGREGATE_TYPE_TOKENS, 390 } 391 392 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 393 TokenType.BIGINT: TokenType.UBIGINT, 394 TokenType.INT: TokenType.UINT, 395 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 396 TokenType.SMALLINT: TokenType.USMALLINT, 397 TokenType.TINYINT: TokenType.UTINYINT, 398 TokenType.DECIMAL: TokenType.UDECIMAL, 399 } 400 401 SUBQUERY_PREDICATES = { 402 TokenType.ANY: exp.Any, 403 TokenType.ALL: exp.All, 404 TokenType.EXISTS: exp.Exists, 405 TokenType.SOME: exp.Any, 406 } 407 408 RESERVED_TOKENS = { 409 *Tokenizer.SINGLE_TOKENS.values(), 410 TokenType.SELECT, 411 } - {TokenType.IDENTIFIER} 412 413 DB_CREATABLES = { 414 TokenType.DATABASE, 415 TokenType.DICTIONARY, 416 TokenType.MODEL, 417 TokenType.SCHEMA, 418 TokenType.SEQUENCE, 419 TokenType.STORAGE_INTEGRATION, 420 TokenType.TABLE, 421 TokenType.TAG, 422 TokenType.VIEW, 423 TokenType.WAREHOUSE, 424 TokenType.STREAMLIT, 425 } 426 427 CREATABLES = { 428 TokenType.COLUMN, 429 TokenType.CONSTRAINT, 430 
TokenType.FOREIGN_KEY, 431 TokenType.FUNCTION, 432 TokenType.INDEX, 433 TokenType.PROCEDURE, 434 *DB_CREATABLES, 435 } 436 437 ALTERABLES = { 438 TokenType.INDEX, 439 TokenType.TABLE, 440 TokenType.VIEW, 441 } 442 443 # Tokens that can represent identifiers 444 ID_VAR_TOKENS = { 445 TokenType.ALL, 446 TokenType.VAR, 447 TokenType.ANTI, 448 TokenType.APPLY, 449 TokenType.ASC, 450 TokenType.ASOF, 451 TokenType.AUTO_INCREMENT, 452 TokenType.BEGIN, 453 TokenType.BPCHAR, 454 TokenType.CACHE, 455 TokenType.CASE, 456 TokenType.COLLATE, 457 TokenType.COMMAND, 458 TokenType.COMMENT, 459 TokenType.COMMIT, 460 TokenType.CONSTRAINT, 461 TokenType.COPY, 462 TokenType.CUBE, 463 TokenType.DEFAULT, 464 TokenType.DELETE, 465 TokenType.DESC, 466 TokenType.DESCRIBE, 467 TokenType.DICTIONARY, 468 TokenType.DIV, 469 TokenType.END, 470 TokenType.EXECUTE, 471 TokenType.ESCAPE, 472 TokenType.FALSE, 473 TokenType.FIRST, 474 TokenType.FILTER, 475 TokenType.FINAL, 476 TokenType.FORMAT, 477 TokenType.FULL, 478 TokenType.IDENTIFIER, 479 TokenType.IS, 480 TokenType.ISNULL, 481 TokenType.INTERVAL, 482 TokenType.KEEP, 483 TokenType.KILL, 484 TokenType.LEFT, 485 TokenType.LOAD, 486 TokenType.MERGE, 487 TokenType.NATURAL, 488 TokenType.NEXT, 489 TokenType.OFFSET, 490 TokenType.OPERATOR, 491 TokenType.ORDINALITY, 492 TokenType.OVERLAPS, 493 TokenType.OVERWRITE, 494 TokenType.PARTITION, 495 TokenType.PERCENT, 496 TokenType.PIVOT, 497 TokenType.PRAGMA, 498 TokenType.RANGE, 499 TokenType.RECURSIVE, 500 TokenType.REFERENCES, 501 TokenType.REFRESH, 502 TokenType.RENAME, 503 TokenType.REPLACE, 504 TokenType.RIGHT, 505 TokenType.ROLLUP, 506 TokenType.ROW, 507 TokenType.ROWS, 508 TokenType.SEMI, 509 TokenType.SET, 510 TokenType.SETTINGS, 511 TokenType.SHOW, 512 TokenType.TEMPORARY, 513 TokenType.TOP, 514 TokenType.TRUE, 515 TokenType.TRUNCATE, 516 TokenType.UNIQUE, 517 TokenType.UNNEST, 518 TokenType.UNPIVOT, 519 TokenType.UPDATE, 520 TokenType.USE, 521 TokenType.VOLATILE, 522 TokenType.WINDOW, 523 
*CREATABLES, 524 *SUBQUERY_PREDICATES, 525 *TYPE_TOKENS, 526 *NO_PAREN_FUNCTIONS, 527 } 528 ID_VAR_TOKENS.remove(TokenType.UNION) 529 530 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 531 532 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 533 TokenType.ANTI, 534 TokenType.APPLY, 535 TokenType.ASOF, 536 TokenType.FULL, 537 TokenType.LEFT, 538 TokenType.LOCK, 539 TokenType.NATURAL, 540 TokenType.OFFSET, 541 TokenType.RIGHT, 542 TokenType.SEMI, 543 TokenType.WINDOW, 544 } 545 546 ALIAS_TOKENS = ID_VAR_TOKENS 547 548 ARRAY_CONSTRUCTORS = { 549 "ARRAY": exp.Array, 550 "LIST": exp.List, 551 } 552 553 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 554 555 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 556 557 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 558 559 FUNC_TOKENS = { 560 TokenType.COLLATE, 561 TokenType.COMMAND, 562 TokenType.CURRENT_DATE, 563 TokenType.CURRENT_DATETIME, 564 TokenType.CURRENT_TIMESTAMP, 565 TokenType.CURRENT_TIME, 566 TokenType.CURRENT_USER, 567 TokenType.FILTER, 568 TokenType.FIRST, 569 TokenType.FORMAT, 570 TokenType.GLOB, 571 TokenType.IDENTIFIER, 572 TokenType.INDEX, 573 TokenType.ISNULL, 574 TokenType.ILIKE, 575 TokenType.INSERT, 576 TokenType.LIKE, 577 TokenType.MERGE, 578 TokenType.OFFSET, 579 TokenType.PRIMARY_KEY, 580 TokenType.RANGE, 581 TokenType.REPLACE, 582 TokenType.RLIKE, 583 TokenType.ROW, 584 TokenType.UNNEST, 585 TokenType.VAR, 586 TokenType.LEFT, 587 TokenType.RIGHT, 588 TokenType.SEQUENCE, 589 TokenType.DATE, 590 TokenType.DATETIME, 591 TokenType.TABLE, 592 TokenType.TIMESTAMP, 593 TokenType.TIMESTAMPTZ, 594 TokenType.TRUNCATE, 595 TokenType.WINDOW, 596 TokenType.XOR, 597 *TYPE_TOKENS, 598 *SUBQUERY_PREDICATES, 599 } 600 601 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 602 TokenType.AND: exp.And, 603 } 604 605 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 606 TokenType.COLON_EQ: exp.PropertyEQ, 607 } 608 609 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 610 
TokenType.OR: exp.Or, 611 } 612 613 EQUALITY = { 614 TokenType.EQ: exp.EQ, 615 TokenType.NEQ: exp.NEQ, 616 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 617 } 618 619 COMPARISON = { 620 TokenType.GT: exp.GT, 621 TokenType.GTE: exp.GTE, 622 TokenType.LT: exp.LT, 623 TokenType.LTE: exp.LTE, 624 } 625 626 BITWISE = { 627 TokenType.AMP: exp.BitwiseAnd, 628 TokenType.CARET: exp.BitwiseXor, 629 TokenType.PIPE: exp.BitwiseOr, 630 } 631 632 TERM = { 633 TokenType.DASH: exp.Sub, 634 TokenType.PLUS: exp.Add, 635 TokenType.MOD: exp.Mod, 636 TokenType.COLLATE: exp.Collate, 637 } 638 639 FACTOR = { 640 TokenType.DIV: exp.IntDiv, 641 TokenType.LR_ARROW: exp.Distance, 642 TokenType.SLASH: exp.Div, 643 TokenType.STAR: exp.Mul, 644 } 645 646 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 647 648 TIMES = { 649 TokenType.TIME, 650 TokenType.TIMETZ, 651 } 652 653 TIMESTAMPS = { 654 TokenType.TIMESTAMP, 655 TokenType.TIMESTAMPTZ, 656 TokenType.TIMESTAMPLTZ, 657 *TIMES, 658 } 659 660 SET_OPERATIONS = { 661 TokenType.UNION, 662 TokenType.INTERSECT, 663 TokenType.EXCEPT, 664 } 665 666 JOIN_METHODS = { 667 TokenType.ASOF, 668 TokenType.NATURAL, 669 TokenType.POSITIONAL, 670 } 671 672 JOIN_SIDES = { 673 TokenType.LEFT, 674 TokenType.RIGHT, 675 TokenType.FULL, 676 } 677 678 JOIN_KINDS = { 679 TokenType.ANTI, 680 TokenType.CROSS, 681 TokenType.INNER, 682 TokenType.OUTER, 683 TokenType.SEMI, 684 TokenType.STRAIGHT_JOIN, 685 } 686 687 JOIN_HINTS: t.Set[str] = set() 688 689 LAMBDAS = { 690 TokenType.ARROW: lambda self, expressions: self.expression( 691 exp.Lambda, 692 this=self._replace_lambda( 693 self._parse_assignment(), 694 expressions, 695 ), 696 expressions=expressions, 697 ), 698 TokenType.FARROW: lambda self, expressions: self.expression( 699 exp.Kwarg, 700 this=exp.var(expressions[0].name), 701 expression=self._parse_assignment(), 702 ), 703 } 704 705 COLUMN_OPERATORS = { 706 TokenType.DOT: None, 707 TokenType.DCOLON: lambda self, this, to: self.expression( 708 exp.Cast if 
self.STRICT_CAST else exp.TryCast, 709 this=this, 710 to=to, 711 ), 712 TokenType.ARROW: lambda self, this, path: self.expression( 713 exp.JSONExtract, 714 this=this, 715 expression=self.dialect.to_json_path(path), 716 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 717 ), 718 TokenType.DARROW: lambda self, this, path: self.expression( 719 exp.JSONExtractScalar, 720 this=this, 721 expression=self.dialect.to_json_path(path), 722 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 723 ), 724 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 725 exp.JSONBExtract, 726 this=this, 727 expression=path, 728 ), 729 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 730 exp.JSONBExtractScalar, 731 this=this, 732 expression=path, 733 ), 734 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 735 exp.JSONBContains, 736 this=this, 737 expression=key, 738 ), 739 } 740 741 EXPRESSION_PARSERS = { 742 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 743 exp.Column: lambda self: self._parse_column(), 744 exp.Condition: lambda self: self._parse_assignment(), 745 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 746 exp.Expression: lambda self: self._parse_expression(), 747 exp.From: lambda self: self._parse_from(joins=True), 748 exp.Group: lambda self: self._parse_group(), 749 exp.Having: lambda self: self._parse_having(), 750 exp.Identifier: lambda self: self._parse_id_var(), 751 exp.Join: lambda self: self._parse_join(), 752 exp.Lambda: lambda self: self._parse_lambda(), 753 exp.Lateral: lambda self: self._parse_lateral(), 754 exp.Limit: lambda self: self._parse_limit(), 755 exp.Offset: lambda self: self._parse_offset(), 756 exp.Order: lambda self: self._parse_order(), 757 exp.Ordered: lambda self: self._parse_ordered(), 758 exp.Properties: lambda self: self._parse_properties(), 759 exp.Qualify: lambda self: self._parse_qualify(), 760 exp.Returning: lambda self: 
self._parse_returning(), 761 exp.Select: lambda self: self._parse_select(), 762 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 763 exp.Table: lambda self: self._parse_table_parts(), 764 exp.TableAlias: lambda self: self._parse_table_alias(), 765 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 766 exp.Where: lambda self: self._parse_where(), 767 exp.Window: lambda self: self._parse_named_window(), 768 exp.With: lambda self: self._parse_with(), 769 "JOIN_TYPE": lambda self: self._parse_join_parts(), 770 } 771 772 STATEMENT_PARSERS = { 773 TokenType.ALTER: lambda self: self._parse_alter(), 774 TokenType.BEGIN: lambda self: self._parse_transaction(), 775 TokenType.CACHE: lambda self: self._parse_cache(), 776 TokenType.COMMENT: lambda self: self._parse_comment(), 777 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 778 TokenType.COPY: lambda self: self._parse_copy(), 779 TokenType.CREATE: lambda self: self._parse_create(), 780 TokenType.DELETE: lambda self: self._parse_delete(), 781 TokenType.DESC: lambda self: self._parse_describe(), 782 TokenType.DESCRIBE: lambda self: self._parse_describe(), 783 TokenType.DROP: lambda self: self._parse_drop(), 784 TokenType.GRANT: lambda self: self._parse_grant(), 785 TokenType.INSERT: lambda self: self._parse_insert(), 786 TokenType.KILL: lambda self: self._parse_kill(), 787 TokenType.LOAD: lambda self: self._parse_load(), 788 TokenType.MERGE: lambda self: self._parse_merge(), 789 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 790 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 791 TokenType.REFRESH: lambda self: self._parse_refresh(), 792 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 793 TokenType.SET: lambda self: self._parse_set(), 794 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 795 TokenType.UNCACHE: lambda self: self._parse_uncache(), 796 TokenType.UPDATE: lambda self: 
self._parse_update(), 797 TokenType.USE: lambda self: self.expression( 798 exp.Use, 799 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 800 this=self._parse_table(schema=False), 801 ), 802 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 803 } 804 805 UNARY_PARSERS = { 806 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 807 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 808 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 809 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 810 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 811 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 812 } 813 814 STRING_PARSERS = { 815 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 816 exp.RawString, this=token.text 817 ), 818 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 819 exp.National, this=token.text 820 ), 821 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 822 TokenType.STRING: lambda self, token: self.expression( 823 exp.Literal, this=token.text, is_string=True 824 ), 825 TokenType.UNICODE_STRING: lambda self, token: self.expression( 826 exp.UnicodeString, 827 this=token.text, 828 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 829 ), 830 } 831 832 NUMERIC_PARSERS = { 833 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 834 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 835 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 836 TokenType.NUMBER: lambda self, token: self.expression( 837 exp.Literal, this=token.text, is_string=False 838 ), 839 } 840 841 PRIMARY_PARSERS = { 842 **STRING_PARSERS, 843 
**NUMERIC_PARSERS, 844 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 845 TokenType.NULL: lambda self, _: self.expression(exp.Null), 846 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 847 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 848 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 849 TokenType.STAR: lambda self, _: self._parse_star_ops(), 850 } 851 852 PLACEHOLDER_PARSERS = { 853 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 854 TokenType.PARAMETER: lambda self: self._parse_parameter(), 855 TokenType.COLON: lambda self: ( 856 self.expression(exp.Placeholder, this=self._prev.text) 857 if self._match_set(self.ID_VAR_TOKENS) 858 else None 859 ), 860 } 861 862 RANGE_PARSERS = { 863 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 864 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 865 TokenType.GLOB: binary_range_parser(exp.Glob), 866 TokenType.ILIKE: binary_range_parser(exp.ILike), 867 TokenType.IN: lambda self, this: self._parse_in(this), 868 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 869 TokenType.IS: lambda self, this: self._parse_is(this), 870 TokenType.LIKE: binary_range_parser(exp.Like), 871 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 872 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 873 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 874 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 875 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 876 } 877 878 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 879 "ALLOWED_VALUES": lambda self: self.expression( 880 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 881 ), 882 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 883 "AUTO": lambda self: self._parse_auto_property(), 884 "AUTO_INCREMENT": lambda self: 
self._parse_property_assignment(exp.AutoIncrementProperty), 885 "BACKUP": lambda self: self.expression( 886 exp.BackupProperty, this=self._parse_var(any_token=True) 887 ), 888 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 889 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 890 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 891 "CHECKSUM": lambda self: self._parse_checksum(), 892 "CLUSTER BY": lambda self: self._parse_cluster(), 893 "CLUSTERED": lambda self: self._parse_clustered_by(), 894 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 895 exp.CollateProperty, **kwargs 896 ), 897 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 898 "CONTAINS": lambda self: self._parse_contains_property(), 899 "COPY": lambda self: self._parse_copy_property(), 900 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 901 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 902 "DEFINER": lambda self: self._parse_definer(), 903 "DETERMINISTIC": lambda self: self.expression( 904 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 905 ), 906 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 907 "DUPLICATE": lambda self: self._parse_duplicate(), 908 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 909 "DISTKEY": lambda self: self._parse_distkey(), 910 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 911 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 912 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 913 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 914 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 915 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 916 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 917 "FREESPACE": 
lambda self: self._parse_freespace(), 918 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 919 "HEAP": lambda self: self.expression(exp.HeapProperty), 920 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 921 "IMMUTABLE": lambda self: self.expression( 922 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 923 ), 924 "INHERITS": lambda self: self.expression( 925 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 926 ), 927 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 928 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 929 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 930 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 931 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 932 "LIKE": lambda self: self._parse_create_like(), 933 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 934 "LOCK": lambda self: self._parse_locking(), 935 "LOCKING": lambda self: self._parse_locking(), 936 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 937 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 938 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 939 "MODIFIES": lambda self: self._parse_modifies_property(), 940 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 941 "NO": lambda self: self._parse_no_property(), 942 "ON": lambda self: self._parse_on_property(), 943 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 944 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 945 "PARTITION": lambda self: self._parse_partitioned_of(), 946 "PARTITION BY": lambda self: self._parse_partitioned_by(), 947 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 948 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 949 "PRIMARY KEY": 
lambda self: self._parse_primary_key(in_props=True), 950 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 951 "READS": lambda self: self._parse_reads_property(), 952 "REMOTE": lambda self: self._parse_remote_with_connection(), 953 "RETURNS": lambda self: self._parse_returns(), 954 "STRICT": lambda self: self.expression(exp.StrictProperty), 955 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 956 "ROW": lambda self: self._parse_row(), 957 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 958 "SAMPLE": lambda self: self.expression( 959 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 960 ), 961 "SECURE": lambda self: self.expression(exp.SecureProperty), 962 "SECURITY": lambda self: self._parse_security(), 963 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 964 "SETTINGS": lambda self: self._parse_settings_property(), 965 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 966 "SORTKEY": lambda self: self._parse_sortkey(), 967 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 968 "STABLE": lambda self: self.expression( 969 exp.StabilityProperty, this=exp.Literal.string("STABLE") 970 ), 971 "STORED": lambda self: self._parse_stored(), 972 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 973 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 974 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 975 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 976 "TO": lambda self: self._parse_to_table(), 977 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 978 "TRANSFORM": lambda self: self.expression( 979 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 980 ), 981 "TTL": lambda self: self._parse_ttl(), 982 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 983 "UNLOGGED": lambda 
self: self.expression(exp.UnloggedProperty), 984 "VOLATILE": lambda self: self._parse_volatile_property(), 985 "WITH": lambda self: self._parse_with_property(), 986 } 987 988 CONSTRAINT_PARSERS = { 989 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 990 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 991 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 992 "CHARACTER SET": lambda self: self.expression( 993 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 994 ), 995 "CHECK": lambda self: self.expression( 996 exp.CheckColumnConstraint, 997 this=self._parse_wrapped(self._parse_assignment), 998 enforced=self._match_text_seq("ENFORCED"), 999 ), 1000 "COLLATE": lambda self: self.expression( 1001 exp.CollateColumnConstraint, 1002 this=self._parse_identifier() or self._parse_column(), 1003 ), 1004 "COMMENT": lambda self: self.expression( 1005 exp.CommentColumnConstraint, this=self._parse_string() 1006 ), 1007 "COMPRESS": lambda self: self._parse_compress(), 1008 "CLUSTERED": lambda self: self.expression( 1009 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1010 ), 1011 "NONCLUSTERED": lambda self: self.expression( 1012 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1013 ), 1014 "DEFAULT": lambda self: self.expression( 1015 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1016 ), 1017 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1018 "EPHEMERAL": lambda self: self.expression( 1019 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1020 ), 1021 "EXCLUDE": lambda self: self.expression( 1022 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1023 ), 1024 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1025 "FORMAT": lambda self: self.expression( 1026 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1027 ), 1028 "GENERATED": lambda 
self: self._parse_generated_as_identity(), 1029 "IDENTITY": lambda self: self._parse_auto_increment(), 1030 "INLINE": lambda self: self._parse_inline(), 1031 "LIKE": lambda self: self._parse_create_like(), 1032 "NOT": lambda self: self._parse_not_constraint(), 1033 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1034 "ON": lambda self: ( 1035 self._match(TokenType.UPDATE) 1036 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1037 ) 1038 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1039 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1040 "PERIOD": lambda self: self._parse_period_for_system_time(), 1041 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1042 "REFERENCES": lambda self: self._parse_references(match=False), 1043 "TITLE": lambda self: self.expression( 1044 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1045 ), 1046 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1047 "UNIQUE": lambda self: self._parse_unique(), 1048 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1049 "WITH": lambda self: self.expression( 1050 exp.Properties, expressions=self._parse_wrapped_properties() 1051 ), 1052 } 1053 1054 ALTER_PARSERS = { 1055 "ADD": lambda self: self._parse_alter_table_add(), 1056 "ALTER": lambda self: self._parse_alter_table_alter(), 1057 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1058 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1059 "DROP": lambda self: self._parse_alter_table_drop(), 1060 "RENAME": lambda self: self._parse_alter_table_rename(), 1061 "SET": lambda self: self._parse_alter_table_set(), 1062 "AS": lambda self: self._parse_select(), 1063 } 1064 1065 ALTER_ALTER_PARSERS = { 1066 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1067 "DISTSTYLE": lambda self: 
self._parse_alter_diststyle(), 1068 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1069 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1070 } 1071 1072 SCHEMA_UNNAMED_CONSTRAINTS = { 1073 "CHECK", 1074 "EXCLUDE", 1075 "FOREIGN KEY", 1076 "LIKE", 1077 "PERIOD", 1078 "PRIMARY KEY", 1079 "UNIQUE", 1080 } 1081 1082 NO_PAREN_FUNCTION_PARSERS = { 1083 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1084 "CASE": lambda self: self._parse_case(), 1085 "CONNECT_BY_ROOT": lambda self: self.expression( 1086 exp.ConnectByRoot, this=self._parse_column() 1087 ), 1088 "IF": lambda self: self._parse_if(), 1089 "NEXT": lambda self: self._parse_next_value_for(), 1090 } 1091 1092 INVALID_FUNC_NAME_TOKENS = { 1093 TokenType.IDENTIFIER, 1094 TokenType.STRING, 1095 } 1096 1097 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1098 1099 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1100 1101 FUNCTION_PARSERS = { 1102 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1103 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1104 "DECODE": lambda self: self._parse_decode(), 1105 "EXTRACT": lambda self: self._parse_extract(), 1106 "GAP_FILL": lambda self: self._parse_gap_fill(), 1107 "JSON_OBJECT": lambda self: self._parse_json_object(), 1108 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1109 "JSON_TABLE": lambda self: self._parse_json_table(), 1110 "MATCH": lambda self: self._parse_match_against(), 1111 "NORMALIZE": lambda self: self._parse_normalize(), 1112 "OPENJSON": lambda self: self._parse_open_json(), 1113 "OVERLAY": lambda self: self._parse_overlay(), 1114 "POSITION": lambda self: self._parse_position(), 1115 "PREDICT": lambda self: self._parse_predict(), 1116 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1117 "STRING_AGG": lambda self: self._parse_string_agg(), 1118 "SUBSTRING": lambda self: self._parse_substring(), 1119 "TRIM": lambda self: self._parse_trim(), 
1120 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1121 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1122 } 1123 1124 QUERY_MODIFIER_PARSERS = { 1125 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1126 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1127 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1128 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1129 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1130 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1131 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1132 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1133 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1134 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1135 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1136 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1137 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1138 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1139 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1140 TokenType.CLUSTER_BY: lambda self: ( 1141 "cluster", 1142 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1143 ), 1144 TokenType.DISTRIBUTE_BY: lambda self: ( 1145 "distribute", 1146 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1147 ), 1148 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1149 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1150 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1151 } 1152 1153 SET_PARSERS = { 1154 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1155 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 
1156 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1157 "TRANSACTION": lambda self: self._parse_set_transaction(), 1158 } 1159 1160 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1161 1162 TYPE_LITERAL_PARSERS = { 1163 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1164 } 1165 1166 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1167 1168 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1169 1170 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1171 1172 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1173 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1174 "ISOLATION": ( 1175 ("LEVEL", "REPEATABLE", "READ"), 1176 ("LEVEL", "READ", "COMMITTED"), 1177 ("LEVEL", "READ", "UNCOMITTED"), 1178 ("LEVEL", "SERIALIZABLE"), 1179 ), 1180 "READ": ("WRITE", "ONLY"), 1181 } 1182 1183 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1184 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1185 ) 1186 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1187 1188 CREATE_SEQUENCE: OPTIONS_TYPE = { 1189 "SCALE": ("EXTEND", "NOEXTEND"), 1190 "SHARD": ("EXTEND", "NOEXTEND"), 1191 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1192 **dict.fromkeys( 1193 ( 1194 "SESSION", 1195 "GLOBAL", 1196 "KEEP", 1197 "NOKEEP", 1198 "ORDER", 1199 "NOORDER", 1200 "NOCACHE", 1201 "CYCLE", 1202 "NOCYCLE", 1203 "NOMINVALUE", 1204 "NOMAXVALUE", 1205 "NOSCALE", 1206 "NOSHARD", 1207 ), 1208 tuple(), 1209 ), 1210 } 1211 1212 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1213 1214 USABLES: OPTIONS_TYPE = dict.fromkeys( 1215 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1216 ) 1217 1218 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1219 1220 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1221 "TYPE": ("EVOLUTION",), 1222 **dict.fromkeys(("BINDING", "COMPENSATION", 
"EVOLUTION"), tuple()), 1223 } 1224 1225 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1226 "NOT": ("ENFORCED",), 1227 "MATCH": ( 1228 "FULL", 1229 "PARTIAL", 1230 "SIMPLE", 1231 ), 1232 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1233 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1234 } 1235 1236 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1237 1238 CLONE_KEYWORDS = {"CLONE", "COPY"} 1239 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1240 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1241 1242 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1243 1244 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1245 1246 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1247 1248 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1249 1250 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1251 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1252 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1253 1254 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1255 1256 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1257 1258 ADD_CONSTRAINT_TOKENS = { 1259 TokenType.CONSTRAINT, 1260 TokenType.FOREIGN_KEY, 1261 TokenType.INDEX, 1262 TokenType.KEY, 1263 TokenType.PRIMARY_KEY, 1264 TokenType.UNIQUE, 1265 } 1266 1267 DISTINCT_TOKENS = {TokenType.DISTINCT} 1268 1269 NULL_TOKENS = {TokenType.NULL} 1270 1271 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1272 1273 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1274 1275 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1276 1277 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1278 1279 ODBC_DATETIME_LITERALS = { 1280 "d": exp.Date, 1281 "t": exp.Time, 1282 "ts": exp.Timestamp, 1283 } 1284 1285 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1286 
1287 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1288 1289 STRICT_CAST = True 1290 1291 PREFIXED_PIVOT_COLUMNS = False 1292 IDENTIFY_PIVOT_STRINGS = False 1293 1294 LOG_DEFAULTS_TO_LN = False 1295 1296 # Whether ADD is present for each column added by ALTER TABLE 1297 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1298 1299 # Whether the table sample clause expects CSV syntax 1300 TABLESAMPLE_CSV = False 1301 1302 # The default method used for table sampling 1303 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1304 1305 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1306 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1307 1308 # Whether the TRIM function expects the characters to trim as its first argument 1309 TRIM_PATTERN_FIRST = False 1310 1311 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1312 STRING_ALIASES = False 1313 1314 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1315 MODIFIERS_ATTACHED_TO_SET_OP = True 1316 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1317 1318 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1319 NO_PAREN_IF_COMMANDS = True 1320 1321 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1322 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1323 1324 # Whether the `:` operator is used to extract a value from a VARIANT column 1325 COLON_IS_VARIANT_EXTRACT = False 1326 1327 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1328 # If this is True and '(' is not found, the keyword will be treated as an identifier 1329 VALUES_FOLLOWED_BY_PAREN = True 1330 1331 # Whether implicit unnesting is supported, e.g. 
    # SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: How parse errors are surfaced (defaults to ErrorLevel.IMMEDIATE).
            error_message_context: Number of characters of SQL shown around an error location.
            max_errors: Maximum number of error messages concatenated into a single ParseError.
            dialect: The dialect (name, class or instance) whose rules drive parsing.
        """
        # Imported locally; importing sqlglot.dialects at module level would be circular.
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Clears all mutable parser state: source SQL, errors and the token cursor."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag each failure with the type that was attempted, so the merged
                # error report says which targets were tried.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Splits `raw_tokens` on semicolons and applies `parse_method` to each chunk."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A semicolon carrying comments becomes a chunk of its own, so those
                # comments are preserved as a standalone parsed unit.
                if token.comments:
                    chunks.append([token])

                # Avoid opening a trailing empty chunk after a final semicolon.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            # Position the cursor just before the first token of this chunk.
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Tokens left over after parsing mean the chunk wasn't fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The offending span is underlined with ANSI escape codes (\033[4m ... \033[0m).
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # If explicit comments were given, attach those; otherwise consume any
        # comments pending on the previously advanced-past token.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Moves pending token comments onto `expression` and clears them so they
        # aren't attached twice.
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the original SQL covering `start` through `end` inclusive.
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        # True when the previous and current tokens are adjacent in the source text.
        # NOTE(review): the chain returns None (not False) when _prev/_curr is unset,
        # which is falsy but doesn't match the -> bool annotation — confirm callers
        # only use this in a boolean context.
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        # Moves the token cursor forward (or backward, for negative deltas via
        # _retreat) and refreshes the _curr/_next/_prev/_prev_comments views.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewinds (or fast-forwards) the cursor to an absolute index.
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        """Wraps the remainder of the statement in an opaque exp.Command node."""
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        # Force IMMEDIATE so any failure surfaces as a catchable ParseError here.
        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses COMMENT ON <kind> <name> IS '<text>' into exp.Comment."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown target kind: fall back to an opaque command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause into exp.MergeTreeTTL."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Top-level statement dispatcher: keyword parsers, commands, then expressions."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parses a DROP statement; falls back to exp.Command for unknown targets."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; returns a truthy value only if the full sequence matched.
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses a CREATE statement; falls back to exp.Command on unsupported syntax."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
or self._match_pair(TokenType.OR, TokenType.REPLACE) 1761 or self._match_pair(TokenType.OR, TokenType.ALTER) 1762 ) 1763 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1764 1765 unique = self._match(TokenType.UNIQUE) 1766 1767 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1768 clustered = True 1769 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1770 "COLUMNSTORE" 1771 ): 1772 clustered = False 1773 else: 1774 clustered = None 1775 1776 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1777 self._advance() 1778 1779 properties = None 1780 create_token = self._match_set(self.CREATABLES) and self._prev 1781 1782 if not create_token: 1783 # exp.Properties.Location.POST_CREATE 1784 properties = self._parse_properties() 1785 create_token = self._match_set(self.CREATABLES) and self._prev 1786 1787 if not properties or not create_token: 1788 return self._parse_as_command(start) 1789 1790 concurrently = self._match_text_seq("CONCURRENTLY") 1791 exists = self._parse_exists(not_=True) 1792 this = None 1793 expression: t.Optional[exp.Expression] = None 1794 indexes = None 1795 no_schema_binding = None 1796 begin = None 1797 end = None 1798 clone = None 1799 1800 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1801 nonlocal properties 1802 if properties and temp_props: 1803 properties.expressions.extend(temp_props.expressions) 1804 elif temp_props: 1805 properties = temp_props 1806 1807 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1808 this = self._parse_user_defined_function(kind=create_token.token_type) 1809 1810 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1811 extend_props(self._parse_properties()) 1812 1813 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1814 extend_props(self._parse_properties()) 1815 1816 if not expression: 1817 if self._match(TokenType.COMMAND): 1818 expression = 
self._parse_as_command(self._prev) 1819 else: 1820 begin = self._match(TokenType.BEGIN) 1821 return_ = self._match_text_seq("RETURN") 1822 1823 if self._match(TokenType.STRING, advance=False): 1824 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1825 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1826 expression = self._parse_string() 1827 extend_props(self._parse_properties()) 1828 else: 1829 expression = self._parse_statement() 1830 1831 end = self._match_text_seq("END") 1832 1833 if return_: 1834 expression = self.expression(exp.Return, this=expression) 1835 elif create_token.token_type == TokenType.INDEX: 1836 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1837 if not self._match(TokenType.ON): 1838 index = self._parse_id_var() 1839 anonymous = False 1840 else: 1841 index = None 1842 anonymous = True 1843 1844 this = self._parse_index(index=index, anonymous=anonymous) 1845 elif create_token.token_type in self.DB_CREATABLES: 1846 table_parts = self._parse_table_parts( 1847 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1848 ) 1849 1850 # exp.Properties.Location.POST_NAME 1851 self._match(TokenType.COMMA) 1852 extend_props(self._parse_properties(before=True)) 1853 1854 this = self._parse_schema(this=table_parts) 1855 1856 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1857 extend_props(self._parse_properties()) 1858 1859 self._match(TokenType.ALIAS) 1860 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1861 # exp.Properties.Location.POST_ALIAS 1862 extend_props(self._parse_properties()) 1863 1864 if create_token.token_type == TokenType.SEQUENCE: 1865 expression = self._parse_types() 1866 extend_props(self._parse_properties()) 1867 else: 1868 expression = self._parse_ddl_select() 1869 1870 if create_token.token_type == TokenType.TABLE: 1871 # exp.Properties.Location.POST_EXPRESSION 
                # exp.Properties.Location.POST_EXPRESSION (tail of _parse_create, header above)
                extend_props(self._parse_properties())

            indexes = []
            while True:
                index = self._parse_index()

                # exp.Properties.Location.POST_INDEX
                extend_props(self._parse_properties())
                if not index:
                    break
                else:
                    self._match(TokenType.COMMA)
                    indexes.append(index)
        elif create_token.token_type == TokenType.VIEW:
            if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        # Leftover tokens (other than a closing paren/comma) mean we couldn't fully
        # parse the statement, so fall back to treating it as an opaque command.
        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    # Parses CREATE SEQUENCE options (INCREMENT BY, MINVALUE, MAXVALUE, START WITH,
    # CACHE, OWNED BY, plus anything in CREATE_SEQUENCE). Returns None if no token
    # was consumed, so callers can distinguish "no sequence options" from an empty node.
    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifier kwargs that actually matched; a parser that
                # doesn't accept one of them raises TypeError, which we surface as a parse error.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    # Parses a parenthesized, comma-separated property list.
    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        # No "=" after the parsed column: this wasn't a key=value property, so rewind
        # and try sequence options instead.
        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    # Parses STORED [AS] <format> or STORED AS INPUTFORMAT '...' OUTPUTFORMAT '...'.
    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    # Unquoted identifiers are normalized to exp.Var; anything else passes through.
    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self,
                                   exp_class: t.Type[E], **kwargs: t.Any) -> E:
        # Generic "<NAME> [=|AS] <value>" property: the optional separators are skipped
        # and the value is parsed as an unquoted field.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    # Repeatedly applies the property parser (the Teradata "before" variant when
    # requested) until nothing more matches; returns None when no property was found.
    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            # A single parse may yield one property or a list of them.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    # Disambiguates VOLATILE by peeking at the token two positions back: if it is one
    # of PRE_VOLATILE_TOKENS this is a VOLATILE table property, otherwise it is the
    # VOLATILE stability specifier.
    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        # ON is the default when neither ON nor OFF is present.
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_duplicate(self) -> exp.DuplicateKeyProperty:
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False)
        return self.expression(exp.DuplicateKeyProperty, expressions=expressions)

    # Dispatches the various WITH <property> forms (wrapped lists, JOURNAL, view
    # attributes, [NO] DATA, SERDEPROPERTIES, SCHEMA binding, isolated loading).
    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
self._match(TokenType.EQ) 2206 2207 user = self._parse_id_var() 2208 self._match(TokenType.PARAMETER) 2209 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2210 2211 if not user or not host: 2212 return None 2213 2214 return exp.DefinerProperty(this=f"{user}@{host}") 2215 2216 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2217 self._match(TokenType.TABLE) 2218 self._match(TokenType.EQ) 2219 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2220 2221 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2222 return self.expression(exp.LogProperty, no=no) 2223 2224 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2225 return self.expression(exp.JournalProperty, **kwargs) 2226 2227 def _parse_checksum(self) -> exp.ChecksumProperty: 2228 self._match(TokenType.EQ) 2229 2230 on = None 2231 if self._match(TokenType.ON): 2232 on = True 2233 elif self._match_text_seq("OFF"): 2234 on = False 2235 2236 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2237 2238 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2239 return self.expression( 2240 exp.Cluster, 2241 expressions=( 2242 self._parse_wrapped_csv(self._parse_ordered) 2243 if wrapped 2244 else self._parse_csv(self._parse_ordered) 2245 ), 2246 ) 2247 2248 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2249 self._match_text_seq("BY") 2250 2251 self._match_l_paren() 2252 expressions = self._parse_csv(self._parse_column) 2253 self._match_r_paren() 2254 2255 if self._match_text_seq("SORTED", "BY"): 2256 self._match_l_paren() 2257 sorted_by = self._parse_csv(self._parse_ordered) 2258 self._match_r_paren() 2259 else: 2260 sorted_by = None 2261 2262 self._match(TokenType.INTO) 2263 buckets = self._parse_number() 2264 self._match_text_seq("BUCKETS") 2265 2266 return self.expression( 2267 exp.ClusteredByProperty, 2268 expressions=expressions, 2269 
sorted_by=sorted_by, 2270 buckets=buckets, 2271 ) 2272 2273 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2274 if not self._match_text_seq("GRANTS"): 2275 self._retreat(self._index - 1) 2276 return None 2277 2278 return self.expression(exp.CopyGrantsProperty) 2279 2280 def _parse_freespace(self) -> exp.FreespaceProperty: 2281 self._match(TokenType.EQ) 2282 return self.expression( 2283 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2284 ) 2285 2286 def _parse_mergeblockratio( 2287 self, no: bool = False, default: bool = False 2288 ) -> exp.MergeBlockRatioProperty: 2289 if self._match(TokenType.EQ): 2290 return self.expression( 2291 exp.MergeBlockRatioProperty, 2292 this=self._parse_number(), 2293 percent=self._match(TokenType.PERCENT), 2294 ) 2295 2296 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2297 2298 def _parse_datablocksize( 2299 self, 2300 default: t.Optional[bool] = None, 2301 minimum: t.Optional[bool] = None, 2302 maximum: t.Optional[bool] = None, 2303 ) -> exp.DataBlocksizeProperty: 2304 self._match(TokenType.EQ) 2305 size = self._parse_number() 2306 2307 units = None 2308 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2309 units = self._prev.text 2310 2311 return self.expression( 2312 exp.DataBlocksizeProperty, 2313 size=size, 2314 units=units, 2315 default=default, 2316 minimum=minimum, 2317 maximum=maximum, 2318 ) 2319 2320 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2321 self._match(TokenType.EQ) 2322 always = self._match_text_seq("ALWAYS") 2323 manual = self._match_text_seq("MANUAL") 2324 never = self._match_text_seq("NEVER") 2325 default = self._match_text_seq("DEFAULT") 2326 2327 autotemp = None 2328 if self._match_text_seq("AUTOTEMP"): 2329 autotemp = self._parse_schema() 2330 2331 return self.expression( 2332 exp.BlockCompressionProperty, 2333 always=always, 2334 manual=manual, 2335 never=never, 2336 default=default, 
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        # Not an ISOLATED LOADING clause after all — rewind everything we consumed.
        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    # LOCKING clause (Teradata-style): kind, optional target object, FOR/IN,
    # lock type, optional OVERRIDE. Every piece is optional.
    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        # Three mutually-exclusive Postgres forms: IN (...), FROM (...) TO (...),
        # WITH (MODULUS n, REMAINDER m).
        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        # Not "PARTITION OF": give back the token consumed before entry.
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this,
expression=expression) 2460 2461 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2462 self._match(TokenType.EQ) 2463 return self.expression( 2464 exp.PartitionedByProperty, 2465 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2466 ) 2467 2468 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2469 if self._match_text_seq("AND", "STATISTICS"): 2470 statistics = True 2471 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2472 statistics = False 2473 else: 2474 statistics = None 2475 2476 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2477 2478 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2479 if self._match_text_seq("SQL"): 2480 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2481 return None 2482 2483 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2484 if self._match_text_seq("SQL", "DATA"): 2485 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2486 return None 2487 2488 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2489 if self._match_text_seq("PRIMARY", "INDEX"): 2490 return exp.NoPrimaryIndexProperty() 2491 if self._match_text_seq("SQL"): 2492 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2493 return None 2494 2495 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2496 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2497 return exp.OnCommitProperty() 2498 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2499 return exp.OnCommitProperty(delete=True) 2500 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2501 2502 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2503 if self._match_text_seq("SQL", "DATA"): 2504 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2505 return None 2506 2507 def _parse_distkey(self) -> exp.DistKeyProperty: 2508 
return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2509 2510 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2511 table = self._parse_table(schema=True) 2512 2513 options = [] 2514 while self._match_texts(("INCLUDING", "EXCLUDING")): 2515 this = self._prev.text.upper() 2516 2517 id_var = self._parse_id_var() 2518 if not id_var: 2519 return None 2520 2521 options.append( 2522 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2523 ) 2524 2525 return self.expression(exp.LikeProperty, this=table, expressions=options) 2526 2527 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2528 return self.expression( 2529 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2530 ) 2531 2532 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2533 self._match(TokenType.EQ) 2534 return self.expression( 2535 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2536 ) 2537 2538 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2539 self._match_text_seq("WITH", "CONNECTION") 2540 return self.expression( 2541 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2542 ) 2543 2544 def _parse_returns(self) -> exp.ReturnsProperty: 2545 value: t.Optional[exp.Expression] 2546 null = None 2547 is_table = self._match(TokenType.TABLE) 2548 2549 if is_table: 2550 if self._match(TokenType.LT): 2551 value = self.expression( 2552 exp.Schema, 2553 this="TABLE", 2554 expressions=self._parse_csv(self._parse_struct_types), 2555 ) 2556 if not self._match(TokenType.GT): 2557 self.raise_error("Expecting >") 2558 else: 2559 value = self._parse_schema(exp.var("TABLE")) 2560 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2561 null = True 2562 value = None 2563 else: 2564 value = self._parse_types() 2565 2566 return self.expression(exp.ReturnsProperty, this=value, 
is_table=is_table, null=null) 2567 2568 def _parse_describe(self) -> exp.Describe: 2569 kind = self._match_set(self.CREATABLES) and self._prev.text 2570 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2571 if self._match(TokenType.DOT): 2572 style = None 2573 self._retreat(self._index - 2) 2574 this = self._parse_table(schema=True) 2575 properties = self._parse_properties() 2576 expressions = properties.expressions if properties else None 2577 partition = self._parse_partition() 2578 return self.expression( 2579 exp.Describe, 2580 this=this, 2581 style=style, 2582 kind=kind, 2583 expressions=expressions, 2584 partition=partition, 2585 ) 2586 2587 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2588 kind = self._prev.text.upper() 2589 expressions = [] 2590 2591 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2592 if self._match(TokenType.WHEN): 2593 expression = self._parse_disjunction() 2594 self._match(TokenType.THEN) 2595 else: 2596 expression = None 2597 2598 else_ = self._match(TokenType.ELSE) 2599 2600 if not self._match(TokenType.INTO): 2601 return None 2602 2603 return self.expression( 2604 exp.ConditionalInsert, 2605 this=self.expression( 2606 exp.Insert, 2607 this=self._parse_table(schema=True), 2608 expression=self._parse_derived_table_values(), 2609 ), 2610 expression=expression, 2611 else_=else_, 2612 ) 2613 2614 expression = parse_conditional_insert() 2615 while expression is not None: 2616 expressions.append(expression) 2617 expression = parse_conditional_insert() 2618 2619 return self.expression( 2620 exp.MultitableInserts, 2621 kind=kind, 2622 comments=comments, 2623 expressions=expressions, 2624 source=self._parse_table(), 2625 ) 2626 2627 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2628 comments = ensure_list(self._prev_comments) 2629 hint = self._parse_hint() 2630 overwrite = self._match(TokenType.OVERWRITE) 
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # Hive-style INSERT OVERWRITE [LOCAL] DIRECTORY '...'
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            # FIRST/ALL switches to the multi-table insert form.
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        # NOTE: the keyword arguments below are evaluated left-to-right, so their
        # order mirrors the order in which the clauses appear in the token stream.
        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    # Handles both Postgres ON CONFLICT and MySQL ON DUPLICATE KEY.
    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        # Not a SERDEPROPERTIES clause — rewind anything consumed here.
        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
2750 **{ # type: ignore 2751 "expressions": self._parse_wrapped_properties(), 2752 "with": with_, 2753 }, 2754 ) 2755 2756 def _parse_row_format( 2757 self, match_row: bool = False 2758 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2759 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2760 return None 2761 2762 if self._match_text_seq("SERDE"): 2763 this = self._parse_string() 2764 2765 serde_properties = self._parse_serde_properties() 2766 2767 return self.expression( 2768 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2769 ) 2770 2771 self._match_text_seq("DELIMITED") 2772 2773 kwargs = {} 2774 2775 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2776 kwargs["fields"] = self._parse_string() 2777 if self._match_text_seq("ESCAPED", "BY"): 2778 kwargs["escaped"] = self._parse_string() 2779 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2780 kwargs["collection_items"] = self._parse_string() 2781 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2782 kwargs["map_keys"] = self._parse_string() 2783 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2784 kwargs["lines"] = self._parse_string() 2785 if self._match_text_seq("NULL", "DEFINED", "AS"): 2786 kwargs["null"] = self._parse_string() 2787 2788 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2789 2790 def _parse_load(self) -> exp.LoadData | exp.Command: 2791 if self._match_text_seq("DATA"): 2792 local = self._match_text_seq("LOCAL") 2793 self._match_text_seq("INPATH") 2794 inpath = self._parse_string() 2795 overwrite = self._match(TokenType.OVERWRITE) 2796 self._match_pair(TokenType.INTO, TokenType.TABLE) 2797 2798 return self.expression( 2799 exp.LoadData, 2800 this=self._parse_table(schema=True), 2801 local=local, 2802 overwrite=overwrite, 2803 inpath=inpath, 2804 partition=self._parse_partition(), 2805 input_format=self._match_text_seq("INPUTFORMAT") and 
self._parse_string(), 2806 serde=self._match_text_seq("SERDE") and self._parse_string(), 2807 ) 2808 return self._parse_as_command(self._prev) 2809 2810 def _parse_delete(self) -> exp.Delete: 2811 # This handles MySQL's "Multiple-Table Syntax" 2812 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2813 tables = None 2814 comments = self._prev_comments 2815 if not self._match(TokenType.FROM, advance=False): 2816 tables = self._parse_csv(self._parse_table) or None 2817 2818 returning = self._parse_returning() 2819 2820 return self.expression( 2821 exp.Delete, 2822 comments=comments, 2823 tables=tables, 2824 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2825 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2826 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2827 where=self._parse_where(), 2828 returning=returning or self._parse_returning(), 2829 limit=self._parse_limit(), 2830 ) 2831 2832 def _parse_update(self) -> exp.Update: 2833 comments = self._prev_comments 2834 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2835 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2836 returning = self._parse_returning() 2837 return self.expression( 2838 exp.Update, 2839 comments=comments, 2840 **{ # type: ignore 2841 "this": this, 2842 "expressions": expressions, 2843 "from": self._parse_from(joins=True), 2844 "where": self._parse_where(), 2845 "returning": returning or self._parse_returning(), 2846 "order": self._parse_order(), 2847 "limit": self._parse_limit(), 2848 }, 2849 ) 2850 2851 def _parse_uncache(self) -> exp.Uncache: 2852 if not self._match(TokenType.TABLE): 2853 self.raise_error("Expecting TABLE after UNCACHE") 2854 2855 return self.expression( 2856 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2857 ) 2858 2859 def _parse_cache(self) -> exp.Cache: 2860 lazy = self._match_text_seq("LAZY") 2861 
self._match(TokenType.TABLE) 2862 table = self._parse_table(schema=True) 2863 2864 options = [] 2865 if self._match_text_seq("OPTIONS"): 2866 self._match_l_paren() 2867 k = self._parse_string() 2868 self._match(TokenType.EQ) 2869 v = self._parse_string() 2870 options = [k, v] 2871 self._match_r_paren() 2872 2873 self._match(TokenType.ALIAS) 2874 return self.expression( 2875 exp.Cache, 2876 this=table, 2877 lazy=lazy, 2878 options=options, 2879 expression=self._parse_select(nested=True), 2880 ) 2881 2882 def _parse_partition(self) -> t.Optional[exp.Partition]: 2883 if not self._match(TokenType.PARTITION): 2884 return None 2885 2886 return self.expression( 2887 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2888 ) 2889 2890 def _parse_value(self) -> t.Optional[exp.Tuple]: 2891 if self._match(TokenType.L_PAREN): 2892 expressions = self._parse_csv(self._parse_expression) 2893 self._match_r_paren() 2894 return self.expression(exp.Tuple, expressions=expressions) 2895 2896 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query (including CTEs, VALUES, parenthesized
        subqueries, leading FROM, SUMMARIZE, DESCRIBE and STREAM forms).

        Args:
            nested: whether this call is parsing a nested (parenthesized) query.
            table: whether the result is used in a table context (e.g. FROM (...)).
            parse_subquery_alias: whether to parse an alias after a subquery.
            parse_set_operation: whether to attach trailing UNION/INTERSECT/etc.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                # Unsupported statement kind: report, and fall back to returning
                # the bare WITH node (relevant when the error level doesn't raise).
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            # Don't treat ALL/DISTINCT as quantifiers when followed by a dot
            # (they could be identifiers, e.g. SELECT all.col).
            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            # SELECT AS STRUCT / SELECT AS VALUE (e.g. BigQuery)
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )

                # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
                # in case a modifier (e.g. join) is following
                if table and isinstance(this, exp.Values) and this.alias:
                    alias = this.args["alias"].pop()
                    this = exp.Table(this=this, alias=alias)

                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self.expression(exp.Stream, this=self._parse_function())
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH [RECURSIVE] clause and its comma-separated CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            expressions.append(self._parse_cte())
            if last_comments:
                expressions[-1].add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH after the separator.
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: alias [AS] [[NOT] MATERIALIZED] (statement)."""
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        comments = self._prev_comments

        # Tri-state: True / False when the keyword is present, None otherwise.
        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse a table alias with an optional column list: [AS] name [(c1, c2, ...)]."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # If nothing parsed inside the parens, rewind — the paren wasn't a column list.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias
    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in an exp.Subquery with optional pivots, alias and sample."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite comma/cross-joined tables that reference earlier sources as
        explicit UNNEST(...) calls (used by dialects with implicit unnesting)."""
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                # Only joins whose first name part refers to a previously seen
                # source are treated as implicit unnests.
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing query modifiers (joins, laterals, WHERE/GROUP/ORDER/
        LIMIT/etc. via QUERY_MODIFIER_PARSERS) to `this`."""
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # Some dialects fold OFFSET / LIMIT BY into the limit
                            # node; hoist them onto the query as an exp.Offset.
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this
    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint block (/*+ ... */) into an exp.Hint."""
        if self._match(TokenType.HINT):
            hints = []
            # Keep consuming comma-separated hint lists until an empty batch.
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(upper=True)
                ),
                [],
            ):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse a SELECT ... INTO [TEMPORARY|UNLOGGED] [TABLE] target clause."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause into an exp.From, or return None if absent."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )
    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        """Parse one MEASURES item: [FINAL|RUNNING] expression."""
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause into an exp.MatchRecognize."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        # ROWS PER MATCH options are stored verbatim as variables.
        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # Scan the raw token stream to the matching close paren; the pattern
            # is kept as an opaque SQL fragment rather than parsed.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY into an exp.Lateral."""
        # cross_apply: True for CROSS APPLY, False for OUTER APPLY, None for LATERAL.
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: LATERAL over an unnest, a function call, or a
            # (possibly dotted) identifier.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )
    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume and return the (method, side, kind) tokens of a join prefix."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        """Parse the identifier list of a USING (...) clause."""
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                # Unwrap the column to its underlying identifier.
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one join (including comma joins and CROSS/OUTER APPLY), or None."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            # No JOIN keyword after the prefix tokens — rewind and discard them.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type == TokenType.CROSS)
        ):
            # Handle nested joins: the ON/USING may belong to this join but
            # follow further chained joins of the joined table.
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
= {"this": self._parse_table(parse_bracket=parse_bracket)} 3403 3404 if method: 3405 kwargs["method"] = method.text 3406 if side: 3407 kwargs["side"] = side.text 3408 if kind: 3409 kwargs["kind"] = kind.text 3410 if hint: 3411 kwargs["hint"] = hint 3412 3413 if self._match(TokenType.MATCH_CONDITION): 3414 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3415 3416 if self._match(TokenType.ON): 3417 kwargs["on"] = self._parse_assignment() 3418 elif self._match(TokenType.USING): 3419 kwargs["using"] = self._parse_using_identifiers() 3420 elif ( 3421 not (outer_apply or cross_apply) 3422 and not isinstance(kwargs["this"], exp.Unnest) 3423 and not (kind and kind.token_type == TokenType.CROSS) 3424 ): 3425 index = self._index 3426 joins: t.Optional[list] = list(self._parse_joins()) 3427 3428 if joins and self._match(TokenType.ON): 3429 kwargs["on"] = self._parse_assignment() 3430 elif joins and self._match(TokenType.USING): 3431 kwargs["using"] = self._parse_using_identifiers() 3432 else: 3433 joins = None 3434 self._retreat(index) 3435 3436 kwargs["this"].set("joins", joins if joins else None) 3437 3438 comments = [c for token in (method, side, kind) if token for c in token.comments] 3439 return self.expression(exp.Join, comments=comments, **kwargs) 3440 3441 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3442 this = self._parse_assignment() 3443 3444 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3445 return this 3446 3447 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3448 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3449 3450 return this 3451 3452 def _parse_index_params(self) -> exp.IndexParameters: 3453 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3454 3455 if self._match(TokenType.L_PAREN, advance=False): 3456 columns = self._parse_wrapped_csv(self._parse_with_operator) 3457 else: 3458 columns = None 3459 3460 include 
    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        """Parse an index definition into an exp.Index.

        When `index` or `anonymous` is given, the index name has already been
        consumed and only the target table is parsed; otherwise the
        [UNIQUE|PRIMARY|AMP] INDEX name prefix is parsed first.
        """
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) or MySQL index table hints, or return None."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None
    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dot-separated component of a table name."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a (possibly qualified) table name [catalog.][db.]table into exp.Table.

        Args:
            schema: whether function-call parsing is disallowed for name parts.
            is_db_reference: parse a database reference (shift parts one slot).
            wildcard: allow a trailing `*` to be folded into the identifier.
        """
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, VALUES, subquery, or a named
        table with its sample/alias/hints/pivots/joins.

        Args:
            schema: parse the table as a schema target (column defs allowed).
            joins: also consume trailing joins onto the table.
            alias_tokens: token set permitted as alias names.
            parse_bracket: allow a leading bracket expression as the table.
            is_db_reference: the name refers to a database, not a table.
            parse_partition: allow a PARTITION selection after the name.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this
this.set("only", only) 3666 3667 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3668 self._match_text_seq("*") 3669 3670 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3671 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3672 this.set("partition", self._parse_partition()) 3673 3674 if schema: 3675 return self._parse_schema(this=this) 3676 3677 version = self._parse_version() 3678 3679 if version: 3680 this.set("version", version) 3681 3682 if self.dialect.ALIAS_POST_TABLESAMPLE: 3683 this.set("sample", self._parse_table_sample()) 3684 3685 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3686 if alias: 3687 this.set("alias", alias) 3688 3689 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3690 return self.expression( 3691 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3692 ) 3693 3694 this.set("hints", self._parse_table_hints()) 3695 3696 if not this.args.get("pivots"): 3697 this.set("pivots", self._parse_pivots()) 3698 3699 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3700 this.set("sample", self._parse_table_sample()) 3701 3702 if joins: 3703 for join in self._parse_joins(): 3704 this.append("joins", join) 3705 3706 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3707 this.set("ordinality", True) 3708 this.set("alias", self._parse_table_alias()) 3709 3710 return this 3711 3712 def _parse_version(self) -> t.Optional[exp.Version]: 3713 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3714 this = "TIMESTAMP" 3715 elif self._match(TokenType.VERSION_SNAPSHOT): 3716 this = "VERSION" 3717 else: 3718 return None 3719 3720 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3721 kind = self._prev.text.upper() 3722 start = self._parse_bitwise() 3723 self._match_texts(("TO", "AND")) 3724 end = self._parse_bitwise() 3725 expression: t.Optional[exp.Expression] = self.expression( 3726 exp.Tuple, 
    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        """Parse Snowflake's AT/BEFORE historical-data clause, or return None."""
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                # Not a full AT/BEFORE clause — rewind to where we started.
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        """Parse Snowflake's CHANGES (INFORMATION => ...) clause, or return None."""
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...) with optional ordinality/offset and alias."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                # Reinterpret the table alias as a single column alias.
                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            # If WITH ORDINALITY was given, the last column alias names the offset.
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)
with_alias else None 3788 3789 if alias: 3790 if self.dialect.UNNEST_COLUMN_ONLY: 3791 if alias.args.get("columns"): 3792 self.raise_error("Unexpected extra column alias in unnest.") 3793 3794 alias.set("columns", [alias.this]) 3795 alias.set("this", None) 3796 3797 columns = alias.args.get("columns") or [] 3798 if offset and len(expressions) < len(columns): 3799 offset = columns.pop() 3800 3801 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3802 self._match(TokenType.ALIAS) 3803 offset = self._parse_id_var( 3804 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3805 ) or exp.to_identifier("offset") 3806 3807 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3808 3809 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3810 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3811 if not is_derived and not ( 3812 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3813 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3814 ): 3815 return None 3816 3817 expressions = self._parse_csv(self._parse_value) 3818 alias = self._parse_table_alias() 3819 3820 if is_derived: 3821 self._match_r_paren() 3822 3823 return self.expression( 3824 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3825 ) 3826 3827 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3828 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3829 as_modifier and self._match_text_seq("USING", "SAMPLE") 3830 ): 3831 return None 3832 3833 bucket_numerator = None 3834 bucket_denominator = None 3835 bucket_field = None 3836 percent = None 3837 size = None 3838 seed = None 3839 3840 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3841 matched_l_paren = self._match(TokenType.L_PAREN) 3842 3843 if self.TABLESAMPLE_CSV: 3844 num = None 3845 expressions = self._parse_csv(self._parse_primary) 3846 else: 3847 
    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        """Parse zero or more consecutive PIVOT/UNPIVOT clauses."""
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        """Yield joins until _parse_join returns None."""
        return iter(self._parse_join, None)

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        """Parse DuckDB's simplified PIVOT statement form."""
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )
    def _parse_pivot_in(self) -> exp.In | exp.PivotAny:
        """Parse the `FOR col IN (...)` part of a PIVOT clause."""
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a PIVOT/UNPIVOT clause, deriving output column names, or None."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT wasn't followed by a paren — not a pivot clause.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()
        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Derive the pivoted output column names by combining each IN value
            # with each aggregation alias (order controlled by PREFIXED_PIVOT_COLUMNS).
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
3967 3968 if not expressions: 3969 self.raise_error("Failed to parse PIVOT's aggregation list") 3970 3971 if not self._match(TokenType.FOR): 3972 self.raise_error("Expecting FOR") 3973 3974 field = self._parse_pivot_in() 3975 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3976 self._parse_bitwise 3977 ) 3978 3979 self._match_r_paren() 3980 3981 pivot = self.expression( 3982 exp.Pivot, 3983 expressions=expressions, 3984 field=field, 3985 unpivot=unpivot, 3986 include_nulls=include_nulls, 3987 default_on_null=default_on_null, 3988 ) 3989 3990 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3991 pivot.set("alias", self._parse_table_alias()) 3992 3993 if not unpivot: 3994 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3995 3996 columns: t.List[exp.Expression] = [] 3997 for fld in pivot.args["field"].expressions: 3998 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3999 for name in names: 4000 if self.PREFIXED_PIVOT_COLUMNS: 4001 name = f"{name}_{field_name}" if name else field_name 4002 else: 4003 name = f"{field_name}_{name}" if name else field_name 4004 4005 columns.append(exp.to_identifier(name)) 4006 4007 pivot.set("columns", columns) 4008 4009 return pivot 4010 4011 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4012 return [agg.alias for agg in aggregations] 4013 4014 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4015 if not skip_where_token and not self._match(TokenType.PREWHERE): 4016 return None 4017 4018 return self.expression( 4019 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4020 ) 4021 4022 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4023 if not skip_where_token and not self._match(TokenType.WHERE): 4024 return None 4025 4026 return self.expression( 4027 exp.Where, 
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause, including ROLLUP / CUBE / GROUPING SETS / TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            index = self._index

            # Plain grouping expressions; CUBE/ROLLUP are handled separately below
            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            # If at most a stray WITH was consumed, no modifier followed:
            # rewind to before WITH and stop
            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            # No progress at all in this iteration — we're done
            if index == self._index:
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        # WITH ROLLUP / WITH CUBE take no argument list of their own
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        # A grouping set is either a parenthesized tuple of columns or a single column
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()
self._parse_csv(self._parse_column) 4090 self._match_r_paren() 4091 return self.expression(exp.Tuple, expressions=grouping_set) 4092 4093 return self._parse_column() 4094 4095 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4096 if not skip_having_token and not self._match(TokenType.HAVING): 4097 return None 4098 return self.expression(exp.Having, this=self._parse_assignment()) 4099 4100 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4101 if not self._match(TokenType.QUALIFY): 4102 return None 4103 return self.expression(exp.Qualify, this=self._parse_assignment()) 4104 4105 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4106 if skip_start_token: 4107 start = None 4108 elif self._match(TokenType.START_WITH): 4109 start = self._parse_assignment() 4110 else: 4111 return None 4112 4113 self._match(TokenType.CONNECT_BY) 4114 nocycle = self._match_text_seq("NOCYCLE") 4115 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4116 exp.Prior, this=self._parse_bitwise() 4117 ) 4118 connect = self._parse_assignment() 4119 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4120 4121 if not start and self._match(TokenType.START_WITH): 4122 start = self._parse_assignment() 4123 4124 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4125 4126 def _parse_name_as_expression(self) -> exp.Alias: 4127 return self.expression( 4128 exp.Alias, 4129 alias=self._parse_id_var(any_token=True), 4130 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 4131 ) 4132 4133 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4134 if self._match_text_seq("INTERPOLATE"): 4135 return self._parse_wrapped_csv(self._parse_name_as_expression) 4136 return None 4137 4138 def _parse_order( 4139 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4140 ) -> t.Optional[exp.Expression]: 4141 siblings = None 4142 if not skip_order_token 
    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Generic parser for sort-like clauses keyed by a single token."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ordering term: <expr> [ASC|DESC] [NULLS FIRST|LAST] [WITH FILL ...]."""
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        # NOTE: `desc` is deliberately three-valued — True for DESC, False for an
        # explicit ASC (`asc and False` only yields False when ASC was matched),
        # and None when no direction was written, so don't "simplify" this.
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # Without an explicit NULLS directive, derive the effective null ordering
        # from the dialect's NULL_ORDERING default and the sort direction
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            # ORDER BY ... WITH FILL [FROM ...] [TO ...] [STEP ...] [INTERPOLATE ...]
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )
    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when `top=True`), or a FETCH clause; else return `this`."""
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                # TOP may parenthesize its argument: TOP (<expr>)
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            # LIMIT <offset>, <count> — the first term is actually the offset
            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause; returns `this` unchanged when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )
    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        # Optional trailing `BY <exprs>` list, shared by LIMIT and OFFSET parsing
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, False = SKIP LOCKED, expression = WAIT <n>
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold trailing UNION/EXCEPT/INTERSECT clauses into a left-deep tree."""
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation: t.Type[exp.SetOperation] = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments

            if self._match(TokenType.DISTINCT):
                distinct: t.Optional[bool] = True
            elif self._match(TokenType.ALL):
                distinct = False
            else:
                # No explicit quantifier — fall back to the dialect's default
                distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
                if distinct is None:
                    self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                # Move trailing modifiers (e.g. ORDER BY, LIMIT) from the right
                # operand up to the set operation itself
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this
    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly aliased) scalar expression."""
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        """Parse assignment-level operators (lowest precedence), right-associatively."""
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            # A single-part column target is unwrapped to its identifier
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        # OR-level operators
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        # AND-level operators
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range predicates (BETWEEN/IN/LIKE/...), ISNULL/NOTNULL and IS."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        # Wraps a parsed range predicate that was preceded by NOT
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an IS predicate: IS [NOT] DISTINCT FROM / IS [NOT] JSON / IS [NOT] <value>."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS NOT DISTINCT FROM is null-safe equality; IS DISTINCT FROM its negation
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                # Not an IS predicate after all — rewind to before IS
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this
    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of IN: UNNEST(...), (<list|subquery>), [...] or a field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single query becomes IN (<subquery>) rather than an expression list
            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse `<low> AND <high>` following BETWEEN."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an ESCAPE expression when an ESCAPE token follows."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        """Parse an INTERVAL expression, canonicalizing to INTERVAL '<value>' <unit>."""
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # INTERVAL was likely an identifier (e.g. a column named "interval")
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if len(parts) == 1:
                if unit:
                    # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            # e.g. INTERVAL '1-2' YEAR TO MONTH
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval
[+] 'val_n' unit_n into a sum of intervals 4538 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4539 return self.expression( 4540 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4541 ) 4542 4543 self._retreat(index) 4544 return interval 4545 4546 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4547 this = self._parse_term() 4548 4549 while True: 4550 if self._match_set(self.BITWISE): 4551 this = self.expression( 4552 self.BITWISE[self._prev.token_type], 4553 this=this, 4554 expression=self._parse_term(), 4555 ) 4556 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4557 this = self.expression( 4558 exp.DPipe, 4559 this=this, 4560 expression=self._parse_term(), 4561 safe=not self.dialect.STRICT_STRING_CONCAT, 4562 ) 4563 elif self._match(TokenType.DQMARK): 4564 this = self.expression( 4565 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4566 ) 4567 elif self._match_pair(TokenType.LT, TokenType.LT): 4568 this = self.expression( 4569 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4570 ) 4571 elif self._match_pair(TokenType.GT, TokenType.GT): 4572 this = self.expression( 4573 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4574 ) 4575 else: 4576 break 4577 4578 return this 4579 4580 def _parse_term(self) -> t.Optional[exp.Expression]: 4581 this = self._parse_factor() 4582 4583 while self._match_set(self.TERM): 4584 klass = self.TERM[self._prev.token_type] 4585 comments = self._prev_comments 4586 expression = self._parse_factor() 4587 4588 this = self.expression(klass, this=this, comments=comments, expression=expression) 4589 4590 if isinstance(this, exp.Collate): 4591 expr = this.expression 4592 4593 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4594 # fallback to Identifier / Var 4595 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4596 ident = expr.this 4597 if isinstance(ident, 
    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplication-level operators (and exponentiation, when supported)."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            # A word operator (e.g. DIV) with no right operand was actually an
            # identifier — give the token back
            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an interval, a `<type> <literal>` cast, or fall back to a column."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0)) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)
    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse a single data type parameter, e.g. the 38 in DECIMAL(38, 0)."""
        this = self._parse_type()
        if not this:
            return None

        # A bare word parses as a single-part Column; normalize it to an upper-cased Var
        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested, parameterized, UDT and array forms.

        Returns None (after rewinding) when the upcoming tokens don't form a type.
        """
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            # Not a known type token — try an identifier that tokenizes to a
            # type, or a user-defined type when the dialect supports them
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                # Nullable(T) collapses onto T with the nullable flag set
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Inline constructor values, e.g. ARRAY<INT>[1, 2]
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            # Distinguish a parameterized type from a same-named function call by
            # peeking for a following string literal
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)
            if not matched_l_bracket and not matched_array:
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this
values as part of the data type, e.g. in DuckDB 4912 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4913 self._retreat(index) 4914 break 4915 4916 this = exp.DataType( 4917 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4918 ) 4919 self._match(TokenType.R_BRACKET) 4920 4921 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4922 converter = self.TYPE_CONVERTERS.get(this.this) 4923 if converter: 4924 this = converter(t.cast(exp.DataType, this)) 4925 4926 return this 4927 4928 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4929 index = self._index 4930 4931 if ( 4932 self._curr 4933 and self._next 4934 and self._curr.token_type in self.TYPE_TOKENS 4935 and self._next.token_type in self.TYPE_TOKENS 4936 ): 4937 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4938 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            # A type was required but we parsed something else; back up and
            # reparse the whole field strictly as a type.
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in exp.AtTimeZone if an `AT TIME ZONE <expr>` suffix follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column expression, applying chained column operators and, where the
        dialect supports it, an Oracle-style (+) outer-join marker."""
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse a field and promote a bare Identifier into an exp.Column node."""
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            # Allow VALUES to be used as a plain identifier when it isn't
            # introducing a VALUES (...) clause.
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse Snowflake/Databricks `col:path` VARIANT extraction into exp.JSONExtract,
        hoisting any trailing `::` casts above the extraction."""
        casts = []
        json_path = []
        escape = None

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                # Find the token that ends the path: the one just before the `::`.
                # NOTE: the genexp loop variable `t` shadows the `typing` alias only
                # inside the generator's own scope.
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as
                # it'll roundtrip to a string literal in GET_PATH
                if isinstance(path, exp.Identifier) and path.quoted:
                    escape = True

                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path)))

            if json_path_expr:
                json_path_expr.set("escape", escape)

            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=json_path_expr,
                variant_extract=True,
            )

        # Re-apply the casts that were peeled off the path, innermost first.
        while casts:
            this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        """Parse the right-hand side of a `::` cast operator — a type."""
        return self._parse_types()

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Repeatedly apply column operators (dots, `::` casts, dialect-specific
        operators from COLUMN_OPERATORS) to `this`, then optionally hand off to
        colon-as-variant extraction."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bracket()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the parts one level: the previous column name becomes
                # the table, the table becomes the db, and so on.
                this = self.expression(
                    exp.Column,
                    comments=this.comments,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, an implicit string concatenation,
        a leading-dot number, or a parenthesized expression/subquery/tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate ('a' 'b' -> CONCAT).
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # e.g. `.25` -> 0.25
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            # Parenthesized case: subquery, tuple, or a wrapped expression.
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary literal, a function call, or an identifier.

        The attempt order flips depending on `anonymous_func` so that anonymous
        function syntax wins over literals when requested.
        """
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, optionally wrapped in ODBC `{fn ...}` escape syntax."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call proper.

        Resolution order: no-paren function parsers, no-paren builtin functions,
        registered FUNCTION_PARSERS, subquery predicates, known builders from
        `functions` (default: self.FUNCTIONS), and finally exp.Anonymous.
        Returns None if the upcoming tokens cannot start a function call.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No parenthesis follows, so this can only be a no-paren function.
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening parenthesis.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(
                    subquery_predicate, comments=comments, this=self._parse_select()
                )
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                # Some builders accept the dialect as a keyword argument.
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Remember the original casing of the function name.
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        """Hook for dialects to transform a positional argument; no-op by default."""
        return expression

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style arguments (e.g. aliases) into exp.PropertyEQ nodes."""
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    # Unwrap the column so the key is a bare identifier.
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse a single parameter in a user-defined function signature."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dotted UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. MySQL `_utf8'abc'`); fall back to Identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as `kind.name`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        """Parse a single lambda parameter name."""
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda expression (e.g. `(x, y) -> ...`); if no lambda arrow
        follows, backtrack and parse a DISTINCT clause or a regular expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all; rewind and parse as an ordinary expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized list of column definitions/constraints into exp.Schema."""
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse a column definition whose name may be any token."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the type, computed-column clause, and constraints of a column definition.

        Returns `this` unchanged when neither a type nor any constraints follow.
        """
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            # Computed column, e.g. ClickHouse ALIAS/MATERIALIZED expressions.
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.TransformColumnConstraint(this=self._parse_field()),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse AUTO REFRESH <value>; retreat if REFRESH doesn't follow AUTO."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint, wrapped or bare."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY | ROW | (<expr>)} clauses."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (<expression>)
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare numeric form: IDENTITY(start, increment)
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE LENGTH column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a NOT-prefixed constraint (NOT NULL, NOT CASESPECIFIC, ...)."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse an optionally named column constraint via CONSTRAINT_PARSERS."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly unnamed) table-level constraint."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Collect consecutive unnamed constraints (or function-shaped ones)."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one unnamed constraint from the allowed `constraints` keyword set."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        """Parse the optional identifier that names a UNIQUE key."""
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse a UNIQUE [KEY] constraint with optional NULLS NOT DISTINCT,
        column list, USING index type, and ON CONFLICT clause."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key constraint options as plain strings (ON <event> <action>,
        plus any keywords listed in KEY_CONSTRAINT_OPTIONS)."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause; `match=False` assumes REFERENCES was consumed."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        # NOTE(review): `expressions` is intentionally left as None here — the
        # referenced column list is captured by _parse_table(schema=True).
        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse a FOREIGN KEY constraint with its REFERENCES clause and
        ON DELETE / ON UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Fall back to whatever single keyword follows (e.g. CASCADE).
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse one element of a PRIMARY KEY column list."""
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse PERIOD FOR SYSTEM_TIME (start, end); retreat if the snapshot token is absent."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse a PRIMARY KEY constraint, either as a column-level marker or a
        table-level key with a wrapped column list and options."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        """Parse one bracketed element: an assignment with optional alias and slice."""
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
        https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a bracketed suffix or literal: `[...]` subscripts/arrays and
        `{...}` structs or ODBC datetime literals; recurses for chained brackets."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            # Bare `[...]` with no preceding expression is an array literal.
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional `:<expr>` slice suffix."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE [operand] WHEN ... THEN ... [ELSE ...] END expression."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            # Special case: `... ELSE interval END` parsed `interval END` as an
            # Interval — reinterpret it as a column named "interval".
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF in function form `IF(...)` or keyword form `IF ... THEN ... END`."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A statement-initial IF is a command (e.g. IF EXISTS ...).
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        """Parse EXTRACT(<part> FROM <expr>) or the comma-separated variant."""
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        """Parse a GAP_FILL(TABLE <t>, ...) call."""
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the body of CAST/TRY_CAST: `<expr> AS <type> [FORMAT <fmt>]`.

        A FORMAT clause on a temporal target type is rewritten to
        StrToDate/StrToTime instead of a plain Cast.
        """
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Two-argument form casts to a type given as a string.
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name: treat it as a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT style aggregation arguments."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ...
]] [LIMIT n]) 5919 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5920 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5921 5922 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5923 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5924 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5925 if not self._match_text_seq("WITHIN", "GROUP"): 5926 self._retreat(index) 5927 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5928 5929 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5930 order = self._parse_order(this=seq_get(args, 0)) 5931 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5932 5933 def _parse_convert( 5934 self, strict: bool, safe: t.Optional[bool] = None 5935 ) -> t.Optional[exp.Expression]: 5936 this = self._parse_bitwise() 5937 5938 if self._match(TokenType.USING): 5939 to: t.Optional[exp.Expression] = self.expression( 5940 exp.CharacterSet, this=self._parse_var() 5941 ) 5942 elif self._match(TokenType.COMMA): 5943 to = self._parse_types() 5944 else: 5945 to = None 5946 5947 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5948 5949 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5950 """ 5951 There are generally two variants of the DECODE function: 5952 5953 - DECODE(bin, charset) 5954 - DECODE(expression, search, result [, search, result] ... [, default]) 5955 5956 The second variant will always be parsed into a CASE expression. Note that NULL 5957 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5958 instead of relying on pattern matching. 
5959 """ 5960 args = self._parse_csv(self._parse_assignment) 5961 5962 if len(args) < 3: 5963 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5964 5965 expression, *expressions = args 5966 if not expression: 5967 return None 5968 5969 ifs = [] 5970 for search, result in zip(expressions[::2], expressions[1::2]): 5971 if not search or not result: 5972 return None 5973 5974 if isinstance(search, exp.Literal): 5975 ifs.append( 5976 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5977 ) 5978 elif isinstance(search, exp.Null): 5979 ifs.append( 5980 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5981 ) 5982 else: 5983 cond = exp.or_( 5984 exp.EQ(this=expression.copy(), expression=search), 5985 exp.and_( 5986 exp.Is(this=expression.copy(), expression=exp.Null()), 5987 exp.Is(this=search.copy(), expression=exp.Null()), 5988 copy=False, 5989 ), 5990 copy=False, 5991 ) 5992 ifs.append(exp.If(this=cond, true=result)) 5993 5994 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5995 5996 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5997 self._match_text_seq("KEY") 5998 key = self._parse_column() 5999 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6000 self._match_text_seq("VALUE") 6001 value = self._parse_bitwise() 6002 6003 if not key and not value: 6004 return None 6005 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6006 6007 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6008 if not this or not self._match_text_seq("FORMAT", "JSON"): 6009 return this 6010 6011 return self.expression(exp.FormatJson, this=this) 6012 6013 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6014 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6015 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6016 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6017 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6018 else: 6019 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6020 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6021 6022 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6023 6024 if not empty and not error and not null: 6025 return None 6026 6027 return self.expression( 6028 exp.OnCondition, 6029 empty=empty, 6030 error=error, 6031 null=null, 6032 ) 6033 6034 def _parse_on_handling( 6035 self, on: str, *values: str 6036 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6037 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6038 for value in values: 6039 if self._match_text_seq(value, "ON", on): 6040 return f"{value} ON {on}" 6041 6042 index = self._index 6043 if self._match(TokenType.DEFAULT): 6044 default_value = self._parse_bitwise() 6045 if self._match_text_seq("ON", on): 6046 return default_value 6047 6048 self._retreat(index) 6049 6050 return None 6051 6052 @t.overload 6053 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6054 6055 @t.overload 6056 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
    def _parse_json_object(self, agg=False):
        """Parse the body of JSON_OBJECT / JSON_OBJECTAGG (star, key-value pairs and trailing options)."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        # WITH/WITHOUT UNIQUE [KEYS] clause.
        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        """Parse one column definition inside JSON_TABLE's COLUMNS(...) clause."""
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a COLUMNS(<json column defs>) schema clause."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse the body of JSON_TABLE(<doc> [, <path>] [ON ERROR/EMPTY] COLUMNS(...))."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL-style MATCH(<cols>) AGAINST(<string> [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One entry of the WITH (<name> <type> [<path>] [AS JSON]) clause.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style arguments; `haystack_first` flips the
        meaning of the first two comma-separated arguments."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(<substr> IN <string>) form.
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse ML.PREDICT-style arguments: MODEL <t>, TABLE <t> [, <params struct>]."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join hint's table list, e.g. BROADCAST(t1, t2)."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                # FOR without FROM: the start position defaults to 1.
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            # LEADING | TRAILING | BOTH
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(<chars> FROM <string>) puts the pattern first, so swap.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a SELECT-level WINDOW clause as a list of named windows."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `<name> AS (<window spec>)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if the corresponding clause follows."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a trailing `HAVING MAX|MIN <column>` qualifier (wraps in HavingMax)."""
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse trailing window-related syntax after a function call: FILTER (...),
        WITHIN GROUP (...), IGNORE/RESPECT NULLS and OVER (...); with `alias=True`
        this parses a named WINDOW-clause entry instead."""
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist the IGNORE/RESPECT NULLS wrapper from inside the
                # aggregate's argument to around the aggregate itself.
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            # The comments now live on the Window node, so drop them from the function.
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> — reference to a named window.
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # ROWS|RANGE [BETWEEN] <start spec> AND <end spec>
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY and ORDER BY parts of a window specification."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED | CURRENT ROW | <expr>, plus PRECEDING/FOLLOWING side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias after `this`; with `explicit=True` an AS keyword is required."""
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multi-alias: expr AS (a, b, c)
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier, or fall back to treating the next token as one.

        Args:
            any_token: accept any non-reserved token as an identifier.
            tokens: token types to accept instead of the default ID_VAR_TOKENS.
        """
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal via the registered STRING_PARSERS, else try a placeholder."""
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal as a quoted identifier (for dialects with string aliases)."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal via the registered NUMERIC_PARSERS, else try a placeholder."""
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse an explicitly quoted identifier token, else try a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a Var from a VAR token, any non-reserved token (`any_token`) or `tokens`."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        """Consume and return the current token unless it is reserved (or always, if `ignore_reserved`)."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to an (optionally upper-cased) Var."""
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression, falling back to a Var."""
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, else try a placeholder."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal, else try a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token, else try a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse the name part of a parameter reference (e.g. after `@`)."""
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Try the registered placeholder parsers; rewind one token if the parser declines."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        """Parse `<keyword> (<exprs>)` or `<keyword> <expr>` (e.g. EXCEPT/REPLACE after `*`)."""
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list of items produced by `parse_method`."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach any comments found on the separator to the preceding item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of binary operators given by `expressions`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated list of identifiers."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized `sep`-separated list; parens are optional if `optional`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; raise if they're missing and not `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement, or an (optionally aliasable) expression with set operations."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE ... AS <select>)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [<kind>] [TRANSACTION|WORK] [<modes>]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        # Transaction modes are comma-separated runs of VAR tokens,
        # e.g. "ISOLATION LEVEL SERIALIZABLE, READ ONLY".
        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO SAVEPOINT ...] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        # NOTE(review): `chain` is parsed but only attached to Commit below, so
        # "ROLLBACK AND CHAIN" drops the chain flag — confirm this is intended.
        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        """Parse REFRESH [TABLE] <name or string>."""
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse one ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER <col>] action."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse one DROP [COLUMN] action, defaulting the drop kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the actions of ALTER TABLE ... ADD (constraints, columns or a schema)."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE ... ALTER [COLUMN] action into AlterColumn (or dispatch)."""
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        # Fallthrough: ALTER COLUMN ... [SET DATA] TYPE <type> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        """Parse Redshift's ALTER DISTSTYLE { ALL | EVEN | AUTO | KEY DISTKEY <col> }."""
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        """Parse Redshift's ALTER [COMPOUND] SORTKEY { (<cols>) | AUTO | NONE }."""
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse ALTER TABLE ... DROP actions: partitions or columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        """Parse ALTER TABLE ... RENAME [COLUMN <old> TO <new> | TO <table>]."""
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        """Parse the many dialect-specific ALTER TABLE ... SET variants into AlterSet."""
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or
self._match_text_seq("TAGS"): 6808 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6809 else: 6810 if self._match_text_seq("SERDE"): 6811 alter_set.set("serde", self._parse_field()) 6812 6813 alter_set.set("expressions", [self._parse_properties()]) 6814 6815 return alter_set 6816 6817 def _parse_alter(self) -> exp.Alter | exp.Command: 6818 start = self._prev 6819 6820 alter_token = self._match_set(self.ALTERABLES) and self._prev 6821 if not alter_token: 6822 return self._parse_as_command(start) 6823 6824 exists = self._parse_exists() 6825 only = self._match_text_seq("ONLY") 6826 this = self._parse_table(schema=True) 6827 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6828 6829 if self._next: 6830 self._advance() 6831 6832 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6833 if parser: 6834 actions = ensure_list(parser(self)) 6835 not_valid = self._match_text_seq("NOT", "VALID") 6836 options = self._parse_csv(self._parse_property) 6837 6838 if not self._curr and actions: 6839 return self.expression( 6840 exp.Alter, 6841 this=this, 6842 kind=alter_token.text.upper(), 6843 exists=exists, 6844 actions=actions, 6845 only=only, 6846 options=options, 6847 cluster=cluster, 6848 not_valid=not_valid, 6849 ) 6850 6851 return self._parse_as_command(start) 6852 6853 def _parse_merge(self) -> exp.Merge: 6854 self._match(TokenType.INTO) 6855 target = self._parse_table() 6856 6857 if target and self._match(TokenType.ALIAS, advance=False): 6858 target.set("alias", self._parse_table_alias()) 6859 6860 self._match(TokenType.USING) 6861 using = self._parse_table() 6862 6863 self._match(TokenType.ON) 6864 on = self._parse_assignment() 6865 6866 return self.expression( 6867 exp.Merge, 6868 this=target, 6869 using=using, 6870 on=on, 6871 expressions=self._parse_when_matched(), 6872 returning=self._parse_returning(), 6873 ) 6874 6875 def _parse_when_matched(self) -> t.List[exp.When]: 6876 whens = [] 6877 6878 while 
self._match(TokenType.WHEN): 6879 matched = not self._match(TokenType.NOT) 6880 self._match_text_seq("MATCHED") 6881 source = ( 6882 False 6883 if self._match_text_seq("BY", "TARGET") 6884 else self._match_text_seq("BY", "SOURCE") 6885 ) 6886 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6887 6888 self._match(TokenType.THEN) 6889 6890 if self._match(TokenType.INSERT): 6891 this = self._parse_star() 6892 if this: 6893 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 6894 else: 6895 then = self.expression( 6896 exp.Insert, 6897 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 6898 expression=self._match_text_seq("VALUES") and self._parse_value(), 6899 ) 6900 elif self._match(TokenType.UPDATE): 6901 expressions = self._parse_star() 6902 if expressions: 6903 then = self.expression(exp.Update, expressions=expressions) 6904 else: 6905 then = self.expression( 6906 exp.Update, 6907 expressions=self._match(TokenType.SET) 6908 and self._parse_csv(self._parse_equality), 6909 ) 6910 elif self._match(TokenType.DELETE): 6911 then = self.expression(exp.Var, this=self._prev.text) 6912 else: 6913 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 6914 6915 whens.append( 6916 self.expression( 6917 exp.When, 6918 matched=matched, 6919 source=source, 6920 condition=condition, 6921 then=then, 6922 ) 6923 ) 6924 return whens 6925 6926 def _parse_show(self) -> t.Optional[exp.Expression]: 6927 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6928 if parser: 6929 return parser(self) 6930 return self._parse_as_command(self._prev) 6931 6932 def _parse_set_item_assignment( 6933 self, kind: t.Optional[str] = None 6934 ) -> t.Optional[exp.Expression]: 6935 index = self._index 6936 6937 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6938 return self._parse_set_transaction(global_=kind == "GLOBAL") 6939 6940 left = self._parse_primary() or self._parse_column() 6941 
assignment_delimiter = self._match_texts(("=", "TO")) 6942 6943 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6944 self._retreat(index) 6945 return None 6946 6947 right = self._parse_statement() or self._parse_id_var() 6948 if isinstance(right, (exp.Column, exp.Identifier)): 6949 right = exp.var(right.name) 6950 6951 this = self.expression(exp.EQ, this=left, expression=right) 6952 return self.expression(exp.SetItem, this=this, kind=kind) 6953 6954 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6955 self._match_text_seq("TRANSACTION") 6956 characteristics = self._parse_csv( 6957 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6958 ) 6959 return self.expression( 6960 exp.SetItem, 6961 expressions=characteristics, 6962 kind="TRANSACTION", 6963 **{"global": global_}, # type: ignore 6964 ) 6965 6966 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6967 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6968 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6969 6970 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6971 index = self._index 6972 set_ = self.expression( 6973 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6974 ) 6975 6976 if self._curr: 6977 self._retreat(index) 6978 return self._parse_as_command(self._prev) 6979 6980 return set_ 6981 6982 def _parse_var_from_options( 6983 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6984 ) -> t.Optional[exp.Var]: 6985 start = self._curr 6986 if not start: 6987 return None 6988 6989 option = start.text.upper() 6990 continuations = options.get(option) 6991 6992 index = self._index 6993 self._advance() 6994 for keywords in continuations or []: 6995 if isinstance(keywords, str): 6996 keywords = (keywords,) 6997 6998 if self._match_text_seq(*keywords): 6999 option = f"{option} {' '.join(keywords)}" 7000 break 
7001 else: 7002 if continuations or continuations is None: 7003 if raise_unmatched: 7004 self.raise_error(f"Unknown option {option}") 7005 7006 self._retreat(index) 7007 return None 7008 7009 return exp.var(option) 7010 7011 def _parse_as_command(self, start: Token) -> exp.Command: 7012 while self._curr: 7013 self._advance() 7014 text = self._find_sql(start, self._prev) 7015 size = len(start.text) 7016 self._warn_unsupported() 7017 return exp.Command(this=text[:size], expression=text[size:]) 7018 7019 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7020 settings = [] 7021 7022 self._match_l_paren() 7023 kind = self._parse_id_var() 7024 7025 if self._match(TokenType.L_PAREN): 7026 while True: 7027 key = self._parse_id_var() 7028 value = self._parse_primary() 7029 7030 if not key and value is None: 7031 break 7032 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7033 self._match(TokenType.R_PAREN) 7034 7035 self._match_r_paren() 7036 7037 return self.expression( 7038 exp.DictProperty, 7039 this=this, 7040 kind=kind.this if kind else None, 7041 settings=settings, 7042 ) 7043 7044 def _parse_dict_range(self, this: str) -> exp.DictRange: 7045 self._match_l_paren() 7046 has_min = self._match_text_seq("MIN") 7047 if has_min: 7048 min = self._parse_var() or self._parse_primary() 7049 self._match_text_seq("MAX") 7050 max = self._parse_var() or self._parse_primary() 7051 else: 7052 max = self._parse_var() or self._parse_primary() 7053 min = exp.Literal.number(0) 7054 self._match_r_paren() 7055 return self.expression(exp.DictRange, this=this, min=min, max=max) 7056 7057 def _parse_comprehension( 7058 self, this: t.Optional[exp.Expression] 7059 ) -> t.Optional[exp.Comprehension]: 7060 index = self._index 7061 expression = self._parse_column() 7062 if not self._match(TokenType.IN): 7063 self._retreat(index - 1) 7064 return None 7065 iterator = self._parse_column() 7066 condition = self._parse_assignment() if self._match_text_seq("IF") 
else None 7067 return self.expression( 7068 exp.Comprehension, 7069 this=this, 7070 expression=expression, 7071 iterator=iterator, 7072 condition=condition, 7073 ) 7074 7075 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7076 if self._match(TokenType.HEREDOC_STRING): 7077 return self.expression(exp.Heredoc, this=self._prev.text) 7078 7079 if not self._match_text_seq("$"): 7080 return None 7081 7082 tags = ["$"] 7083 tag_text = None 7084 7085 if self._is_connected(): 7086 self._advance() 7087 tags.append(self._prev.text.upper()) 7088 else: 7089 self.raise_error("No closing $ found") 7090 7091 if tags[-1] != "$": 7092 if self._is_connected() and self._match_text_seq("$"): 7093 tag_text = tags[-1] 7094 tags.append("$") 7095 else: 7096 self.raise_error("No closing $ found") 7097 7098 heredoc_start = self._curr 7099 7100 while self._curr: 7101 if self._match_text_seq(*tags, advance=False): 7102 this = self._find_sql(heredoc_start, self._prev) 7103 self._advance(len(tags)) 7104 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7105 7106 self._advance() 7107 7108 self.raise_error(f"No closing {''.join(tags)} found") 7109 return None 7110 7111 def _find_parser( 7112 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7113 ) -> t.Optional[t.Callable]: 7114 if not self._curr: 7115 return None 7116 7117 index = self._index 7118 this = [] 7119 while True: 7120 # The current token might be multiple words 7121 curr = self._curr.text.upper() 7122 key = curr.split(" ") 7123 this.append(curr) 7124 7125 self._advance() 7126 result, trie = in_trie(trie, key) 7127 if result == TrieResult.FAILED: 7128 break 7129 7130 if result == TrieResult.EXISTS: 7131 subparser = parsers[" ".join(this)] 7132 return subparser 7133 7134 self._retreat(index) 7135 return None 7136 7137 def _match(self, token_type, advance=True, expression=None): 7138 if not self._curr: 7139 return None 7140 7141 if self._curr.token_type == token_type: 7142 if advance: 7143 self._advance() 7144 
self._add_comments(expression) 7145 return True 7146 7147 return None 7148 7149 def _match_set(self, types, advance=True): 7150 if not self._curr: 7151 return None 7152 7153 if self._curr.token_type in types: 7154 if advance: 7155 self._advance() 7156 return True 7157 7158 return None 7159 7160 def _match_pair(self, token_type_a, token_type_b, advance=True): 7161 if not self._curr or not self._next: 7162 return None 7163 7164 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7165 if advance: 7166 self._advance(2) 7167 return True 7168 7169 return None 7170 7171 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7172 if not self._match(TokenType.L_PAREN, expression=expression): 7173 self.raise_error("Expecting (") 7174 7175 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7176 if not self._match(TokenType.R_PAREN, expression=expression): 7177 self.raise_error("Expecting )") 7178 7179 def _match_texts(self, texts, advance=True): 7180 if ( 7181 self._curr 7182 and self._curr.token_type != TokenType.STRING 7183 and self._curr.text.upper() in texts 7184 ): 7185 if advance: 7186 self._advance() 7187 return True 7188 return None 7189 7190 def _match_text_seq(self, *texts, advance=True): 7191 index = self._index 7192 for text in texts: 7193 if ( 7194 self._curr 7195 and self._curr.token_type != TokenType.STRING 7196 and self._curr.text.upper() == text 7197 ): 7198 self._advance() 7199 else: 7200 self._retreat(index) 7201 return None 7202 7203 if not advance: 7204 self._retreat(index) 7205 7206 return True 7207 7208 def _replace_lambda( 7209 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7210 ) -> t.Optional[exp.Expression]: 7211 if not node: 7212 return node 7213 7214 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7215 7216 for column in node.find_all(exp.Column): 7217 typ = lambda_types.get(column.parts[0].name) 7218 if typ 
is not None: 7219 dot_or_id = column.to_dot() if column.table else column.this 7220 7221 if typ: 7222 dot_or_id = self.expression( 7223 exp.Cast, 7224 this=dot_or_id, 7225 to=typ, 7226 ) 7227 7228 parent = column.parent 7229 7230 while isinstance(parent, exp.Dot): 7231 if not isinstance(parent.parent, exp.Dot): 7232 parent.replace(dot_or_id) 7233 break 7234 parent = parent.parent 7235 else: 7236 if column is node: 7237 node = dot_or_id 7238 else: 7239 column.replace(dot_or_id) 7240 return node 7241 7242 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7243 start = self._prev 7244 7245 # Not to be confused with TRUNCATE(number, decimals) function call 7246 if self._match(TokenType.L_PAREN): 7247 self._retreat(self._index - 2) 7248 return self._parse_function() 7249 7250 # Clickhouse supports TRUNCATE DATABASE as well 7251 is_database = self._match(TokenType.DATABASE) 7252 7253 self._match(TokenType.TABLE) 7254 7255 exists = self._parse_exists(not_=False) 7256 7257 expressions = self._parse_csv( 7258 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7259 ) 7260 7261 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7262 7263 if self._match_text_seq("RESTART", "IDENTITY"): 7264 identity = "RESTART" 7265 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7266 identity = "CONTINUE" 7267 else: 7268 identity = None 7269 7270 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7271 option = self._prev.text 7272 else: 7273 option = None 7274 7275 partition = self._parse_partition() 7276 7277 # Fallback case 7278 if self._curr: 7279 return self._parse_as_command(start) 7280 7281 return self.expression( 7282 exp.TruncateTable, 7283 expressions=expressions, 7284 is_database=is_database, 7285 exists=exists, 7286 cluster=cluster, 7287 identity=identity, 7288 option=option, 7289 partition=partition, 7290 ) 7291 7292 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7293 
this = self._parse_ordered(self._parse_opclass) 7294 7295 if not self._match(TokenType.WITH): 7296 return this 7297 7298 op = self._parse_var(any_token=True) 7299 7300 return self.expression(exp.WithOperator, this=this, op=op) 7301 7302 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7303 self._match(TokenType.EQ) 7304 self._match(TokenType.L_PAREN) 7305 7306 opts: t.List[t.Optional[exp.Expression]] = [] 7307 while self._curr and not self._match(TokenType.R_PAREN): 7308 if self._match_text_seq("FORMAT_NAME", "="): 7309 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7310 # so we parse it separately to use _parse_field() 7311 prop = self.expression( 7312 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7313 ) 7314 opts.append(prop) 7315 else: 7316 opts.append(self._parse_property()) 7317 7318 self._match(TokenType.COMMA) 7319 7320 return opts 7321 7322 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7323 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7324 7325 options = [] 7326 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7327 option = self._parse_var(any_token=True) 7328 prev = self._prev.text.upper() 7329 7330 # Different dialects might separate options and values by white space, "=" and "AS" 7331 self._match(TokenType.EQ) 7332 self._match(TokenType.ALIAS) 7333 7334 param = self.expression(exp.CopyParameter, this=option) 7335 7336 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7337 TokenType.L_PAREN, advance=False 7338 ): 7339 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7340 param.set("expressions", self._parse_wrapped_options()) 7341 elif prev == "FILE_FORMAT": 7342 # T-SQL's external file format case 7343 param.set("expression", self._parse_field()) 7344 else: 7345 param.set("expression", self._parse_unquoted_field()) 7346 7347 options.append(param) 7348 self._match(sep) 7349 7350 return options 7351 7352 
    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        """Parse the credential-related clauses of a COPY statement."""
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        """Parse a file location; dialects may override with stricter parsing."""
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        """Parse COPY INTO ... FROM/TO ..., falling back to Command on leftovers."""
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        # kind is True for COPY ... FROM, False for COPY ... TO.
        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        """Parse NORMALIZE(expr [, form])."""
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        """Parse star modifiers: COLUMNS(...) unpacking or * EXCEPT/REPLACE/RENAME."""
        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        )

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        """Parse one privilege (possibly multi-word) with an optional column list."""
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        """Parse a grantee: [ROLE|GROUP] <identifier>."""
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant(self) -> exp.Grant | exp.Command:
        """Parse GRANT privileges ON securable TO principals, else fall back to Command."""
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        """Parse OVERLAY(expr PLACING expr FROM expr [FOR expr])."""
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a StarMap for a lone star argument, else a VarMap from alternating key/value args."""
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys: t.List = []
    values: t.List = []
    pos = 0
    while pos < len(args):
        keys.append(args[pos])
        # Odd argument counts raise IndexError here, matching the original contract.
        values.append(args[pos + 1])
        pos += 2

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a parser callable that builds an *expr_type* range node with optional escape."""

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        # Parse the right-hand operand first, then order operands per the dialect.
        rhs = self._parse_bitwise()
        left, right = (rhs, this) if reverse_args else (this, rhs)
        node = self.expression(expr_type, this=left, expression=right)
        return self._parse_escape(node)

    return _parse_binary_range
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a LOG node honoring the dialect's argument order, or LN/LOG for one argument."""
    # Default argument order is base, expression
    base = seq_get(args, 0)
    value = seq_get(args, 1)

    if value:
        if dialect.LOG_BASE_FIRST:
            return exp.Log(this=base, expression=value)
        return exp.Log(this=value, expression=base)

    # Single-argument form: some dialects treat LOG(x) as the natural logarithm.
    single_arg_type = exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log
    return single_arg_type(this=base)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder for *expr_type* that converts arg 1 into a dialect JSON path."""

    def _builder(args: t.List, dialect: Dialect) -> E:
        node = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        # Only JSONExtract supports trailing variadic arguments.
        if expr_type is exp.JSONExtract and len(args) > 2:
            node.set("expressions", args[2:])

        return node

    return _builder
def build_mod(args: t.List) -> exp.Mod:
    """Build a MOD node, parenthesizing binary operands: MOD(a + 1, 7) -> (a + 1) % 7."""

    def _paren_if_binary(node: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Wrapping preserves precedence when the operand is itself a binary expression.
        return exp.Paren(this=node) if isinstance(node, exp.Binary) else node

    return exp.Mod(
        this=_paren_if_binary(seq_get(args, 0)),
        expression=_paren_if_binary(seq_get(args, 1)),
    )
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    """Build an array-like constructor, recording bracket notation where dialects distinguish it."""
    node = exp_class(expressions=args)

    # Only Array nodes track whether [...] (vs ARRAY(...)) syntax was used,
    # and only for dialects where the two constructors differ.
    if dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS and exp_class == exp.Array:
        node.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return node
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    """Build CONVERT_TIMEZONE; the two-arg form falls back to *default_source_tz* as source."""
    if len(args) != 2:
        return exp.ConvertTimezone.from_arg_list(args)

    source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
    return exp.ConvertTimezone(
        source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
    )
166class Parser(metaclass=_Parser): 167 """ 168 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 169 170 Args: 171 error_level: The desired error level. 172 Default: ErrorLevel.IMMEDIATE 173 error_message_context: The amount of context to capture from a query string when displaying 174 the error message (in number of characters). 175 Default: 100 176 max_errors: Maximum number of error messages to include in a raised ParseError. 177 This is only relevant if error_level is ErrorLevel.RAISE. 178 Default: 3 179 """ 180 181 FUNCTIONS: t.Dict[str, t.Callable] = { 182 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 183 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 184 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 185 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 186 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 187 ), 188 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 189 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 190 ), 191 "CHAR": lambda args: exp.Chr(expressions=args), 192 "CHR": lambda args: exp.Chr(expressions=args), 193 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 194 "CONCAT": lambda args, dialect: exp.Concat( 195 expressions=args, 196 safe=not dialect.STRICT_STRING_CONCAT, 197 coalesce=dialect.CONCAT_COALESCE, 198 ), 199 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 200 expressions=args, 201 safe=not dialect.STRICT_STRING_CONCAT, 202 coalesce=dialect.CONCAT_COALESCE, 203 ), 204 "CONVERT_TIMEZONE": build_convert_timezone, 205 "DATE_TO_DATE_STR": lambda args: exp.Cast( 206 this=seq_get(args, 0), 207 to=exp.DataType(this=exp.DataType.Type.TEXT), 208 ), 209 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 210 start=seq_get(args, 0), 211 end=seq_get(args, 1), 212 step=seq_get(args, 2) or 
exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 213 ), 214 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 215 "HEX": build_hex, 216 "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)), 217 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 218 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 219 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 220 "LIKE": build_like, 221 "LOG": build_logarithm, 222 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 223 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 224 "LOWER": build_lower, 225 "LPAD": lambda args: build_pad(args), 226 "LEFTPAD": lambda args: build_pad(args), 227 "LTRIM": lambda args: build_trim(args), 228 "MOD": build_mod, 229 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 230 "RPAD": lambda args: build_pad(args, is_left=False), 231 "RTRIM": lambda args: build_trim(args, is_left=False), 232 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 233 if len(args) != 2 234 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 235 "TIME_TO_TIME_STR": lambda args: exp.Cast( 236 this=seq_get(args, 0), 237 to=exp.DataType(this=exp.DataType.Type.TEXT), 238 ), 239 "TO_HEX": build_hex, 240 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 241 this=exp.Cast( 242 this=seq_get(args, 0), 243 to=exp.DataType(this=exp.DataType.Type.TEXT), 244 ), 245 start=exp.Literal.number(1), 246 length=exp.Literal.number(10), 247 ), 248 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 249 "UPPER": build_upper, 250 "VAR_MAP": build_var_map, 251 } 252 253 NO_PAREN_FUNCTIONS = { 254 TokenType.CURRENT_DATE: exp.CurrentDate, 255 TokenType.CURRENT_DATETIME: exp.CurrentDate, 256 TokenType.CURRENT_TIME: exp.CurrentTime, 257 
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    # Tokens that denote struct-like (field-bearing) composite types
    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    # Tokens for container types that can wrap other (nested) types
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    # All tokens that can start a data type expression
    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    # Maps each signed integer/decimal type token to its unsigned counterpart
    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    # Quantifiers that may precede a subquery, e.g. ANY (SELECT ...)
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,  # SOME is an alias of ANY
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    # Object kinds that live inside a database/schema namespace
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    # Object kinds that can follow CREATE
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Object kinds that can follow ALTER
    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    # UNION comes in via *TYPE_TOKENS (struct types) but must not be an identifier
    ID_VAR_TOKENS.remove(TokenType.UNION)

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    # Identifier tokens that can also serve as a table alias; join/window keywords excluded
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    # Tokens that may be followed by a parenthesized argument list (function call syntax)
    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION:
t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    # Lambda syntaxes: `x -> expr` builds exp.Lambda, `x => expr` builds exp.Kwarg
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    # Operators that can directly follow a column expression (cast, JSON extraction, etc.)
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Maps a target Expression type to the parser used by parse_into
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Maps the leading token of a statement to its parser
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    # Parsers for binary "range" predicates (LIKE, BETWEEN, IN, IS, ...)
    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    # Keyword -> parser for DDL properties (CREATE ... WITH/OPTIONS clauses)
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_duplicate(),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Keyword -> parser for column constraints in CREATE TABLE / ALTER TABLE
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1028 ), 1029 "GENERATED": lambda self: self._parse_generated_as_identity(), 1030 "IDENTITY": lambda self: self._parse_auto_increment(), 1031 "INLINE": lambda self: self._parse_inline(), 1032 "LIKE": lambda self: self._parse_create_like(), 1033 "NOT": lambda self: self._parse_not_constraint(), 1034 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1035 "ON": lambda self: ( 1036 self._match(TokenType.UPDATE) 1037 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1038 ) 1039 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1040 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1041 "PERIOD": lambda self: self._parse_period_for_system_time(), 1042 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1043 "REFERENCES": lambda self: self._parse_references(match=False), 1044 "TITLE": lambda self: self.expression( 1045 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1046 ), 1047 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1048 "UNIQUE": lambda self: self._parse_unique(), 1049 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1050 "WITH": lambda self: self.expression( 1051 exp.Properties, expressions=self._parse_wrapped_properties() 1052 ), 1053 } 1054 1055 ALTER_PARSERS = { 1056 "ADD": lambda self: self._parse_alter_table_add(), 1057 "ALTER": lambda self: self._parse_alter_table_alter(), 1058 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1059 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1060 "DROP": lambda self: self._parse_alter_table_drop(), 1061 "RENAME": lambda self: self._parse_alter_table_rename(), 1062 "SET": lambda self: self._parse_alter_table_set(), 1063 "AS": lambda self: self._parse_select(), 1064 } 1065 1066 ALTER_ALTER_PARSERS = { 1067 "DISTKEY": 
lambda self: self._parse_alter_diststyle(), 1068 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1069 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1070 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1071 } 1072 1073 SCHEMA_UNNAMED_CONSTRAINTS = { 1074 "CHECK", 1075 "EXCLUDE", 1076 "FOREIGN KEY", 1077 "LIKE", 1078 "PERIOD", 1079 "PRIMARY KEY", 1080 "UNIQUE", 1081 } 1082 1083 NO_PAREN_FUNCTION_PARSERS = { 1084 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1085 "CASE": lambda self: self._parse_case(), 1086 "CONNECT_BY_ROOT": lambda self: self.expression( 1087 exp.ConnectByRoot, this=self._parse_column() 1088 ), 1089 "IF": lambda self: self._parse_if(), 1090 "NEXT": lambda self: self._parse_next_value_for(), 1091 } 1092 1093 INVALID_FUNC_NAME_TOKENS = { 1094 TokenType.IDENTIFIER, 1095 TokenType.STRING, 1096 } 1097 1098 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1099 1100 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1101 1102 FUNCTION_PARSERS = { 1103 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1104 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1105 "DECODE": lambda self: self._parse_decode(), 1106 "EXTRACT": lambda self: self._parse_extract(), 1107 "GAP_FILL": lambda self: self._parse_gap_fill(), 1108 "JSON_OBJECT": lambda self: self._parse_json_object(), 1109 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1110 "JSON_TABLE": lambda self: self._parse_json_table(), 1111 "MATCH": lambda self: self._parse_match_against(), 1112 "NORMALIZE": lambda self: self._parse_normalize(), 1113 "OPENJSON": lambda self: self._parse_open_json(), 1114 "OVERLAY": lambda self: self._parse_overlay(), 1115 "POSITION": lambda self: self._parse_position(), 1116 "PREDICT": lambda self: self._parse_predict(), 1117 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1118 "STRING_AGG": lambda self: self._parse_string_agg(), 1119 "SUBSTRING": lambda 
self: self._parse_substring(), 1120 "TRIM": lambda self: self._parse_trim(), 1121 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1122 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1123 } 1124 1125 QUERY_MODIFIER_PARSERS = { 1126 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1127 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1128 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1129 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1130 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1131 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1132 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1133 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1134 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1135 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1136 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1137 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1138 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1139 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1140 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1141 TokenType.CLUSTER_BY: lambda self: ( 1142 "cluster", 1143 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1144 ), 1145 TokenType.DISTRIBUTE_BY: lambda self: ( 1146 "distribute", 1147 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1148 ), 1149 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1150 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1151 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1152 } 1153 1154 SET_PARSERS = { 1155 "GLOBAL": lambda self: 
self._parse_set_item_assignment("GLOBAL"), 1156 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1157 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1158 "TRANSACTION": lambda self: self._parse_set_transaction(), 1159 } 1160 1161 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1162 1163 TYPE_LITERAL_PARSERS = { 1164 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1165 } 1166 1167 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1168 1169 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1170 1171 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1172 1173 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1174 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1175 "ISOLATION": ( 1176 ("LEVEL", "REPEATABLE", "READ"), 1177 ("LEVEL", "READ", "COMMITTED"), 1178 ("LEVEL", "READ", "UNCOMITTED"), 1179 ("LEVEL", "SERIALIZABLE"), 1180 ), 1181 "READ": ("WRITE", "ONLY"), 1182 } 1183 1184 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1185 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1186 ) 1187 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1188 1189 CREATE_SEQUENCE: OPTIONS_TYPE = { 1190 "SCALE": ("EXTEND", "NOEXTEND"), 1191 "SHARD": ("EXTEND", "NOEXTEND"), 1192 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1193 **dict.fromkeys( 1194 ( 1195 "SESSION", 1196 "GLOBAL", 1197 "KEEP", 1198 "NOKEEP", 1199 "ORDER", 1200 "NOORDER", 1201 "NOCACHE", 1202 "CYCLE", 1203 "NOCYCLE", 1204 "NOMINVALUE", 1205 "NOMAXVALUE", 1206 "NOSCALE", 1207 "NOSHARD", 1208 ), 1209 tuple(), 1210 ), 1211 } 1212 1213 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1214 1215 USABLES: OPTIONS_TYPE = dict.fromkeys( 1216 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1217 ) 1218 1219 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1220 1221 
    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    # Options accepted after PRIMARY KEY / FOREIGN KEY constraint definitions
    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    # INSERT OR <alternative> keywords
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    # Tokens that may follow ALTER TABLE ... ADD when adding a constraint
    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    # ODBC date/time escape literal markers, e.g. {d '2024-01-01'}
    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # Whether :: casts are strict (exp.Cast) rather than lenient (exp.TryCast)
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g.
SELECT 1 FROM y.z AS z, z.a (Redshift) 1333 SUPPORTS_IMPLICIT_UNNEST = False 1334 1335 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1336 INTERVAL_SPANS = True 1337 1338 # Whether a PARTITION clause can follow a table reference 1339 SUPPORTS_PARTITION_SELECTION = False 1340 1341 __slots__ = ( 1342 "error_level", 1343 "error_message_context", 1344 "max_errors", 1345 "dialect", 1346 "sql", 1347 "errors", 1348 "_tokens", 1349 "_index", 1350 "_curr", 1351 "_next", 1352 "_prev", 1353 "_prev_comments", 1354 ) 1355 1356 # Autofilled 1357 SHOW_TRIE: t.Dict = {} 1358 SET_TRIE: t.Dict = {} 1359 1360 def __init__( 1361 self, 1362 error_level: t.Optional[ErrorLevel] = None, 1363 error_message_context: int = 100, 1364 max_errors: int = 3, 1365 dialect: DialectType = None, 1366 ): 1367 from sqlglot.dialects import Dialect 1368 1369 self.error_level = error_level or ErrorLevel.IMMEDIATE 1370 self.error_message_context = error_message_context 1371 self.max_errors = max_errors 1372 self.dialect = Dialect.get_or_raise(dialect) 1373 self.reset() 1374 1375 def reset(self): 1376 self.sql = "" 1377 self.errors = [] 1378 self._tokens = [] 1379 self._index = 0 1380 self._curr = None 1381 self._next = None 1382 self._prev = None 1383 self._prev_comments = None 1384 1385 def parse( 1386 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1387 ) -> t.List[t.Optional[exp.Expression]]: 1388 """ 1389 Parses a list of tokens and returns a list of syntax trees, one tree 1390 per parsed SQL statement. 1391 1392 Args: 1393 raw_tokens: The list of tokens. 1394 sql: The original SQL string, used to produce helpful debug messages. 1395 1396 Returns: 1397 The list of the produced syntax trees. 
1398 """ 1399 return self._parse( 1400 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1401 ) 1402 1403 def parse_into( 1404 self, 1405 expression_types: exp.IntoType, 1406 raw_tokens: t.List[Token], 1407 sql: t.Optional[str] = None, 1408 ) -> t.List[t.Optional[exp.Expression]]: 1409 """ 1410 Parses a list of tokens into a given Expression type. If a collection of Expression 1411 types is given instead, this method will try to parse the token list into each one 1412 of them, stopping at the first for which the parsing succeeds. 1413 1414 Args: 1415 expression_types: The expression type(s) to try and parse the token list into. 1416 raw_tokens: The list of tokens. 1417 sql: The original SQL string, used to produce helpful debug messages. 1418 1419 Returns: 1420 The target Expression. 1421 """ 1422 errors = [] 1423 for expression_type in ensure_list(expression_types): 1424 parser = self.EXPRESSION_PARSERS.get(expression_type) 1425 if not parser: 1426 raise TypeError(f"No parser registered for {expression_type}") 1427 1428 try: 1429 return self._parse(parser, raw_tokens, sql) 1430 except ParseError as e: 1431 e.errors[0]["into_expression"] = expression_type 1432 errors.append(e) 1433 1434 raise ParseError( 1435 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1436 errors=merge_errors(errors), 1437 ) from errors[-1] 1438 1439 def _parse( 1440 self, 1441 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1442 raw_tokens: t.List[Token], 1443 sql: t.Optional[str] = None, 1444 ) -> t.List[t.Optional[exp.Expression]]: 1445 self.reset() 1446 self.sql = sql or "" 1447 1448 total = len(raw_tokens) 1449 chunks: t.List[t.List[Token]] = [[]] 1450 1451 for i, token in enumerate(raw_tokens): 1452 if token.token_type == TokenType.SEMICOLON: 1453 if token.comments: 1454 chunks.append([token]) 1455 1456 if i < total - 1: 1457 chunks.append([]) 1458 else: 1459 chunks[-1].append(token) 1460 1461 expressions = [] 1462 1463 for 
tokens in chunks: 1464 self._index = -1 1465 self._tokens = tokens 1466 self._advance() 1467 1468 expressions.append(parse_method(self)) 1469 1470 if self._index < len(self._tokens): 1471 self.raise_error("Invalid expression / Unexpected token") 1472 1473 self.check_errors() 1474 1475 return expressions 1476 1477 def check_errors(self) -> None: 1478 """Logs or raises any found errors, depending on the chosen error level setting.""" 1479 if self.error_level == ErrorLevel.WARN: 1480 for error in self.errors: 1481 logger.error(str(error)) 1482 elif self.error_level == ErrorLevel.RAISE and self.errors: 1483 raise ParseError( 1484 concat_messages(self.errors, self.max_errors), 1485 errors=merge_errors(self.errors), 1486 ) 1487 1488 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1489 """ 1490 Appends an error in the list of recorded errors or raises it, depending on the chosen 1491 error level setting. 1492 """ 1493 token = token or self._curr or self._prev or Token.string("") 1494 start = token.start 1495 end = token.end + 1 1496 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1497 highlight = self.sql[start:end] 1498 end_context = self.sql[end : end + self.error_message_context] 1499 1500 error = ParseError.new( 1501 f"{message}. Line {token.line}, Col: {token.col}.\n" 1502 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1503 description=message, 1504 line=token.line, 1505 col=token.col, 1506 start_context=start_context, 1507 highlight=highlight, 1508 end_context=end_context, 1509 ) 1510 1511 if self.error_level == ErrorLevel.IMMEDIATE: 1512 raise error 1513 1514 self.errors.append(error) 1515 1516 def expression( 1517 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1518 ) -> E: 1519 """ 1520 Creates a new, validated Expression. 1521 1522 Args: 1523 exp_class: The expression class to instantiate. 1524 comments: An optional list of comments to attach to the expression. 
1525 kwargs: The arguments to set for the expression along with their respective values. 1526 1527 Returns: 1528 The target expression. 1529 """ 1530 instance = exp_class(**kwargs) 1531 instance.add_comments(comments) if comments else self._add_comments(instance) 1532 return self.validate_expression(instance) 1533 1534 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1535 if expression and self._prev_comments: 1536 expression.add_comments(self._prev_comments) 1537 self._prev_comments = None 1538 1539 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1540 """ 1541 Validates an Expression, making sure that all its mandatory arguments are set. 1542 1543 Args: 1544 expression: The expression to validate. 1545 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1546 1547 Returns: 1548 The validated expression. 1549 """ 1550 if self.error_level != ErrorLevel.IGNORE: 1551 for error_message in expression.error_messages(args): 1552 self.raise_error(error_message) 1553 1554 return expression 1555 1556 def _find_sql(self, start: Token, end: Token) -> str: 1557 return self.sql[start.start : end.end + 1] 1558 1559 def _is_connected(self) -> bool: 1560 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1561 1562 def _advance(self, times: int = 1) -> None: 1563 self._index += times 1564 self._curr = seq_get(self._tokens, self._index) 1565 self._next = seq_get(self._tokens, self._index + 1) 1566 1567 if self._index > 0: 1568 self._prev = self._tokens[self._index - 1] 1569 self._prev_comments = self._prev.comments 1570 else: 1571 self._prev = None 1572 self._prev_comments = None 1573 1574 def _retreat(self, index: int) -> None: 1575 if index != self._index: 1576 self._advance(index - self._index) 1577 1578 def _warn_unsupported(self) -> None: 1579 if len(self._tokens) <= 1: 1580 return 1581 1582 # We use _find_sql because self.sql may comprise multiple chunks, 
        # and we're only interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        # Fallback parser: wrap the remainder of the statement in an opaque
        # exp.Command node (keyword + raw string), warning that the syntax is
        # unsupported.
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        # Force IMMEDIATE so any failure inside parse_method surfaces here as a
        # ParseError instead of being silently accumulated in self.errors.
        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            # Restore the cursor on failure (or unconditionally, if requested)
            # and always restore the caller's error level.
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses COMMENT [IF EXISTS] ON <kind> <name> IS <string> statements.
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown comment target - fall back to parsing as a raw command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this
= self._parse_id_var() 1639 1640 self._match(TokenType.IS) 1641 1642 return self.expression( 1643 exp.Comment, 1644 this=this, 1645 kind=kind.text, 1646 expression=self._parse_string(), 1647 exists=exists, 1648 materialized=materialized, 1649 ) 1650 1651 def _parse_to_table( 1652 self, 1653 ) -> exp.ToTableProperty: 1654 table = self._parse_table_parts(schema=True) 1655 return self.expression(exp.ToTableProperty, this=table) 1656 1657 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1658 def _parse_ttl(self) -> exp.Expression: 1659 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1660 this = self._parse_bitwise() 1661 1662 if self._match_text_seq("DELETE"): 1663 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1664 if self._match_text_seq("RECOMPRESS"): 1665 return self.expression( 1666 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1667 ) 1668 if self._match_text_seq("TO", "DISK"): 1669 return self.expression( 1670 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1671 ) 1672 if self._match_text_seq("TO", "VOLUME"): 1673 return self.expression( 1674 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1675 ) 1676 1677 return this 1678 1679 expressions = self._parse_csv(_parse_ttl_action) 1680 where = self._parse_where() 1681 group = self._parse_group() 1682 1683 aggregates = None 1684 if group and self._match(TokenType.SET): 1685 aggregates = self._parse_csv(self._parse_set_item) 1686 1687 return self.expression( 1688 exp.MergeTreeTTL, 1689 expressions=expressions, 1690 where=where, 1691 group=group, 1692 aggregates=aggregates, 1693 ) 1694 1695 def _parse_statement(self) -> t.Optional[exp.Expression]: 1696 if self._curr is None: 1697 return None 1698 1699 if self._match_set(self.STATEMENT_PARSERS): 1700 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1701 1702 if self._match_set(self.dialect.tokenizer.COMMANDS): 1703 return 
self._parse_command() 1704 1705 expression = self._parse_expression() 1706 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1707 return self._parse_query_modifiers(expression) 1708 1709 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1710 start = self._prev 1711 temporary = self._match(TokenType.TEMPORARY) 1712 materialized = self._match_text_seq("MATERIALIZED") 1713 1714 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1715 if not kind: 1716 return self._parse_as_command(start) 1717 1718 concurrently = self._match_text_seq("CONCURRENTLY") 1719 if_exists = exists or self._parse_exists() 1720 table = self._parse_table_parts( 1721 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1722 ) 1723 1724 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1725 1726 if self._match(TokenType.L_PAREN, advance=False): 1727 expressions = self._parse_wrapped_csv(self._parse_types) 1728 else: 1729 expressions = None 1730 1731 return self.expression( 1732 exp.Drop, 1733 comments=start.comments, 1734 exists=if_exists, 1735 this=table, 1736 expressions=expressions, 1737 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1738 temporary=temporary, 1739 materialized=materialized, 1740 cascade=self._match_text_seq("CASCADE"), 1741 constraints=self._match_text_seq("CONSTRAINTS"), 1742 purge=self._match_text_seq("PURGE"), 1743 cluster=cluster, 1744 concurrently=concurrently, 1745 ) 1746 1747 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1748 return ( 1749 self._match_text_seq("IF") 1750 and (not not_ or self._match(TokenType.NOT)) 1751 and self._match(TokenType.EXISTS) 1752 ) 1753 1754 def _parse_create(self) -> exp.Create | exp.Command: 1755 # Note: this can't be None because we've matched a statement parser 1756 start = self._prev 1757 comments = self._prev_comments 1758 1759 replace = ( 1760 start.token_type == TokenType.REPLACE 1761 
or self._match_pair(TokenType.OR, TokenType.REPLACE) 1762 or self._match_pair(TokenType.OR, TokenType.ALTER) 1763 ) 1764 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1765 1766 unique = self._match(TokenType.UNIQUE) 1767 1768 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1769 clustered = True 1770 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1771 "COLUMNSTORE" 1772 ): 1773 clustered = False 1774 else: 1775 clustered = None 1776 1777 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1778 self._advance() 1779 1780 properties = None 1781 create_token = self._match_set(self.CREATABLES) and self._prev 1782 1783 if not create_token: 1784 # exp.Properties.Location.POST_CREATE 1785 properties = self._parse_properties() 1786 create_token = self._match_set(self.CREATABLES) and self._prev 1787 1788 if not properties or not create_token: 1789 return self._parse_as_command(start) 1790 1791 concurrently = self._match_text_seq("CONCURRENTLY") 1792 exists = self._parse_exists(not_=True) 1793 this = None 1794 expression: t.Optional[exp.Expression] = None 1795 indexes = None 1796 no_schema_binding = None 1797 begin = None 1798 end = None 1799 clone = None 1800 1801 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1802 nonlocal properties 1803 if properties and temp_props: 1804 properties.expressions.extend(temp_props.expressions) 1805 elif temp_props: 1806 properties = temp_props 1807 1808 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1809 this = self._parse_user_defined_function(kind=create_token.token_type) 1810 1811 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1812 extend_props(self._parse_properties()) 1813 1814 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1815 extend_props(self._parse_properties()) 1816 1817 if not expression: 1818 if self._match(TokenType.COMMAND): 1819 expression = 
self._parse_as_command(self._prev) 1820 else: 1821 begin = self._match(TokenType.BEGIN) 1822 return_ = self._match_text_seq("RETURN") 1823 1824 if self._match(TokenType.STRING, advance=False): 1825 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1826 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1827 expression = self._parse_string() 1828 extend_props(self._parse_properties()) 1829 else: 1830 expression = self._parse_statement() 1831 1832 end = self._match_text_seq("END") 1833 1834 if return_: 1835 expression = self.expression(exp.Return, this=expression) 1836 elif create_token.token_type == TokenType.INDEX: 1837 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1838 if not self._match(TokenType.ON): 1839 index = self._parse_id_var() 1840 anonymous = False 1841 else: 1842 index = None 1843 anonymous = True 1844 1845 this = self._parse_index(index=index, anonymous=anonymous) 1846 elif create_token.token_type in self.DB_CREATABLES: 1847 table_parts = self._parse_table_parts( 1848 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1849 ) 1850 1851 # exp.Properties.Location.POST_NAME 1852 self._match(TokenType.COMMA) 1853 extend_props(self._parse_properties(before=True)) 1854 1855 this = self._parse_schema(this=table_parts) 1856 1857 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1858 extend_props(self._parse_properties()) 1859 1860 self._match(TokenType.ALIAS) 1861 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1862 # exp.Properties.Location.POST_ALIAS 1863 extend_props(self._parse_properties()) 1864 1865 if create_token.token_type == TokenType.SEQUENCE: 1866 expression = self._parse_types() 1867 extend_props(self._parse_properties()) 1868 else: 1869 expression = self._parse_ddl_select() 1870 1871 if create_token.token_type == TokenType.TABLE: 1872 # exp.Properties.Location.POST_EXPRESSION 
1873 extend_props(self._parse_properties()) 1874 1875 indexes = [] 1876 while True: 1877 index = self._parse_index() 1878 1879 # exp.Properties.Location.POST_INDEX 1880 extend_props(self._parse_properties()) 1881 if not index: 1882 break 1883 else: 1884 self._match(TokenType.COMMA) 1885 indexes.append(index) 1886 elif create_token.token_type == TokenType.VIEW: 1887 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1888 no_schema_binding = True 1889 1890 shallow = self._match_text_seq("SHALLOW") 1891 1892 if self._match_texts(self.CLONE_KEYWORDS): 1893 copy = self._prev.text.lower() == "copy" 1894 clone = self.expression( 1895 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1896 ) 1897 1898 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1899 return self._parse_as_command(start) 1900 1901 create_kind_text = create_token.text.upper() 1902 return self.expression( 1903 exp.Create, 1904 comments=comments, 1905 this=this, 1906 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1907 replace=replace, 1908 refresh=refresh, 1909 unique=unique, 1910 expression=expression, 1911 exists=exists, 1912 properties=properties, 1913 indexes=indexes, 1914 no_schema_binding=no_schema_binding, 1915 begin=begin, 1916 end=end, 1917 clone=clone, 1918 concurrently=concurrently, 1919 clustered=clustered, 1920 ) 1921 1922 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1923 seq = exp.SequenceProperties() 1924 1925 options = [] 1926 index = self._index 1927 1928 while self._curr: 1929 self._match(TokenType.COMMA) 1930 if self._match_text_seq("INCREMENT"): 1931 self._match_text_seq("BY") 1932 self._match_text_seq("=") 1933 seq.set("increment", self._parse_term()) 1934 elif self._match_text_seq("MINVALUE"): 1935 seq.set("minvalue", self._parse_term()) 1936 elif self._match_text_seq("MAXVALUE"): 1937 seq.set("maxvalue", self._parse_term()) 1938 elif 
self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1939 self._match_text_seq("=") 1940 seq.set("start", self._parse_term()) 1941 elif self._match_text_seq("CACHE"): 1942 # T-SQL allows empty CACHE which is initialized dynamically 1943 seq.set("cache", self._parse_number() or True) 1944 elif self._match_text_seq("OWNED", "BY"): 1945 # "OWNED BY NONE" is the default 1946 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1947 else: 1948 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1949 if opt: 1950 options.append(opt) 1951 else: 1952 break 1953 1954 seq.set("options", options if options else None) 1955 return None if self._index == index else seq 1956 1957 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1958 # only used for teradata currently 1959 self._match(TokenType.COMMA) 1960 1961 kwargs = { 1962 "no": self._match_text_seq("NO"), 1963 "dual": self._match_text_seq("DUAL"), 1964 "before": self._match_text_seq("BEFORE"), 1965 "default": self._match_text_seq("DEFAULT"), 1966 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1967 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1968 "after": self._match_text_seq("AFTER"), 1969 "minimum": self._match_texts(("MIN", "MINIMUM")), 1970 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1971 } 1972 1973 if self._match_texts(self.PROPERTY_PARSERS): 1974 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1975 try: 1976 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1977 except TypeError: 1978 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1979 1980 return None 1981 1982 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1983 return self._parse_wrapped_csv(self._parse_property) 1984 1985 def _parse_property(self) -> t.Optional[exp.Expression]: 1986 if self._match_texts(self.PROPERTY_PARSERS): 1987 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1988 1989 if 
self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1990 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1991 1992 if self._match_text_seq("COMPOUND", "SORTKEY"): 1993 return self._parse_sortkey(compound=True) 1994 1995 if self._match_text_seq("SQL", "SECURITY"): 1996 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1997 1998 index = self._index 1999 key = self._parse_column() 2000 2001 if not self._match(TokenType.EQ): 2002 self._retreat(index) 2003 return self._parse_sequence_properties() 2004 2005 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2006 if isinstance(key, exp.Column): 2007 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2008 2009 value = self._parse_bitwise() or self._parse_var(any_token=True) 2010 2011 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2012 if isinstance(value, exp.Column): 2013 value = exp.var(value.name) 2014 2015 return self.expression(exp.Property, this=key, value=value) 2016 2017 def _parse_stored(self) -> exp.FileFormatProperty: 2018 self._match(TokenType.ALIAS) 2019 2020 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2021 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2022 2023 return self.expression( 2024 exp.FileFormatProperty, 2025 this=( 2026 self.expression( 2027 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2028 ) 2029 if input_format or output_format 2030 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2031 ), 2032 ) 2033 2034 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2035 field = self._parse_field() 2036 if isinstance(field, exp.Identifier) and not field.quoted: 2037 field = exp.var(field) 2038 2039 return field 2040 2041 def _parse_property_assignment(self, 
        exp_class: t.Type[E], **kwargs: t.Any) -> E:
        # Consumes an optional '=' / AS and wraps the following unquoted field
        # in the given property expression class.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        # Greedily collects consecutive properties until one fails to parse;
        # `before` switches to the Teradata-style pre-name property parser.
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            # A single parse may yield one property or a list of them.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # VOLATILE is ambiguous: when a PRE_VOLATILE_TOKENS token appears two
        # positions back it is a table modifier (exp.VolatileProperty);
        # otherwise it is treated as a stability specifier.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit =
self._parse_var(any_token=True) 2096 return exp.var(f"{number_str}{unit}") 2097 2098 def _parse_system_versioning_property( 2099 self, with_: bool = False 2100 ) -> exp.WithSystemVersioningProperty: 2101 self._match(TokenType.EQ) 2102 prop = self.expression( 2103 exp.WithSystemVersioningProperty, 2104 **{ # type: ignore 2105 "on": True, 2106 "with": with_, 2107 }, 2108 ) 2109 2110 if self._match_text_seq("OFF"): 2111 prop.set("on", False) 2112 return prop 2113 2114 self._match(TokenType.ON) 2115 if self._match(TokenType.L_PAREN): 2116 while self._curr and not self._match(TokenType.R_PAREN): 2117 if self._match_text_seq("HISTORY_TABLE", "="): 2118 prop.set("this", self._parse_table_parts()) 2119 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2120 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2121 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2122 prop.set("retention_period", self._parse_retention_period()) 2123 2124 self._match(TokenType.COMMA) 2125 2126 return prop 2127 2128 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2129 self._match(TokenType.EQ) 2130 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2131 prop = self.expression(exp.DataDeletionProperty, on=on) 2132 2133 if self._match(TokenType.L_PAREN): 2134 while self._curr and not self._match(TokenType.R_PAREN): 2135 if self._match_text_seq("FILTER_COLUMN", "="): 2136 prop.set("filter_column", self._parse_column()) 2137 elif self._match_text_seq("RETENTION_PERIOD", "="): 2138 prop.set("retention_period", self._parse_retention_period()) 2139 2140 self._match(TokenType.COMMA) 2141 2142 return prop 2143 2144 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2145 kind = "HASH" 2146 expressions: t.Optional[t.List[exp.Expression]] = None 2147 if self._match_text_seq("BY", "HASH"): 2148 expressions = self._parse_wrapped_csv(self._parse_id_var) 2149 elif self._match_text_seq("BY", "RANDOM"): 2150 kind = 
"RANDOM" 2151 2152 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2153 buckets: t.Optional[exp.Expression] = None 2154 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2155 buckets = self._parse_number() 2156 2157 return self.expression( 2158 exp.DistributedByProperty, 2159 expressions=expressions, 2160 kind=kind, 2161 buckets=buckets, 2162 order=self._parse_order(), 2163 ) 2164 2165 def _parse_duplicate(self) -> exp.DuplicateKeyProperty: 2166 self._match_text_seq("KEY") 2167 expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False) 2168 return self.expression(exp.DuplicateKeyProperty, expressions=expressions) 2169 2170 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2171 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2172 prop = self._parse_system_versioning_property(with_=True) 2173 self._match_r_paren() 2174 return prop 2175 2176 if self._match(TokenType.L_PAREN, advance=False): 2177 return self._parse_wrapped_properties() 2178 2179 if self._match_text_seq("JOURNAL"): 2180 return self._parse_withjournaltable() 2181 2182 if self._match_texts(self.VIEW_ATTRIBUTES): 2183 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2184 2185 if self._match_text_seq("DATA"): 2186 return self._parse_withdata(no=False) 2187 elif self._match_text_seq("NO", "DATA"): 2188 return self._parse_withdata(no=True) 2189 2190 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2191 return self._parse_serde_properties(with_=True) 2192 2193 if self._match(TokenType.SCHEMA): 2194 return self.expression( 2195 exp.WithSchemaBindingProperty, 2196 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2197 ) 2198 2199 if not self._next: 2200 return None 2201 2202 return self._parse_withisolatedloading() 2203 2204 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2205 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2206 
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        # Both parts are required; bail out so the caller can backtrack.
        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        # WITH JOURNAL TABLE = <table> (the JOURNAL keyword was consumed by the caller).
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        # CHECKSUM = {ON | OFF | DEFAULT}; `on` stays None when neither ON nor
        # OFF is present.
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        # CLUSTER BY <ordered expr list>, optionally parenthesized.
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        # CLUSTERED BY (cols) [SORTED BY (ordered cols)] INTO <n> BUCKETS
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
sorted_by=sorted_by, 2271 buckets=buckets, 2272 ) 2273 2274 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2275 if not self._match_text_seq("GRANTS"): 2276 self._retreat(self._index - 1) 2277 return None 2278 2279 return self.expression(exp.CopyGrantsProperty) 2280 2281 def _parse_freespace(self) -> exp.FreespaceProperty: 2282 self._match(TokenType.EQ) 2283 return self.expression( 2284 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2285 ) 2286 2287 def _parse_mergeblockratio( 2288 self, no: bool = False, default: bool = False 2289 ) -> exp.MergeBlockRatioProperty: 2290 if self._match(TokenType.EQ): 2291 return self.expression( 2292 exp.MergeBlockRatioProperty, 2293 this=self._parse_number(), 2294 percent=self._match(TokenType.PERCENT), 2295 ) 2296 2297 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2298 2299 def _parse_datablocksize( 2300 self, 2301 default: t.Optional[bool] = None, 2302 minimum: t.Optional[bool] = None, 2303 maximum: t.Optional[bool] = None, 2304 ) -> exp.DataBlocksizeProperty: 2305 self._match(TokenType.EQ) 2306 size = self._parse_number() 2307 2308 units = None 2309 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2310 units = self._prev.text 2311 2312 return self.expression( 2313 exp.DataBlocksizeProperty, 2314 size=size, 2315 units=units, 2316 default=default, 2317 minimum=minimum, 2318 maximum=maximum, 2319 ) 2320 2321 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2322 self._match(TokenType.EQ) 2323 always = self._match_text_seq("ALWAYS") 2324 manual = self._match_text_seq("MANUAL") 2325 never = self._match_text_seq("NEVER") 2326 default = self._match_text_seq("DEFAULT") 2327 2328 autotemp = None 2329 if self._match_text_seq("AUTOTEMP"): 2330 autotemp = self._parse_schema() 2331 2332 return self.expression( 2333 exp.BlockCompressionProperty, 2334 always=always, 2335 manual=manual, 2336 never=never, 2337 default=default, 
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        """Parse `[NO] [CONCURRENT] ISOLATED LOADING [<target>]`, rewinding fully on failure."""
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        # Without the mandatory ISOLATED LOADING keywords this isn't the property at all,
        # so undo any NO/CONCURRENT tokens we consumed above.
        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING property: object kind, optional object name, FOR/IN,
        lock type, and an optional OVERRIDE flag."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locking doesn't name an object; the other kinds are followed by one.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse an optional PARTITION BY expression list; empty list when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a PostgreSQL partition bound: IN (...), FROM (...) TO (...), or
        WITH (MODULUS <n>, REMAINDER <n>). Raises on anything else."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are keywords, not expressions, inside FROM/TO bounds.
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse `OF <parent_table> { DEFAULT | FOR VALUES <bound_spec> }`."""
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this,
                               expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse the `[=] <schema-or-field>` tail of a PARTITIONED BY property."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse the `[AND [NO] STATISTICS]` tail of a WITH [NO] DATA clause."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse the `SQL` tail of CONTAINS SQL; None if it doesn't follow."""
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse the `SQL DATA` tail of MODIFIES SQL DATA; None if it doesn't follow."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        """Disambiguate a NO-prefixed property: NO PRIMARY INDEX or NO SQL."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Disambiguate an ON-prefixed property: ON COMMIT PRESERVE/DELETE ROWS,
        otherwise a generic ON <schema> property."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse the `SQL DATA` tail of READS SQL DATA; None if it doesn't follow."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        # DISTKEY takes a single parenthesized identifier.
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse `LIKE <table> [INCLUDING|EXCLUDING <option>]...` for CREATE TABLE ... LIKE."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse a parenthesized SORTKEY column list; `compound` marks COMPOUND SORTKEY."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse the `[=] <charset>` tail of a CHARACTER SET property."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse `[WITH CONNECTION] <connection>` for REMOTE model properties."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: TABLE<...>, TABLE (<schema>), NULL ON NULL INPUT,
        or a plain type."""
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # Angle-bracketed struct form: RETURNS TABLE<col type, ...>
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value,
                               is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        """Parse a DESCRIBE statement: optional creatable kind, optional style
        (EXTENDED/FORMATTED/HISTORY), target table, properties and partition."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # A dot means the "style" token was actually the first part of a qualified
            # table name, so undo both matches and reparse it as part of the table.
            style = None
            self._retreat(self._index - 2)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        """Parse a multi-table INSERT (INSERT FIRST/ALL ... SELECT): a sequence of
        [WHEN <cond> THEN] [ELSE] INTO <table> [VALUES ...] branches plus the source query."""
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            # Optional WHEN <condition> THEN guard for this branch.
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            # No INTO means there are no more branches.
            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        """Parse an INSERT statement (the INSERT keyword was already consumed),
        dispatching to multi-table inserts when FIRST/ALL follows."""
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # INSERT ... DIRECTORY '<path>' targets a directory rather than a table.
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            # INSERT FIRST / INSERT ALL starts a multi-table insert.
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            # INSERT OR REPLACE/IGNORE/... alternative actions.
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # RETURNING may appear either before or after the source expression.
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse a KILL statement: optional CONNECTION/QUERY kind plus a target id."""
        # The conditional's test runs first, so _prev is the matched keyword here.
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT (PostgreSQL-style) or ON DUPLICATE KEY (MySQL-style)
        clauses, including the conflict target and the resolution action."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            # Conflict target: either a named constraint or a key-column list.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            # DO UPDATE [SET] <assignments>
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse a RETURNING clause with an optional INTO target."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse the FORMAT tail of a ROW FORMAT clause (ROW already consumed)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        """Parse `[WITH] SERDEPROPERTIES (<k>=<v>, ...)`, rewinding on failure."""
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT SERDE '<class>' [WITH SERDEPROPERTIES ...] or
        ROW FORMAT DELIMITED with its optional terminator sub-clauses.
        `match_row` requires the leading ROW FORMAT keywords."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each sub-clause is optional and order-sensitive; collect whichever appear.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive-style LOAD DATA [LOCAL] INPATH ...; fall back to a raw Command
        for any other LOAD variant."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and
                self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement (DELETE already consumed)."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            # RETURNING may appear before or after WHERE depending on the dialect.
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement (UPDATE already consumed)."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        # OPTIONS ('key' = 'value') — a single key/value pair is collected here.
        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse a PARTITION (<assignments>) clause; None when PARTITION doesn't follow."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment)
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        """Parse one VALUES row: a parenthesized expression list, or a bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a query expression: a (possibly CTE-prefixed) SELECT, a parenthesized
        subquery/table, VALUES, SUMMARIZE, DESCRIBE or STREAM.

        Args:
            nested: whether this call is parsing a nested (parenthesized) query.
            table: whether a bare table reference is acceptable inside parentheses.
            parse_subquery_alias: whether to consume an alias after a subquery.
            parse_set_operation: whether to attach trailing UNION/INTERSECT/EXCEPT.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            # A following dot means ALL/DISTINCT would be a column qualifier, not a quantifier.
            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            # BigQuery-style SELECT AS STRUCT / SELECT AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )

                # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
                # in case a modifier (e.g. join) is following
                if table and isinstance(this, exp.Values) and this.alias:
                    alias = this.args["alias"].pop()
                    this = exp.Table(this=this, alias=alias)

            this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self.expression(exp.Stream, this=self._parse_function())
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause and its comma-separated CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            expressions.append(self._parse_cte())
            if last_comments:
                expressions[-1].add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # After a comma, tolerate a redundant repeated WITH keyword.
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: `<alias> AS [NOT] [MATERIALIZED] (<statement>)`."""
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        comments = self._prev_comments

        # Tri-state: True/False when a materialization hint is given, else None.
        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse `[AS] <name> [(<columns>)]`; None when neither a name nor columns appear."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # An empty parenthesized list means the paren wasn't a column list; rewind.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery, attaching pivots, optional alias and table sample."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite comma-joined references to earlier tables (implicit unnests) into
        explicit UNNEST(...) calls, tracking names seen so far in `refs`."""
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                # A join with no ON whose first part names a prior table is an implicit unnest.
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and registered query modifiers (WHERE, GROUP BY,
        LIMIT, ...) to a query or table expression."""
        if isinstance(this,
                      (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # LIMIT may carry an embedded offset (e.g. LIMIT x, y);
                            # promote it to a proper Offset node on the query.
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                # LIMIT ... BY expressions actually belong to the offset.
                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                    continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint comment `/*+ ... */` into an exp.Hint."""
        if self._match(TokenType.HINT):
            hints = []
            # Hints may be several comma-separated groups; gather until none remain.
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(upper=True)
                ),
                [],
            ):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse `INTO [TEMPORARY|UNLOGGED] [TABLE] <table>`."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; `joins` also consumes trailing joins on the table."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        """Parse one MEASURES entry: `[FINAL|RUNNING] <expression>`."""
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE clause: PARTITION BY, ORDER BY, MEASURES,
        rows-per-match mode, AFTER MATCH SKIP, PATTERN and DEFINE."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        # Rows-per-match mode, captured verbatim as a Var.
        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        # AFTER MATCH SKIP strategy, also captured verbatim.
        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The regex-like pattern isn't tokenizable as SQL; scan raw tokens while
            # balancing parentheses and recover the original text via _find_sql.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL [VIEW] [OUTER] ..., or T-SQL CROSS/OUTER APPLY."""
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        # cross_apply: True for CROSS APPLY, False for OUTER APPLY, None for LATERAL.
        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: an UNNEST, a function call, or a dotted reference.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            # LATERAL VIEW <fn> <table> [AS <col>, ...]
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias,
this=table, columns=columns 3346 ) 3347 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3348 # We move the alias from the lateral's child node to the lateral itself 3349 table_alias = this.args["alias"].pop() 3350 else: 3351 table_alias = self._parse_table_alias() 3352 3353 return self.expression( 3354 exp.Lateral, 3355 this=this, 3356 view=view, 3357 outer=outer, 3358 alias=table_alias, 3359 cross_apply=cross_apply, 3360 ) 3361 3362 def _parse_join_parts( 3363 self, 3364 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3365 return ( 3366 self._match_set(self.JOIN_METHODS) and self._prev, 3367 self._match_set(self.JOIN_SIDES) and self._prev, 3368 self._match_set(self.JOIN_KINDS) and self._prev, 3369 ) 3370 3371 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3372 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3373 this = self._parse_column() 3374 if isinstance(this, exp.Column): 3375 return this.this 3376 return this 3377 3378 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3379 3380 def _parse_join( 3381 self, skip_join_token: bool = False, parse_bracket: bool = False 3382 ) -> t.Optional[exp.Join]: 3383 if self._match(TokenType.COMMA): 3384 return self.expression(exp.Join, this=self._parse_table()) 3385 3386 index = self._index 3387 method, side, kind = self._parse_join_parts() 3388 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3389 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3390 3391 if not skip_join_token and not join: 3392 self._retreat(index) 3393 kind = None 3394 method = None 3395 side = None 3396 3397 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3398 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3399 3400 if not skip_join_token and not join and not outer_apply and not cross_apply: 3401 return None 3402 3403 kwargs: t.Dict[str, t.Any] 
= {"this": self._parse_table(parse_bracket=parse_bracket)} 3404 3405 if method: 3406 kwargs["method"] = method.text 3407 if side: 3408 kwargs["side"] = side.text 3409 if kind: 3410 kwargs["kind"] = kind.text 3411 if hint: 3412 kwargs["hint"] = hint 3413 3414 if self._match(TokenType.MATCH_CONDITION): 3415 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3416 3417 if self._match(TokenType.ON): 3418 kwargs["on"] = self._parse_assignment() 3419 elif self._match(TokenType.USING): 3420 kwargs["using"] = self._parse_using_identifiers() 3421 elif ( 3422 not (outer_apply or cross_apply) 3423 and not isinstance(kwargs["this"], exp.Unnest) 3424 and not (kind and kind.token_type == TokenType.CROSS) 3425 ): 3426 index = self._index 3427 joins: t.Optional[list] = list(self._parse_joins()) 3428 3429 if joins and self._match(TokenType.ON): 3430 kwargs["on"] = self._parse_assignment() 3431 elif joins and self._match(TokenType.USING): 3432 kwargs["using"] = self._parse_using_identifiers() 3433 else: 3434 joins = None 3435 self._retreat(index) 3436 3437 kwargs["this"].set("joins", joins if joins else None) 3438 3439 comments = [c for token in (method, side, kind) if token for c in token.comments] 3440 return self.expression(exp.Join, comments=comments, **kwargs) 3441 3442 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3443 this = self._parse_assignment() 3444 3445 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3446 return this 3447 3448 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3449 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3450 3451 return this 3452 3453 def _parse_index_params(self) -> exp.IndexParameters: 3454 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3455 3456 if self._match(TokenType.L_PAREN, advance=False): 3457 columns = self._parse_wrapped_csv(self._parse_with_operator) 3458 else: 3459 columns = None 3460 3461 include 
= self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3462 partition_by = self._parse_partition_by() 3463 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3464 tablespace = ( 3465 self._parse_var(any_token=True) 3466 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3467 else None 3468 ) 3469 where = self._parse_where() 3470 3471 on = self._parse_field() if self._match(TokenType.ON) else None 3472 3473 return self.expression( 3474 exp.IndexParameters, 3475 using=using, 3476 columns=columns, 3477 include=include, 3478 partition_by=partition_by, 3479 where=where, 3480 with_storage=with_storage, 3481 tablespace=tablespace, 3482 on=on, 3483 ) 3484 3485 def _parse_index( 3486 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3487 ) -> t.Optional[exp.Index]: 3488 if index or anonymous: 3489 unique = None 3490 primary = None 3491 amp = None 3492 3493 self._match(TokenType.ON) 3494 self._match(TokenType.TABLE) # hive 3495 table = self._parse_table_parts(schema=True) 3496 else: 3497 unique = self._match(TokenType.UNIQUE) 3498 primary = self._match_text_seq("PRIMARY") 3499 amp = self._match_text_seq("AMP") 3500 3501 if not self._match(TokenType.INDEX): 3502 return None 3503 3504 index = self._parse_id_var() 3505 table = None 3506 3507 params = self._parse_index_params() 3508 3509 return self.expression( 3510 exp.Index, 3511 this=index, 3512 table=table, 3513 unique=unique, 3514 primary=primary, 3515 amp=amp, 3516 params=params, 3517 ) 3518 3519 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3520 hints: t.List[exp.Expression] = [] 3521 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3522 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3523 hints.append( 3524 self.expression( 3525 exp.WithTableHint, 3526 expressions=self._parse_csv( 3527 lambda: self._parse_function() or self._parse_var(any_token=True) 3528 ), 3529 
) 3530 ) 3531 self._match_r_paren() 3532 else: 3533 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3534 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3535 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3536 3537 self._match_set((TokenType.INDEX, TokenType.KEY)) 3538 if self._match(TokenType.FOR): 3539 hint.set("target", self._advance_any() and self._prev.text.upper()) 3540 3541 hint.set("expressions", self._parse_wrapped_id_vars()) 3542 hints.append(hint) 3543 3544 return hints or None 3545 3546 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3547 return ( 3548 (not schema and self._parse_function(optional_parens=False)) 3549 or self._parse_id_var(any_token=False) 3550 or self._parse_string_as_identifier() 3551 or self._parse_placeholder() 3552 ) 3553 3554 def _parse_table_parts( 3555 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3556 ) -> exp.Table: 3557 catalog = None 3558 db = None 3559 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3560 3561 while self._match(TokenType.DOT): 3562 if catalog: 3563 # This allows nesting the table in arbitrarily many dot expressions if needed 3564 table = self.expression( 3565 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3566 ) 3567 else: 3568 catalog = db 3569 db = table 3570 # "" used for tsql FROM a..b case 3571 table = self._parse_table_part(schema=schema) or "" 3572 3573 if ( 3574 wildcard 3575 and self._is_connected() 3576 and (isinstance(table, exp.Identifier) or not table) 3577 and self._match(TokenType.STAR) 3578 ): 3579 if isinstance(table, exp.Identifier): 3580 table.args["this"] += "*" 3581 else: 3582 table = exp.Identifier(this="*") 3583 3584 # We bubble up comments from the Identifier to the Table 3585 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3586 3587 if is_db_reference: 3588 catalog = db 3589 db = table 3590 table = None 3591 
        # Continuation of _parse_table_parts: validate and assemble the Table node.
        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like construct: lateral, unnest, VALUES, subquery, or a
        plain (possibly aliased/sampled/pivoted/joined) table reference."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects place the TABLESAMPLE clause before the alias.
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse a temporal/versioned table clause (FOR SYSTEM_TIME style snapshots)."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                # Not a complete AT/BEFORE(...) construct; rewind.
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        """Parse a Snowflake CHANGES (INFORMATION => ...) clause; None if absent."""
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...), with optional alias and WITH ORDINALITY/OFFSET handling."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                # In these dialects the alias names the column, not the table.
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                # The last column alias actually names the ordinality column.
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse a VALUES table (optionally parenthesized as a derived table)."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        """Parse a TABLESAMPLE (or USING SAMPLE) clause; None if absent."""
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
expressions = None 3849 num = ( 3850 self._parse_factor() 3851 if self._match(TokenType.NUMBER, advance=False) 3852 else self._parse_primary() or self._parse_placeholder() 3853 ) 3854 3855 if self._match_text_seq("BUCKET"): 3856 bucket_numerator = self._parse_number() 3857 self._match_text_seq("OUT", "OF") 3858 bucket_denominator = bucket_denominator = self._parse_number() 3859 self._match(TokenType.ON) 3860 bucket_field = self._parse_field() 3861 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3862 percent = num 3863 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3864 size = num 3865 else: 3866 percent = num 3867 3868 if matched_l_paren: 3869 self._match_r_paren() 3870 3871 if self._match(TokenType.L_PAREN): 3872 method = self._parse_var(upper=True) 3873 seed = self._match(TokenType.COMMA) and self._parse_number() 3874 self._match_r_paren() 3875 elif self._match_texts(("SEED", "REPEATABLE")): 3876 seed = self._parse_wrapped(self._parse_number) 3877 3878 if not method and self.DEFAULT_SAMPLING_METHOD: 3879 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3880 3881 return self.expression( 3882 exp.TableSample, 3883 expressions=expressions, 3884 method=method, 3885 bucket_numerator=bucket_numerator, 3886 bucket_denominator=bucket_denominator, 3887 bucket_field=bucket_field, 3888 percent=percent, 3889 size=size, 3890 seed=seed, 3891 ) 3892 3893 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3894 return list(iter(self._parse_pivot, None)) or None 3895 3896 def _parse_joins(self) -> t.Iterator[exp.Join]: 3897 return iter(self._parse_join, None) 3898 3899 # https://duckdb.org/docs/sql/statements/pivot 3900 def _parse_simplified_pivot(self) -> exp.Pivot: 3901 def _parse_on() -> t.Optional[exp.Expression]: 3902 this = self._parse_bitwise() 3903 return self._parse_in(this) if self._match(TokenType.IN) else this 3904 3905 this = self._parse_table() 3906 expressions = self._match(TokenType.ON) and 
self._parse_csv(_parse_on) 3907 using = self._match(TokenType.USING) and self._parse_csv( 3908 lambda: self._parse_alias(self._parse_function()) 3909 ) 3910 group = self._parse_group() 3911 return self.expression( 3912 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3913 ) 3914 3915 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3916 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3917 this = self._parse_select_or_expression() 3918 3919 self._match(TokenType.ALIAS) 3920 alias = self._parse_bitwise() 3921 if alias: 3922 if isinstance(alias, exp.Column) and not alias.db: 3923 alias = alias.this 3924 return self.expression(exp.PivotAlias, this=this, alias=alias) 3925 3926 return this 3927 3928 value = self._parse_column() 3929 3930 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3931 self.raise_error("Expecting IN (") 3932 3933 if self._match(TokenType.ANY): 3934 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 3935 else: 3936 exprs = self._parse_csv(_parse_aliased_expression) 3937 3938 self._match_r_paren() 3939 return self.expression(exp.In, this=value, expressions=exprs) 3940 3941 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3942 index = self._index 3943 include_nulls = None 3944 3945 if self._match(TokenType.PIVOT): 3946 unpivot = False 3947 elif self._match(TokenType.UNPIVOT): 3948 unpivot = True 3949 3950 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3951 if self._match_text_seq("INCLUDE", "NULLS"): 3952 include_nulls = True 3953 elif self._match_text_seq("EXCLUDE", "NULLS"): 3954 include_nulls = False 3955 else: 3956 return None 3957 3958 expressions = [] 3959 3960 if not self._match(TokenType.L_PAREN): 3961 self._retreat(index) 3962 return None 3963 3964 if unpivot: 3965 expressions = self._parse_csv(self._parse_column) 3966 else: 3967 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 
3968 3969 if not expressions: 3970 self.raise_error("Failed to parse PIVOT's aggregation list") 3971 3972 if not self._match(TokenType.FOR): 3973 self.raise_error("Expecting FOR") 3974 3975 field = self._parse_pivot_in() 3976 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3977 self._parse_bitwise 3978 ) 3979 3980 self._match_r_paren() 3981 3982 pivot = self.expression( 3983 exp.Pivot, 3984 expressions=expressions, 3985 field=field, 3986 unpivot=unpivot, 3987 include_nulls=include_nulls, 3988 default_on_null=default_on_null, 3989 ) 3990 3991 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3992 pivot.set("alias", self._parse_table_alias()) 3993 3994 if not unpivot: 3995 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3996 3997 columns: t.List[exp.Expression] = [] 3998 for fld in pivot.args["field"].expressions: 3999 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4000 for name in names: 4001 if self.PREFIXED_PIVOT_COLUMNS: 4002 name = f"{name}_{field_name}" if name else field_name 4003 else: 4004 name = f"{field_name}_{name}" if name else field_name 4005 4006 columns.append(exp.to_identifier(name)) 4007 4008 pivot.set("columns", columns) 4009 4010 return pivot 4011 4012 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4013 return [agg.alias for agg in aggregations] 4014 4015 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4016 if not skip_where_token and not self._match(TokenType.PREWHERE): 4017 return None 4018 4019 return self.expression( 4020 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4021 ) 4022 4023 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4024 if not skip_where_token and not self._match(TokenType.WHERE): 4025 return None 4026 4027 return self.expression( 4028 exp.Where, 
comments=self._prev_comments, this=self._parse_assignment() 4029 ) 4030 4031 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4032 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4033 return None 4034 4035 elements: t.Dict[str, t.Any] = defaultdict(list) 4036 4037 if self._match(TokenType.ALL): 4038 elements["all"] = True 4039 elif self._match(TokenType.DISTINCT): 4040 elements["all"] = False 4041 4042 while True: 4043 index = self._index 4044 4045 elements["expressions"].extend( 4046 self._parse_csv( 4047 lambda: None 4048 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4049 else self._parse_assignment() 4050 ) 4051 ) 4052 4053 before_with_index = self._index 4054 with_prefix = self._match(TokenType.WITH) 4055 4056 if self._match(TokenType.ROLLUP): 4057 elements["rollup"].append( 4058 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4059 ) 4060 elif self._match(TokenType.CUBE): 4061 elements["cube"].append( 4062 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4063 ) 4064 elif self._match(TokenType.GROUPING_SETS): 4065 elements["grouping_sets"].append( 4066 self.expression( 4067 exp.GroupingSets, 4068 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4069 ) 4070 ) 4071 elif self._match_text_seq("TOTALS"): 4072 elements["totals"] = True # type: ignore 4073 4074 if before_with_index <= self._index <= before_with_index + 1: 4075 self._retreat(before_with_index) 4076 break 4077 4078 if index == self._index: 4079 break 4080 4081 return self.expression(exp.Group, **elements) # type: ignore 4082 4083 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4084 return self.expression( 4085 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4086 ) 4087 4088 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4089 if self._match(TokenType.L_PAREN): 4090 grouping_set = 
self._parse_csv(self._parse_column) 4091 self._match_r_paren() 4092 return self.expression(exp.Tuple, expressions=grouping_set) 4093 4094 return self._parse_column() 4095 4096 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4097 if not skip_having_token and not self._match(TokenType.HAVING): 4098 return None 4099 return self.expression(exp.Having, this=self._parse_assignment()) 4100 4101 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4102 if not self._match(TokenType.QUALIFY): 4103 return None 4104 return self.expression(exp.Qualify, this=self._parse_assignment()) 4105 4106 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4107 if skip_start_token: 4108 start = None 4109 elif self._match(TokenType.START_WITH): 4110 start = self._parse_assignment() 4111 else: 4112 return None 4113 4114 self._match(TokenType.CONNECT_BY) 4115 nocycle = self._match_text_seq("NOCYCLE") 4116 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4117 exp.Prior, this=self._parse_bitwise() 4118 ) 4119 connect = self._parse_assignment() 4120 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4121 4122 if not start and self._match(TokenType.START_WITH): 4123 start = self._parse_assignment() 4124 4125 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4126 4127 def _parse_name_as_expression(self) -> exp.Alias: 4128 return self.expression( 4129 exp.Alias, 4130 alias=self._parse_id_var(any_token=True), 4131 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 4132 ) 4133 4134 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4135 if self._match_text_seq("INTERPOLATE"): 4136 return self._parse_wrapped_csv(self._parse_name_as_expression) 4137 return None 4138 4139 def _parse_order( 4140 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4141 ) -> t.Optional[exp.Expression]: 4142 siblings = None 4143 if not skip_order_token 
and not self._match(TokenType.ORDER_BY): 4144 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4145 return this 4146 4147 siblings = True 4148 4149 return self.expression( 4150 exp.Order, 4151 this=this, 4152 expressions=self._parse_csv(self._parse_ordered), 4153 siblings=siblings, 4154 ) 4155 4156 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4157 if not self._match(token): 4158 return None 4159 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4160 4161 def _parse_ordered( 4162 self, parse_method: t.Optional[t.Callable] = None 4163 ) -> t.Optional[exp.Ordered]: 4164 this = parse_method() if parse_method else self._parse_assignment() 4165 if not this: 4166 return None 4167 4168 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4169 this = exp.var("ALL") 4170 4171 asc = self._match(TokenType.ASC) 4172 desc = self._match(TokenType.DESC) or (asc and False) 4173 4174 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4175 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4176 4177 nulls_first = is_nulls_first or False 4178 explicitly_null_ordered = is_nulls_first or is_nulls_last 4179 4180 if ( 4181 not explicitly_null_ordered 4182 and ( 4183 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4184 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4185 ) 4186 and self.dialect.NULL_ORDERING != "nulls_are_last" 4187 ): 4188 nulls_first = True 4189 4190 if self._match_text_seq("WITH", "FILL"): 4191 with_fill = self.expression( 4192 exp.WithFill, 4193 **{ # type: ignore 4194 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4195 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4196 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4197 "interpolate": self._parse_interpolate(), 4198 }, 4199 ) 4200 else: 4201 with_fill = None 4202 4203 return self.expression( 4204 exp.Ordered, this=this, desc=desc, 
nulls_first=nulls_first, with_fill=with_fill 4205 ) 4206 4207 def _parse_limit( 4208 self, 4209 this: t.Optional[exp.Expression] = None, 4210 top: bool = False, 4211 skip_limit_token: bool = False, 4212 ) -> t.Optional[exp.Expression]: 4213 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4214 comments = self._prev_comments 4215 if top: 4216 limit_paren = self._match(TokenType.L_PAREN) 4217 expression = self._parse_term() if limit_paren else self._parse_number() 4218 4219 if limit_paren: 4220 self._match_r_paren() 4221 else: 4222 expression = self._parse_term() 4223 4224 if self._match(TokenType.COMMA): 4225 offset = expression 4226 expression = self._parse_term() 4227 else: 4228 offset = None 4229 4230 limit_exp = self.expression( 4231 exp.Limit, 4232 this=this, 4233 expression=expression, 4234 offset=offset, 4235 comments=comments, 4236 expressions=self._parse_limit_by(), 4237 ) 4238 4239 return limit_exp 4240 4241 if self._match(TokenType.FETCH): 4242 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4243 direction = self._prev.text.upper() if direction else "FIRST" 4244 4245 count = self._parse_field(tokens=self.FETCH_TOKENS) 4246 percent = self._match(TokenType.PERCENT) 4247 4248 self._match_set((TokenType.ROW, TokenType.ROWS)) 4249 4250 only = self._match_text_seq("ONLY") 4251 with_ties = self._match_text_seq("WITH", "TIES") 4252 4253 if only and with_ties: 4254 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4255 4256 return self.expression( 4257 exp.Fetch, 4258 direction=direction, 4259 count=count, 4260 percent=percent, 4261 with_ties=with_ties, 4262 ) 4263 4264 return this 4265 4266 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4267 if not self._match(TokenType.OFFSET): 4268 return this 4269 4270 count = self._parse_term() 4271 self._match_set((TokenType.ROW, TokenType.ROWS)) 4272 4273 return self.expression( 4274 exp.Offset, this=this, 
expression=count, expressions=self._parse_limit_by() 4275 ) 4276 4277 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4278 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4279 4280 def _parse_locks(self) -> t.List[exp.Lock]: 4281 locks = [] 4282 while True: 4283 if self._match_text_seq("FOR", "UPDATE"): 4284 update = True 4285 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4286 "LOCK", "IN", "SHARE", "MODE" 4287 ): 4288 update = False 4289 else: 4290 break 4291 4292 expressions = None 4293 if self._match_text_seq("OF"): 4294 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4295 4296 wait: t.Optional[bool | exp.Expression] = None 4297 if self._match_text_seq("NOWAIT"): 4298 wait = True 4299 elif self._match_text_seq("WAIT"): 4300 wait = self._parse_primary() 4301 elif self._match_text_seq("SKIP", "LOCKED"): 4302 wait = False 4303 4304 locks.append( 4305 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4306 ) 4307 4308 return locks 4309 4310 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4311 while this and self._match_set(self.SET_OPERATIONS): 4312 token_type = self._prev.token_type 4313 4314 if token_type == TokenType.UNION: 4315 operation: t.Type[exp.SetOperation] = exp.Union 4316 elif token_type == TokenType.EXCEPT: 4317 operation = exp.Except 4318 else: 4319 operation = exp.Intersect 4320 4321 comments = self._prev.comments 4322 4323 if self._match(TokenType.DISTINCT): 4324 distinct: t.Optional[bool] = True 4325 elif self._match(TokenType.ALL): 4326 distinct = False 4327 else: 4328 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4329 if distinct is None: 4330 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4331 4332 by_name = self._match_text_seq("BY", "NAME") 4333 expression = self._parse_select(nested=True, parse_set_operation=False) 4334 4335 this = 
self.expression( 4336 operation, 4337 comments=comments, 4338 this=this, 4339 distinct=distinct, 4340 by_name=by_name, 4341 expression=expression, 4342 ) 4343 4344 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4345 expression = this.expression 4346 4347 if expression: 4348 for arg in self.SET_OP_MODIFIERS: 4349 expr = expression.args.get(arg) 4350 if expr: 4351 this.set(arg, expr.pop()) 4352 4353 return this 4354 4355 def _parse_expression(self) -> t.Optional[exp.Expression]: 4356 return self._parse_alias(self._parse_assignment()) 4357 4358 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4359 this = self._parse_disjunction() 4360 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4361 # This allows us to parse <non-identifier token> := <expr> 4362 this = exp.column( 4363 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4364 ) 4365 4366 while self._match_set(self.ASSIGNMENT): 4367 if isinstance(this, exp.Column) and len(this.parts) == 1: 4368 this = this.this 4369 4370 this = self.expression( 4371 self.ASSIGNMENT[self._prev.token_type], 4372 this=this, 4373 comments=self._prev_comments, 4374 expression=self._parse_assignment(), 4375 ) 4376 4377 return this 4378 4379 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4380 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4381 4382 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4383 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4384 4385 def _parse_equality(self) -> t.Optional[exp.Expression]: 4386 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4387 4388 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4389 return self._parse_tokens(self._parse_range, self.COMPARISON) 4390 4391 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4392 this = this or self._parse_bitwise() 4393 negate = 
self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            # Dispatch to the operator-specific parser; it may decline by
            # returning a falsy value, in which case `this` is kept as-is.
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Wrap a parsed range predicate in NOT; a hook dialects may override."""
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the right-hand side of an IS predicate: [NOT] DISTINCT FROM,
        an IS [NOT] JSON predicate, or a primary/NULL literal. Returns None
        (after rewinding to before the IS token) when nothing valid follows.
        """
        # Position of the IS token itself, so we can rewind on failure.
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS NOT DISTINCT FROM is null-safe equality; IS DISTINCT FROM is
            # its negation.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            # IS [NOT] JSON, optionally qualified by a kind from
            # IS_JSON_PREDICATE_KIND and a WITH/WITHOUT [UNIQUE] KEYS clause.
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = 
self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of IN: UNNEST(...), a parenthesized or
        bracketed value list / subquery, or a bare field.
        """
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                # A lone query becomes `IN (subquery)` rather than a value list.
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            # Close with whichever bracket kind opened the list.
            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse `<low> AND <high>` after a BETWEEN token has been consumed."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Attach an `ESCAPE '<char>'` clause to a LIKE-style expression, if present."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        """Parse an INTERVAL expression. With match_interval=False the INTERVAL
        keyword itself is optional (used when chaining `+ 'val' unit` terms).
        Returns None (after rewinding) when no interval can be parsed.
        """
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, 
exp.Column) 4502 and not this.table 4503 and not this.this.quoted 4504 and this.name.upper() == "IS" 4505 ): 4506 self._retreat(index) 4507 return None 4508 4509 unit = self._parse_function() or ( 4510 not self._match(TokenType.ALIAS, advance=False) 4511 and self._parse_var(any_token=True, upper=True) 4512 ) 4513 4514 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4515 # each INTERVAL expression into this canonical form so it's easy to transpile 4516 if this and this.is_number: 4517 this = exp.Literal.string(this.to_py()) 4518 elif this and this.is_string: 4519 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4520 if len(parts) == 1: 4521 if unit: 4522 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4523 self._retreat(self._index - 1) 4524 4525 this = exp.Literal.string(parts[0][0]) 4526 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4527 4528 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4529 unit = self.expression( 4530 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4531 ) 4532 4533 interval = self.expression(exp.Interval, this=this, unit=unit) 4534 4535 index = self._index 4536 self._match(TokenType.PLUS) 4537 4538 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        # No further interval literal follows: undo the speculative PLUS match.
        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise-precedence operators over terms, plus the
        dialect-dependent `||` concat, `??` coalesce and `<<` / `>>` shifts.
        """
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                # `a ?? b` is parsed as COALESCE(a, b).
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-precedence (TERM) operators over factors."""
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fallback to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, 
exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative (FACTOR) operators; operands come from the
        exponent level when the dialect defines one, else from unary.
        """
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            # A textual operator (spelled with letters, e.g. DIV) followed by
            # no operand wasn't an operator after all - rewind and stop.
            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                # Record the dialect's division semantics so they can be
                # preserved when transpiling.
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse exponentiation-precedence operators over unary expressions."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse a unary operator, or fall through to a typed/column expression
        with an optional AT TIME ZONE suffix.
        """
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an interval, a data type (possibly a BigQuery-style inline
        constructor, canonicalized to a Cast), or fall back to a column /
        identifier when no type can be committed to.
        """
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 
4643 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4644 if isinstance(data_type, exp.Cast): 4645 # This constructor can contain ops directly after it, for instance struct unnesting: 4646 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 4647 return self._parse_column_ops(data_type) 4648 4649 if data_type: 4650 index2 = self._index 4651 this = self._parse_primary() 4652 4653 if isinstance(this, exp.Literal): 4654 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4655 if parser: 4656 return parser(self, this, data_type) 4657 4658 return self.expression(exp.Cast, this=this, to=data_type) 4659 4660 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4661 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4662 # 4663 # If the index difference here is greater than 1, that means the parser itself must have 4664 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4665 # 4666 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4667 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4668 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4669 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 4670 # 4671 # In these cases, we don't really want to return the converted type, but instead retreat 4672 # and try to parse a Column or Identifier in the section below. 
4673 if data_type.expressions and index2 - index > 1: 4674 self._retreat(index2) 4675 return self._parse_column_ops(data_type) 4676 4677 self._retreat(index) 4678 4679 if fallback_to_identifier: 4680 return self._parse_id_var() 4681 4682 this = self._parse_column() 4683 return this and self._parse_column_ops(this) 4684 4685 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4686 this = self._parse_type() 4687 if not this: 4688 return None 4689 4690 if isinstance(this, exp.Column) and not this.table: 4691 this = exp.var(this.name.upper()) 4692 4693 return self.expression( 4694 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4695 ) 4696 4697 def _parse_types( 4698 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4699 ) -> t.Optional[exp.Expression]: 4700 index = self._index 4701 4702 this: t.Optional[exp.Expression] = None 4703 prefix = self._match_text_seq("SYSUDTLIB", ".") 4704 4705 if not self._match_set(self.TYPE_TOKENS): 4706 identifier = allow_identifiers and self._parse_id_var( 4707 any_token=False, tokens=(TokenType.VAR,) 4708 ) 4709 if isinstance(identifier, exp.Identifier): 4710 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4711 4712 if len(tokens) != 1: 4713 self.raise_error("Unexpected identifier", self._prev) 4714 4715 if tokens[0].token_type in self.TYPE_TOKENS: 4716 self._prev = tokens[0] 4717 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4718 type_name = identifier.name 4719 4720 while self._match(TokenType.DOT): 4721 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4722 4723 this = exp.DataType.build(type_name, udt=True) 4724 else: 4725 self._retreat(self._index - 1) 4726 return None 4727 else: 4728 return None 4729 4730 type_token = self._prev.token_type 4731 4732 if type_token == TokenType.PSEUDO_TYPE: 4733 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4734 4735 if type_token == TokenType.OBJECT_IDENTIFIER: 4736 
return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4737 4738 # https://materialize.com/docs/sql/types/map/ 4739 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4740 key_type = self._parse_types( 4741 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4742 ) 4743 if not self._match(TokenType.FARROW): 4744 self._retreat(index) 4745 return None 4746 4747 value_type = self._parse_types( 4748 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4749 ) 4750 if not self._match(TokenType.R_BRACKET): 4751 self._retreat(index) 4752 return None 4753 4754 return exp.DataType( 4755 this=exp.DataType.Type.MAP, 4756 expressions=[key_type, value_type], 4757 nested=True, 4758 prefix=prefix, 4759 ) 4760 4761 nested = type_token in self.NESTED_TYPE_TOKENS 4762 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4763 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4764 expressions = None 4765 maybe_func = False 4766 4767 if self._match(TokenType.L_PAREN): 4768 if is_struct: 4769 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4770 elif nested: 4771 expressions = self._parse_csv( 4772 lambda: self._parse_types( 4773 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4774 ) 4775 ) 4776 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4777 this = expressions[0] 4778 this.set("nullable", True) 4779 self._match_r_paren() 4780 return this 4781 elif type_token in self.ENUM_TYPE_TOKENS: 4782 expressions = self._parse_csv(self._parse_equality) 4783 elif is_aggregate: 4784 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4785 any_token=False, tokens=(TokenType.VAR,) 4786 ) 4787 if not func_or_ident or not self._match(TokenType.COMMA): 4788 return None 4789 expressions = self._parse_csv( 4790 lambda: self._parse_types( 4791 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4792 ) 
4793 ) 4794 expressions.insert(0, func_or_ident) 4795 else: 4796 expressions = self._parse_csv(self._parse_type_size) 4797 4798 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4799 if type_token == TokenType.VECTOR and len(expressions) == 2: 4800 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4801 4802 if not expressions or not self._match(TokenType.R_PAREN): 4803 self._retreat(index) 4804 return None 4805 4806 maybe_func = True 4807 4808 values: t.Optional[t.List[exp.Expression]] = None 4809 4810 if nested and self._match(TokenType.LT): 4811 if is_struct: 4812 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4813 else: 4814 expressions = self._parse_csv( 4815 lambda: self._parse_types( 4816 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4817 ) 4818 ) 4819 4820 if not self._match(TokenType.GT): 4821 self.raise_error("Expecting >") 4822 4823 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4824 values = self._parse_csv(self._parse_assignment) 4825 if not values and is_struct: 4826 values = None 4827 self._retreat(self._index - 1) 4828 else: 4829 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4830 4831 if type_token in self.TIMESTAMPS: 4832 if self._match_text_seq("WITH", "TIME", "ZONE"): 4833 maybe_func = False 4834 tz_type = ( 4835 exp.DataType.Type.TIMETZ 4836 if type_token in self.TIMES 4837 else exp.DataType.Type.TIMESTAMPTZ 4838 ) 4839 this = exp.DataType(this=tz_type, expressions=expressions) 4840 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4841 maybe_func = False 4842 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4843 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4844 maybe_func = False 4845 elif type_token == TokenType.INTERVAL: 4846 unit = self._parse_var(upper=True) 4847 if unit: 4848 if self._match_text_seq("TO"): 4849 unit = exp.IntervalSpan(this=unit, 
expression=self._parse_var(upper=True)) 4850 4851 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4852 else: 4853 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4854 4855 if maybe_func and check_func: 4856 index2 = self._index 4857 peek = self._parse_string() 4858 4859 if not peek: 4860 self._retreat(index) 4861 return None 4862 4863 self._retreat(index2) 4864 4865 if not this: 4866 if self._match_text_seq("UNSIGNED"): 4867 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4868 if not unsigned_type_token: 4869 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4870 4871 type_token = unsigned_type_token or type_token 4872 4873 this = exp.DataType( 4874 this=exp.DataType.Type[type_token.value], 4875 expressions=expressions, 4876 nested=nested, 4877 prefix=prefix, 4878 ) 4879 4880 # Empty arrays/structs are allowed 4881 if values is not None: 4882 cls = exp.Struct if is_struct else exp.Array 4883 this = exp.cast(cls(expressions=values), this, copy=False) 4884 4885 elif expressions: 4886 this.set("expressions", expressions) 4887 4888 # https://materialize.com/docs/sql/types/list/#type-name 4889 while self._match(TokenType.LIST): 4890 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4891 4892 index = self._index 4893 4894 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4895 matched_array = self._match(TokenType.ARRAY) 4896 4897 while self._curr: 4898 datatype_token = self._prev.token_type 4899 matched_l_bracket = self._match(TokenType.L_BRACKET) 4900 if not matched_l_bracket and not matched_array: 4901 break 4902 4903 matched_array = False 4904 values = self._parse_csv(self._parse_assignment) or None 4905 if ( 4906 values 4907 and not schema 4908 and ( 4909 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4910 ) 4911 ): 4912 # Retreating here means that we should not parse the following 
values as part of the data type, e.g. in DuckDB 4913 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4914 self._retreat(index) 4915 break 4916 4917 this = exp.DataType( 4918 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4919 ) 4920 self._match(TokenType.R_BRACKET) 4921 4922 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4923 converter = self.TYPE_CONVERTERS.get(this.this) 4924 if converter: 4925 this = converter(t.cast(exp.DataType, this)) 4926 4927 return this 4928 4929 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4930 index = self._index 4931 4932 if ( 4933 self._curr 4934 and self._next 4935 and self._curr.token_type in self.TYPE_TOKENS 4936 and self._next.token_type in self.TYPE_TOKENS 4937 ): 4938 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4939 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4940 this = self._parse_id_var() 4941 else: 4942 this = ( 4943 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4944 or self._parse_id_var() 4945 ) 4946 4947 self._match(TokenType.COLON) 4948 4949 if ( 4950 type_required 4951 and not isinstance(this, exp.DataType) 4952 and not self._match_set(self.TYPE_TOKENS, advance=False) 4953 ): 4954 self._retreat(index) 4955 return self._parse_types() 4956 4957 return self._parse_column_def(this) 4958 4959 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4960 if not self._match_text_seq("AT", "TIME", "ZONE"): 4961 return this 4962 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4963 4964 def _parse_column(self) -> t.Optional[exp.Expression]: 4965 this = self._parse_column_reference() 4966 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4967 4968 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4969 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4970 4971 return column 4972 4973 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4974 this = self._parse_field() 4975 if ( 4976 not this 4977 and self._match(TokenType.VALUES, advance=False) 4978 and self.VALUES_FOLLOWED_BY_PAREN 4979 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4980 ): 4981 this = self._parse_id_var() 4982 4983 if isinstance(this, exp.Identifier): 4984 # We bubble up comments from the Identifier to the Column 4985 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4986 4987 return this 4988 4989 def _parse_colon_as_variant_extract( 4990 self, this: t.Optional[exp.Expression] 4991 ) -> t.Optional[exp.Expression]: 4992 casts = [] 4993 json_path = [] 4994 escape = None 4995 4996 while self._match(TokenType.COLON): 4997 start_index = self._index 4998 4999 # Snowflake allows reserved keywords as 
json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5000 path = self._parse_column_ops( 5001 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5002 ) 5003 5004 # The cast :: operator has a lower precedence than the extraction operator :, so 5005 # we rearrange the AST appropriately to avoid casting the JSON path 5006 while isinstance(path, exp.Cast): 5007 casts.append(path.to) 5008 path = path.this 5009 5010 if casts: 5011 dcolon_offset = next( 5012 i 5013 for i, t in enumerate(self._tokens[start_index:]) 5014 if t.token_type == TokenType.DCOLON 5015 ) 5016 end_token = self._tokens[start_index + dcolon_offset - 1] 5017 else: 5018 end_token = self._prev 5019 5020 if path: 5021 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5022 # it'll roundtrip to a string literal in GET_PATH 5023 if isinstance(path, exp.Identifier) and path.quoted: 5024 escape = True 5025 5026 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5027 5028 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5029 # Databricks transforms it back to the colon/dot notation 5030 if json_path: 5031 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5032 5033 if json_path_expr: 5034 json_path_expr.set("escape", escape) 5035 5036 this = self.expression( 5037 exp.JSONExtract, 5038 this=this, 5039 expression=json_path_expr, 5040 variant_extract=True, 5041 ) 5042 5043 while casts: 5044 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5045 5046 return this 5047 5048 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5049 return self._parse_types() 5050 5051 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5052 this = self._parse_bracket(this) 5053 5054 while self._match_set(self.COLUMN_OPERATORS): 5055 op_token = self._prev.token_type 5056 op = 
self.COLUMN_OPERATORS.get(op_token) 5057 5058 if op_token == TokenType.DCOLON: 5059 field = self._parse_dcolon() 5060 if not field: 5061 self.raise_error("Expected type") 5062 elif op and self._curr: 5063 field = self._parse_column_reference() or self._parse_bracket() 5064 else: 5065 field = self._parse_field(any_token=True, anonymous_func=True) 5066 5067 if isinstance(field, exp.Func) and this: 5068 # bigquery allows function calls like x.y.count(...) 5069 # SAFE.SUBSTR(...) 5070 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5071 this = exp.replace_tree( 5072 this, 5073 lambda n: ( 5074 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5075 if n.table 5076 else n.this 5077 ) 5078 if isinstance(n, exp.Column) 5079 else n, 5080 ) 5081 5082 if op: 5083 this = op(self, this, field) 5084 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5085 this = self.expression( 5086 exp.Column, 5087 comments=this.comments, 5088 this=field, 5089 table=this.this, 5090 db=this.args.get("table"), 5091 catalog=this.args.get("db"), 5092 ) 5093 else: 5094 this = self.expression(exp.Dot, this=this, expression=field) 5095 5096 this = self._parse_bracket(this) 5097 5098 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5099 5100 def _parse_primary(self) -> t.Optional[exp.Expression]: 5101 if self._match_set(self.PRIMARY_PARSERS): 5102 token_type = self._prev.token_type 5103 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5104 5105 if token_type == TokenType.STRING: 5106 expressions = [primary] 5107 while self._match(TokenType.STRING): 5108 expressions.append(exp.Literal.string(self._prev.text)) 5109 5110 if len(expressions) > 1: 5111 return self.expression(exp.Concat, expressions=expressions) 5112 5113 return primary 5114 5115 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5116 return exp.Literal.number(f"0.{self._prev.text}") 5117 
5118 if self._match(TokenType.L_PAREN): 5119 comments = self._prev_comments 5120 query = self._parse_select() 5121 5122 if query: 5123 expressions = [query] 5124 else: 5125 expressions = self._parse_expressions() 5126 5127 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5128 5129 if not this and self._match(TokenType.R_PAREN, advance=False): 5130 this = self.expression(exp.Tuple) 5131 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5132 this = self._parse_subquery(this=this, parse_alias=False) 5133 elif isinstance(this, exp.Subquery): 5134 this = self._parse_subquery( 5135 this=self._parse_set_operations(this), parse_alias=False 5136 ) 5137 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5138 this = self.expression(exp.Tuple, expressions=expressions) 5139 else: 5140 this = self.expression(exp.Paren, this=this) 5141 5142 if this: 5143 this.add_comments(comments) 5144 5145 self._match_r_paren(expression=this) 5146 return this 5147 5148 return None 5149 5150 def _parse_field( 5151 self, 5152 any_token: bool = False, 5153 tokens: t.Optional[t.Collection[TokenType]] = None, 5154 anonymous_func: bool = False, 5155 ) -> t.Optional[exp.Expression]: 5156 if anonymous_func: 5157 field = ( 5158 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5159 or self._parse_primary() 5160 ) 5161 else: 5162 field = self._parse_primary() or self._parse_function( 5163 anonymous=anonymous_func, any_token=any_token 5164 ) 5165 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5166 5167 def _parse_function( 5168 self, 5169 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5170 anonymous: bool = False, 5171 optional_parens: bool = True, 5172 any_token: bool = False, 5173 ) -> t.Optional[exp.Expression]: 5174 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5175 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5176 fn_syntax = False 5177 if ( 5178 
self._match(TokenType.L_BRACE, advance=False) 5179 and self._next 5180 and self._next.text.upper() == "FN" 5181 ): 5182 self._advance(2) 5183 fn_syntax = True 5184 5185 func = self._parse_function_call( 5186 functions=functions, 5187 anonymous=anonymous, 5188 optional_parens=optional_parens, 5189 any_token=any_token, 5190 ) 5191 5192 if fn_syntax: 5193 self._match(TokenType.R_BRACE) 5194 5195 return func 5196 5197 def _parse_function_call( 5198 self, 5199 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5200 anonymous: bool = False, 5201 optional_parens: bool = True, 5202 any_token: bool = False, 5203 ) -> t.Optional[exp.Expression]: 5204 if not self._curr: 5205 return None 5206 5207 comments = self._curr.comments 5208 token_type = self._curr.token_type 5209 this = self._curr.text 5210 upper = this.upper() 5211 5212 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5213 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5214 self._advance() 5215 return self._parse_window(parser(self)) 5216 5217 if not self._next or self._next.token_type != TokenType.L_PAREN: 5218 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5219 self._advance() 5220 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5221 5222 return None 5223 5224 if any_token: 5225 if token_type in self.RESERVED_TOKENS: 5226 return None 5227 elif token_type not in self.FUNC_TOKENS: 5228 return None 5229 5230 self._advance(2) 5231 5232 parser = self.FUNCTION_PARSERS.get(upper) 5233 if parser and not anonymous: 5234 this = parser(self) 5235 else: 5236 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5237 5238 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5239 this = self.expression( 5240 subquery_predicate, comments=comments, this=self._parse_select() 5241 ) 5242 self._match_r_paren() 5243 return this 5244 5245 if functions is None: 5246 functions = self.FUNCTIONS 5247 5248 function = 
functions.get(upper) 5249 5250 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5251 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5252 5253 if alias: 5254 args = self._kv_to_prop_eq(args) 5255 5256 if function and not anonymous: 5257 if "dialect" in function.__code__.co_varnames: 5258 func = function(args, dialect=self.dialect) 5259 else: 5260 func = function(args) 5261 5262 func = self.validate_expression(func, args) 5263 if not self.dialect.NORMALIZE_FUNCTIONS: 5264 func.meta["name"] = this 5265 5266 this = func 5267 else: 5268 if token_type == TokenType.IDENTIFIER: 5269 this = exp.Identifier(this=this, quoted=True) 5270 this = self.expression(exp.Anonymous, this=this, expressions=args) 5271 5272 if isinstance(this, exp.Expression): 5273 this.add_comments(comments) 5274 5275 self._match_r_paren(this) 5276 return self._parse_window(this) 5277 5278 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5279 return expression 5280 5281 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5282 transformed = [] 5283 5284 for index, e in enumerate(expressions): 5285 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5286 if isinstance(e, exp.Alias): 5287 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5288 5289 if not isinstance(e, exp.PropertyEQ): 5290 e = self.expression( 5291 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5292 ) 5293 5294 if isinstance(e.this, exp.Column): 5295 e.this.replace(e.this.this) 5296 else: 5297 e = self._to_prop_eq(e, index) 5298 5299 transformed.append(e) 5300 5301 return transformed 5302 5303 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5304 return self._parse_column_def(self._parse_id_var()) 5305 5306 def _parse_user_defined_function( 5307 self, kind: t.Optional[TokenType] = None 5308 ) -> t.Optional[exp.Expression]: 5309 this = self._parse_id_var() 5310 5311 while 
self._match(TokenType.DOT): 5312 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5313 5314 if not self._match(TokenType.L_PAREN): 5315 return this 5316 5317 expressions = self._parse_csv(self._parse_function_parameter) 5318 self._match_r_paren() 5319 return self.expression( 5320 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5321 ) 5322 5323 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5324 literal = self._parse_primary() 5325 if literal: 5326 return self.expression(exp.Introducer, this=token.text, expression=literal) 5327 5328 return self.expression(exp.Identifier, this=token.text) 5329 5330 def _parse_session_parameter(self) -> exp.SessionParameter: 5331 kind = None 5332 this = self._parse_id_var() or self._parse_primary() 5333 5334 if this and self._match(TokenType.DOT): 5335 kind = this.name 5336 this = self._parse_var() or self._parse_primary() 5337 5338 return self.expression(exp.SessionParameter, this=this, kind=kind) 5339 5340 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5341 return self._parse_id_var() 5342 5343 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5344 index = self._index 5345 5346 if self._match(TokenType.L_PAREN): 5347 expressions = t.cast( 5348 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5349 ) 5350 5351 if not self._match(TokenType.R_PAREN): 5352 self._retreat(index) 5353 else: 5354 expressions = [self._parse_lambda_arg()] 5355 5356 if self._match_set(self.LAMBDAS): 5357 return self.LAMBDAS[self._prev.token_type](self, expressions) 5358 5359 self._retreat(index) 5360 5361 this: t.Optional[exp.Expression] 5362 5363 if self._match(TokenType.DISTINCT): 5364 this = self.expression( 5365 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5366 ) 5367 else: 5368 this = self._parse_select_or_expression(alias=alias) 5369 5370 return self._parse_limit( 5371 
self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5372 ) 5373 5374 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5375 index = self._index 5376 if not self._match(TokenType.L_PAREN): 5377 return this 5378 5379 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5380 # expr can be of both types 5381 if self._match_set(self.SELECT_START_TOKENS): 5382 self._retreat(index) 5383 return this 5384 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5385 self._match_r_paren() 5386 return self.expression(exp.Schema, this=this, expressions=args) 5387 5388 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5389 return self._parse_column_def(self._parse_field(any_token=True)) 5390 5391 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5392 # column defs are not really columns, they're identifiers 5393 if isinstance(this, exp.Column): 5394 this = this.this 5395 5396 kind = self._parse_types(schema=True) 5397 5398 if self._match_text_seq("FOR", "ORDINALITY"): 5399 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5400 5401 constraints: t.List[exp.Expression] = [] 5402 5403 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5404 ("ALIAS", "MATERIALIZED") 5405 ): 5406 persisted = self._prev.text.upper() == "MATERIALIZED" 5407 constraint_kind = exp.ComputedColumnConstraint( 5408 this=self._parse_assignment(), 5409 persisted=persisted or self._match_text_seq("PERSISTED"), 5410 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5411 ) 5412 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5413 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5414 self._match(TokenType.ALIAS) 5415 constraints.append( 5416 self.expression( 5417 exp.ColumnConstraint, 5418 
kind=exp.TransformColumnConstraint(this=self._parse_field()), 5419 ) 5420 ) 5421 5422 while True: 5423 constraint = self._parse_column_constraint() 5424 if not constraint: 5425 break 5426 constraints.append(constraint) 5427 5428 if not kind and not constraints: 5429 return this 5430 5431 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5432 5433 def _parse_auto_increment( 5434 self, 5435 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5436 start = None 5437 increment = None 5438 5439 if self._match(TokenType.L_PAREN, advance=False): 5440 args = self._parse_wrapped_csv(self._parse_bitwise) 5441 start = seq_get(args, 0) 5442 increment = seq_get(args, 1) 5443 elif self._match_text_seq("START"): 5444 start = self._parse_bitwise() 5445 self._match_text_seq("INCREMENT") 5446 increment = self._parse_bitwise() 5447 5448 if start and increment: 5449 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5450 5451 return exp.AutoIncrementColumnConstraint() 5452 5453 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5454 if not self._match_text_seq("REFRESH"): 5455 self._retreat(self._index - 1) 5456 return None 5457 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5458 5459 def _parse_compress(self) -> exp.CompressColumnConstraint: 5460 if self._match(TokenType.L_PAREN, advance=False): 5461 return self.expression( 5462 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5463 ) 5464 5465 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5466 5467 def _parse_generated_as_identity( 5468 self, 5469 ) -> ( 5470 exp.GeneratedAsIdentityColumnConstraint 5471 | exp.ComputedColumnConstraint 5472 | exp.GeneratedAsRowColumnConstraint 5473 ): 5474 if self._match_text_seq("BY", "DEFAULT"): 5475 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5476 this = self.expression( 5477 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5478 ) 5479 else: 5480 self._match_text_seq("ALWAYS") 5481 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5482 5483 self._match(TokenType.ALIAS) 5484 5485 if self._match_text_seq("ROW"): 5486 start = self._match_text_seq("START") 5487 if not start: 5488 self._match(TokenType.END) 5489 hidden = self._match_text_seq("HIDDEN") 5490 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5491 5492 identity = self._match_text_seq("IDENTITY") 5493 5494 if self._match(TokenType.L_PAREN): 5495 if self._match(TokenType.START_WITH): 5496 this.set("start", self._parse_bitwise()) 5497 if self._match_text_seq("INCREMENT", "BY"): 5498 this.set("increment", self._parse_bitwise()) 5499 if self._match_text_seq("MINVALUE"): 5500 this.set("minvalue", self._parse_bitwise()) 5501 if self._match_text_seq("MAXVALUE"): 5502 this.set("maxvalue", self._parse_bitwise()) 5503 5504 if self._match_text_seq("CYCLE"): 5505 this.set("cycle", True) 5506 elif self._match_text_seq("NO", "CYCLE"): 5507 this.set("cycle", False) 5508 5509 if not identity: 5510 this.set("expression", self._parse_range()) 5511 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5512 args = self._parse_csv(self._parse_bitwise) 5513 this.set("start", seq_get(args, 0)) 5514 this.set("increment", seq_get(args, 1)) 5515 5516 self._match_r_paren() 5517 5518 return this 5519 5520 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5521 self._match_text_seq("LENGTH") 5522 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5523 5524 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5525 if self._match_text_seq("NULL"): 5526 return self.expression(exp.NotNullColumnConstraint) 5527 if self._match_text_seq("CASESPECIFIC"): 5528 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5529 if 
self._match_text_seq("FOR", "REPLICATION"): 5530 return self.expression(exp.NotForReplicationColumnConstraint) 5531 5532 # Unconsume the `NOT` token 5533 self._retreat(self._index - 1) 5534 return None 5535 5536 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5537 if self._match(TokenType.CONSTRAINT): 5538 this = self._parse_id_var() 5539 else: 5540 this = None 5541 5542 if self._match_texts(self.CONSTRAINT_PARSERS): 5543 return self.expression( 5544 exp.ColumnConstraint, 5545 this=this, 5546 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5547 ) 5548 5549 return this 5550 5551 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5552 if not self._match(TokenType.CONSTRAINT): 5553 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5554 5555 return self.expression( 5556 exp.Constraint, 5557 this=self._parse_id_var(), 5558 expressions=self._parse_unnamed_constraints(), 5559 ) 5560 5561 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5562 constraints = [] 5563 while True: 5564 constraint = self._parse_unnamed_constraint() or self._parse_function() 5565 if not constraint: 5566 break 5567 constraints.append(constraint) 5568 5569 return constraints 5570 5571 def _parse_unnamed_constraint( 5572 self, constraints: t.Optional[t.Collection[str]] = None 5573 ) -> t.Optional[exp.Expression]: 5574 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5575 constraints or self.CONSTRAINT_PARSERS 5576 ): 5577 return None 5578 5579 constraint = self._prev.text.upper() 5580 if constraint not in self.CONSTRAINT_PARSERS: 5581 self.raise_error(f"No parser found for schema constraint {constraint}.") 5582 5583 return self.CONSTRAINT_PARSERS[constraint](self) 5584 5585 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5586 return self._parse_id_var(any_token=False) 5587 5588 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5589 self._match_text_seq("KEY") 5590 return 
self.expression( 5591 exp.UniqueColumnConstraint, 5592 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5593 this=self._parse_schema(self._parse_unique_key()), 5594 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5595 on_conflict=self._parse_on_conflict(), 5596 ) 5597 5598 def _parse_key_constraint_options(self) -> t.List[str]: 5599 options = [] 5600 while True: 5601 if not self._curr: 5602 break 5603 5604 if self._match(TokenType.ON): 5605 action = None 5606 on = self._advance_any() and self._prev.text 5607 5608 if self._match_text_seq("NO", "ACTION"): 5609 action = "NO ACTION" 5610 elif self._match_text_seq("CASCADE"): 5611 action = "CASCADE" 5612 elif self._match_text_seq("RESTRICT"): 5613 action = "RESTRICT" 5614 elif self._match_pair(TokenType.SET, TokenType.NULL): 5615 action = "SET NULL" 5616 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5617 action = "SET DEFAULT" 5618 else: 5619 self.raise_error("Invalid key constraint") 5620 5621 options.append(f"ON {on} {action}") 5622 else: 5623 var = self._parse_var_from_options( 5624 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5625 ) 5626 if not var: 5627 break 5628 options.append(var.name) 5629 5630 return options 5631 5632 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5633 if match and not self._match(TokenType.REFERENCES): 5634 return None 5635 5636 expressions = None 5637 this = self._parse_table(schema=True) 5638 options = self._parse_key_constraint_options() 5639 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5640 5641 def _parse_foreign_key(self) -> exp.ForeignKey: 5642 expressions = self._parse_wrapped_id_vars() 5643 reference = self._parse_references() 5644 options = {} 5645 5646 while self._match(TokenType.ON): 5647 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5648 self.raise_error("Expected DELETE or UPDATE") 5649 5650 kind = self._prev.text.lower() 5651 
            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token is taken verbatim as the action.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse PERIOD FOR SYSTEM_TIME (start, end); retreats if SYSTEM_TIME doesn't follow."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY as either a column constraint or a table-level key list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
        https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse bracket/brace suffixes: subscripts, struct/map literals and ODBC literals."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            # Plain subscript access, adjusted for the dialect's index base.
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a `:` slice suffix, e.g. arr[1:2]."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [expr] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            # `END` may have been mis-parsed as an INTERVAL unit of a column named "interval".
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF(...) function syntax or `IF cond THEN ... [ELSE ...] END`."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # Statement-level IF (e.g. scripting) — treat the rest as a raw command.
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; retreats otherwise."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        """Parse EXTRACT(part FROM expr); some dialects use a comma instead of FROM."""
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        """Parse GAP_FILL(TABLE t, ...)."""
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse CAST(expr AS type [FORMAT fmt]); FORMAT on temporal types becomes StrToDate/StrToTime."""
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name — treat it as a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT variants into exp.GroupConcat."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ...]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a Cast/TryCast."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
5960 """ 5961 args = self._parse_csv(self._parse_assignment) 5962 5963 if len(args) < 3: 5964 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5965 5966 expression, *expressions = args 5967 if not expression: 5968 return None 5969 5970 ifs = [] 5971 for search, result in zip(expressions[::2], expressions[1::2]): 5972 if not search or not result: 5973 return None 5974 5975 if isinstance(search, exp.Literal): 5976 ifs.append( 5977 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5978 ) 5979 elif isinstance(search, exp.Null): 5980 ifs.append( 5981 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5982 ) 5983 else: 5984 cond = exp.or_( 5985 exp.EQ(this=expression.copy(), expression=search), 5986 exp.and_( 5987 exp.Is(this=expression.copy(), expression=exp.Null()), 5988 exp.Is(this=search.copy(), expression=exp.Null()), 5989 copy=False, 5990 ), 5991 copy=False, 5992 ) 5993 ifs.append(exp.If(this=cond, true=result)) 5994 5995 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5996 5997 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5998 self._match_text_seq("KEY") 5999 key = self._parse_column() 6000 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6001 self._match_text_seq("VALUE") 6002 value = self._parse_bitwise() 6003 6004 if not key and not value: 6005 return None 6006 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6007 6008 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6009 if not this or not self._match_text_seq("FORMAT", "JSON"): 6010 return this 6011 6012 return self.expression(exp.FormatJson, this=this) 6013 6014 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6015 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6016 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6017 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6018 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6019 else: 6020 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6021 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6022 6023 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6024 6025 if not empty and not error and not null: 6026 return None 6027 6028 return self.expression( 6029 exp.OnCondition, 6030 empty=empty, 6031 error=error, 6032 null=null, 6033 ) 6034 6035 def _parse_on_handling( 6036 self, on: str, *values: str 6037 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6038 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6039 for value in values: 6040 if self._match_text_seq(value, "ON", on): 6041 return f"{value} ON {on}" 6042 6043 index = self._index 6044 if self._match(TokenType.DEFAULT): 6045 default_value = self._parse_bitwise() 6046 if self._match_text_seq("ON", on): 6047 return default_value 6048 6049 self._retreat(index) 6050 6051 return None 6052 6053 @t.overload 6054 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6055 6056 @t.overload 6057 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT / JSON_OBJECTAGG arguments (key-values, NULL handling, RETURNING, ...)."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        """Parse a JSON_TABLE column definition, possibly NESTED with its own schema."""
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a COLUMNS (...) schema for JSON_TABLE."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE(doc [, path] [error/empty handling] COLUMNS (...))."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL's MATCH (cols) AGAINST (str [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL's OPENJSON(doc [, path]) [WITH (coldefs)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION(needle IN haystack) or the comma-separated variant."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse PREDICT(MODEL m, TABLE t [, params])."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join hint's table list into exp.JoinHint."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            # FOR with no FROM implies the start position defaults to 1.
            if len(args) == 1:
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM str) puts the pattern first; swap to normalize.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a WINDOW clause into a list of named windows, or None."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse a single `name AS (window spec)` entry."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if the corresponding keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse Teradata-style `HAVING MAX|MIN <col>` qualifiers into exp.HavingMax."""
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            # `max` is True unless MIN was matched (shadows the builtin; kept as-is).
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window suffixes (FILTER, WITHIN GROUP, OVER ...) around a function call."""
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Re-root the IGNORE/RESPECT NULLS wrapper around the whole aggregate.
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> form (no parenthesized spec).
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
6385 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6386 return self._parse_window(window, alias=alias) 6387 6388 return window 6389 6390 def _parse_partition_and_order( 6391 self, 6392 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6393 return self._parse_partition_by(), self._parse_order() 6394 6395 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6396 self._match(TokenType.BETWEEN) 6397 6398 return { 6399 "value": ( 6400 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6401 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6402 or self._parse_bitwise() 6403 ), 6404 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6405 } 6406 6407 def _parse_alias( 6408 self, this: t.Optional[exp.Expression], explicit: bool = False 6409 ) -> t.Optional[exp.Expression]: 6410 any_token = self._match(TokenType.ALIAS) 6411 comments = self._prev_comments or [] 6412 6413 if explicit and not any_token: 6414 return this 6415 6416 if self._match(TokenType.L_PAREN): 6417 aliases = self.expression( 6418 exp.Aliases, 6419 comments=comments, 6420 this=this, 6421 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6422 ) 6423 self._match_r_paren(aliases) 6424 return aliases 6425 6426 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6427 self.STRING_ALIASES and self._parse_string_as_identifier() 6428 ) 6429 6430 if alias: 6431 comments.extend(alias.pop_comments()) 6432 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6433 column = this.this 6434 6435 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6436 if not this.comments and column and column.comments: 6437 this.comments = column.pop_comments() 6438 6439 return this 6440 6441 def _parse_id_var( 6442 self, 6443 any_token: bool = True, 6444 tokens: t.Optional[t.Collection[TokenType]] = None, 6445 ) -> t.Optional[exp.Expression]: 6446 expression = 
self._parse_identifier() 6447 if not expression and ( 6448 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6449 ): 6450 quoted = self._prev.token_type == TokenType.STRING 6451 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6452 6453 return expression 6454 6455 def _parse_string(self) -> t.Optional[exp.Expression]: 6456 if self._match_set(self.STRING_PARSERS): 6457 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6458 return self._parse_placeholder() 6459 6460 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6461 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6462 6463 def _parse_number(self) -> t.Optional[exp.Expression]: 6464 if self._match_set(self.NUMERIC_PARSERS): 6465 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6466 return self._parse_placeholder() 6467 6468 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6469 if self._match(TokenType.IDENTIFIER): 6470 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6471 return self._parse_placeholder() 6472 6473 def _parse_var( 6474 self, 6475 any_token: bool = False, 6476 tokens: t.Optional[t.Collection[TokenType]] = None, 6477 upper: bool = False, 6478 ) -> t.Optional[exp.Expression]: 6479 if ( 6480 (any_token and self._advance_any()) 6481 or self._match(TokenType.VAR) 6482 or (self._match_set(tokens) if tokens else False) 6483 ): 6484 return self.expression( 6485 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6486 ) 6487 return self._parse_placeholder() 6488 6489 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6490 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6491 self._advance() 6492 return self._prev 6493 return None 6494 6495 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6496 
return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6497 6498 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6499 return self._parse_primary() or self._parse_var(any_token=True) 6500 6501 def _parse_null(self) -> t.Optional[exp.Expression]: 6502 if self._match_set(self.NULL_TOKENS): 6503 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6504 return self._parse_placeholder() 6505 6506 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6507 if self._match(TokenType.TRUE): 6508 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6509 if self._match(TokenType.FALSE): 6510 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6511 return self._parse_placeholder() 6512 6513 def _parse_star(self) -> t.Optional[exp.Expression]: 6514 if self._match(TokenType.STAR): 6515 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6516 return self._parse_placeholder() 6517 6518 def _parse_parameter(self) -> exp.Parameter: 6519 this = self._parse_identifier() or self._parse_primary_or_var() 6520 return self.expression(exp.Parameter, this=this) 6521 6522 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6523 if self._match_set(self.PLACEHOLDER_PARSERS): 6524 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6525 if placeholder: 6526 return placeholder 6527 self._advance(-1) 6528 return None 6529 6530 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6531 if not self._match_texts(keywords): 6532 return None 6533 if self._match(TokenType.L_PAREN, advance=False): 6534 return self._parse_wrapped_csv(self._parse_expression) 6535 6536 expression = self._parse_expression() 6537 return [expression] if expression else None 6538 6539 def _parse_csv( 6540 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6541 ) -> t.List[exp.Expression]: 6542 parse_result = parse_method() 6543 items = [parse_result] if parse_result is not None else [] 6544 
6545 while self._match(sep): 6546 self._add_comments(parse_result) 6547 parse_result = parse_method() 6548 if parse_result is not None: 6549 items.append(parse_result) 6550 6551 return items 6552 6553 def _parse_tokens( 6554 self, parse_method: t.Callable, expressions: t.Dict 6555 ) -> t.Optional[exp.Expression]: 6556 this = parse_method() 6557 6558 while self._match_set(expressions): 6559 this = self.expression( 6560 expressions[self._prev.token_type], 6561 this=this, 6562 comments=self._prev_comments, 6563 expression=parse_method(), 6564 ) 6565 6566 return this 6567 6568 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6569 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6570 6571 def _parse_wrapped_csv( 6572 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6573 ) -> t.List[exp.Expression]: 6574 return self._parse_wrapped( 6575 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6576 ) 6577 6578 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6579 wrapped = self._match(TokenType.L_PAREN) 6580 if not wrapped and not optional: 6581 self.raise_error("Expecting (") 6582 parse_result = parse_method() 6583 if wrapped: 6584 self._match_r_paren() 6585 return parse_result 6586 6587 def _parse_expressions(self) -> t.List[exp.Expression]: 6588 return self._parse_csv(self._parse_expression) 6589 6590 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6591 return self._parse_select() or self._parse_set_operations( 6592 self._parse_expression() if alias else self._parse_assignment() 6593 ) 6594 6595 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6596 return self._parse_query_modifiers( 6597 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6598 ) 6599 6600 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6601 this = None 6602 if 
self._match_texts(self.TRANSACTION_KIND): 6603 this = self._prev.text 6604 6605 self._match_texts(("TRANSACTION", "WORK")) 6606 6607 modes = [] 6608 while True: 6609 mode = [] 6610 while self._match(TokenType.VAR): 6611 mode.append(self._prev.text) 6612 6613 if mode: 6614 modes.append(" ".join(mode)) 6615 if not self._match(TokenType.COMMA): 6616 break 6617 6618 return self.expression(exp.Transaction, this=this, modes=modes) 6619 6620 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6621 chain = None 6622 savepoint = None 6623 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6624 6625 self._match_texts(("TRANSACTION", "WORK")) 6626 6627 if self._match_text_seq("TO"): 6628 self._match_text_seq("SAVEPOINT") 6629 savepoint = self._parse_id_var() 6630 6631 if self._match(TokenType.AND): 6632 chain = not self._match_text_seq("NO") 6633 self._match_text_seq("CHAIN") 6634 6635 if is_rollback: 6636 return self.expression(exp.Rollback, savepoint=savepoint) 6637 6638 return self.expression(exp.Commit, chain=chain) 6639 6640 def _parse_refresh(self) -> exp.Refresh: 6641 self._match(TokenType.TABLE) 6642 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6643 6644 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6645 if not self._match_text_seq("ADD"): 6646 return None 6647 6648 self._match(TokenType.COLUMN) 6649 exists_column = self._parse_exists(not_=True) 6650 expression = self._parse_field_def() 6651 6652 if expression: 6653 expression.set("exists", exists_column) 6654 6655 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6656 if self._match_texts(("FIRST", "AFTER")): 6657 position = self._prev.text 6658 column_position = self.expression( 6659 exp.ColumnPosition, this=self._parse_column(), position=position 6660 ) 6661 expression.set("position", column_position) 6662 6663 return expression 6664 6665 def _parse_drop_column(self) -> t.Optional[exp.Drop | 
exp.Command]: 6666 drop = self._match(TokenType.DROP) and self._parse_drop() 6667 if drop and not isinstance(drop, exp.Command): 6668 drop.set("kind", drop.args.get("kind", "COLUMN")) 6669 return drop 6670 6671 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6672 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6673 return self.expression( 6674 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6675 ) 6676 6677 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6678 index = self._index - 1 6679 6680 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6681 return self._parse_csv( 6682 lambda: self.expression( 6683 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6684 ) 6685 ) 6686 6687 self._retreat(index) 6688 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6689 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6690 6691 if self._match_text_seq("ADD", "COLUMNS"): 6692 schema = self._parse_schema() 6693 if schema: 6694 return [schema] 6695 return [] 6696 6697 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6698 6699 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6700 if self._match_texts(self.ALTER_ALTER_PARSERS): 6701 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6702 6703 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6704 # keyword after ALTER we default to parsing this statement 6705 self._match(TokenType.COLUMN) 6706 column = self._parse_field(any_token=True) 6707 6708 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6709 return self.expression(exp.AlterColumn, this=column, drop=True) 6710 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6711 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6712 if self._match(TokenType.COMMENT): 6713 
return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6714 if self._match_text_seq("DROP", "NOT", "NULL"): 6715 return self.expression( 6716 exp.AlterColumn, 6717 this=column, 6718 drop=True, 6719 allow_null=True, 6720 ) 6721 if self._match_text_seq("SET", "NOT", "NULL"): 6722 return self.expression( 6723 exp.AlterColumn, 6724 this=column, 6725 allow_null=False, 6726 ) 6727 self._match_text_seq("SET", "DATA") 6728 self._match_text_seq("TYPE") 6729 return self.expression( 6730 exp.AlterColumn, 6731 this=column, 6732 dtype=self._parse_types(), 6733 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6734 using=self._match(TokenType.USING) and self._parse_assignment(), 6735 ) 6736 6737 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6738 if self._match_texts(("ALL", "EVEN", "AUTO")): 6739 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6740 6741 self._match_text_seq("KEY", "DISTKEY") 6742 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6743 6744 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6745 if compound: 6746 self._match_text_seq("SORTKEY") 6747 6748 if self._match(TokenType.L_PAREN, advance=False): 6749 return self.expression( 6750 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6751 ) 6752 6753 self._match_texts(("AUTO", "NONE")) 6754 return self.expression( 6755 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6756 ) 6757 6758 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6759 index = self._index - 1 6760 6761 partition_exists = self._parse_exists() 6762 if self._match(TokenType.PARTITION, advance=False): 6763 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6764 6765 self._retreat(index) 6766 return self._parse_csv(self._parse_drop_column) 6767 6768 def _parse_alter_table_rename(self) -> 
t.Optional[exp.RenameTable | exp.RenameColumn]: 6769 if self._match(TokenType.COLUMN): 6770 exists = self._parse_exists() 6771 old_column = self._parse_column() 6772 to = self._match_text_seq("TO") 6773 new_column = self._parse_column() 6774 6775 if old_column is None or to is None or new_column is None: 6776 return None 6777 6778 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6779 6780 self._match_text_seq("TO") 6781 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6782 6783 def _parse_alter_table_set(self) -> exp.AlterSet: 6784 alter_set = self.expression(exp.AlterSet) 6785 6786 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6787 "TABLE", "PROPERTIES" 6788 ): 6789 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6790 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6791 alter_set.set("expressions", [self._parse_assignment()]) 6792 elif self._match_texts(("LOGGED", "UNLOGGED")): 6793 alter_set.set("option", exp.var(self._prev.text.upper())) 6794 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6795 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6796 elif self._match_text_seq("LOCATION"): 6797 alter_set.set("location", self._parse_field()) 6798 elif self._match_text_seq("ACCESS", "METHOD"): 6799 alter_set.set("access_method", self._parse_field()) 6800 elif self._match_text_seq("TABLESPACE"): 6801 alter_set.set("tablespace", self._parse_field()) 6802 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6803 alter_set.set("file_format", [self._parse_field()]) 6804 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6805 alter_set.set("file_format", self._parse_wrapped_options()) 6806 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6807 alter_set.set("copy_options", self._parse_wrapped_options()) 6808 elif self._match_text_seq("TAG") or 
self._match_text_seq("TAGS"): 6809 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6810 else: 6811 if self._match_text_seq("SERDE"): 6812 alter_set.set("serde", self._parse_field()) 6813 6814 alter_set.set("expressions", [self._parse_properties()]) 6815 6816 return alter_set 6817 6818 def _parse_alter(self) -> exp.Alter | exp.Command: 6819 start = self._prev 6820 6821 alter_token = self._match_set(self.ALTERABLES) and self._prev 6822 if not alter_token: 6823 return self._parse_as_command(start) 6824 6825 exists = self._parse_exists() 6826 only = self._match_text_seq("ONLY") 6827 this = self._parse_table(schema=True) 6828 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6829 6830 if self._next: 6831 self._advance() 6832 6833 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6834 if parser: 6835 actions = ensure_list(parser(self)) 6836 not_valid = self._match_text_seq("NOT", "VALID") 6837 options = self._parse_csv(self._parse_property) 6838 6839 if not self._curr and actions: 6840 return self.expression( 6841 exp.Alter, 6842 this=this, 6843 kind=alter_token.text.upper(), 6844 exists=exists, 6845 actions=actions, 6846 only=only, 6847 options=options, 6848 cluster=cluster, 6849 not_valid=not_valid, 6850 ) 6851 6852 return self._parse_as_command(start) 6853 6854 def _parse_merge(self) -> exp.Merge: 6855 self._match(TokenType.INTO) 6856 target = self._parse_table() 6857 6858 if target and self._match(TokenType.ALIAS, advance=False): 6859 target.set("alias", self._parse_table_alias()) 6860 6861 self._match(TokenType.USING) 6862 using = self._parse_table() 6863 6864 self._match(TokenType.ON) 6865 on = self._parse_assignment() 6866 6867 return self.expression( 6868 exp.Merge, 6869 this=target, 6870 using=using, 6871 on=on, 6872 expressions=self._parse_when_matched(), 6873 returning=self._parse_returning(), 6874 ) 6875 6876 def _parse_when_matched(self) -> t.List[exp.When]: 6877 whens = [] 6878 6879 while 
self._match(TokenType.WHEN): 6880 matched = not self._match(TokenType.NOT) 6881 self._match_text_seq("MATCHED") 6882 source = ( 6883 False 6884 if self._match_text_seq("BY", "TARGET") 6885 else self._match_text_seq("BY", "SOURCE") 6886 ) 6887 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6888 6889 self._match(TokenType.THEN) 6890 6891 if self._match(TokenType.INSERT): 6892 this = self._parse_star() 6893 if this: 6894 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 6895 else: 6896 then = self.expression( 6897 exp.Insert, 6898 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 6899 expression=self._match_text_seq("VALUES") and self._parse_value(), 6900 ) 6901 elif self._match(TokenType.UPDATE): 6902 expressions = self._parse_star() 6903 if expressions: 6904 then = self.expression(exp.Update, expressions=expressions) 6905 else: 6906 then = self.expression( 6907 exp.Update, 6908 expressions=self._match(TokenType.SET) 6909 and self._parse_csv(self._parse_equality), 6910 ) 6911 elif self._match(TokenType.DELETE): 6912 then = self.expression(exp.Var, this=self._prev.text) 6913 else: 6914 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 6915 6916 whens.append( 6917 self.expression( 6918 exp.When, 6919 matched=matched, 6920 source=source, 6921 condition=condition, 6922 then=then, 6923 ) 6924 ) 6925 return whens 6926 6927 def _parse_show(self) -> t.Optional[exp.Expression]: 6928 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6929 if parser: 6930 return parser(self) 6931 return self._parse_as_command(self._prev) 6932 6933 def _parse_set_item_assignment( 6934 self, kind: t.Optional[str] = None 6935 ) -> t.Optional[exp.Expression]: 6936 index = self._index 6937 6938 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6939 return self._parse_set_transaction(global_=kind == "GLOBAL") 6940 6941 left = self._parse_primary() or self._parse_column() 6942 
assignment_delimiter = self._match_texts(("=", "TO")) 6943 6944 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6945 self._retreat(index) 6946 return None 6947 6948 right = self._parse_statement() or self._parse_id_var() 6949 if isinstance(right, (exp.Column, exp.Identifier)): 6950 right = exp.var(right.name) 6951 6952 this = self.expression(exp.EQ, this=left, expression=right) 6953 return self.expression(exp.SetItem, this=this, kind=kind) 6954 6955 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6956 self._match_text_seq("TRANSACTION") 6957 characteristics = self._parse_csv( 6958 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6959 ) 6960 return self.expression( 6961 exp.SetItem, 6962 expressions=characteristics, 6963 kind="TRANSACTION", 6964 **{"global": global_}, # type: ignore 6965 ) 6966 6967 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6968 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6969 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6970 6971 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6972 index = self._index 6973 set_ = self.expression( 6974 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6975 ) 6976 6977 if self._curr: 6978 self._retreat(index) 6979 return self._parse_as_command(self._prev) 6980 6981 return set_ 6982 6983 def _parse_var_from_options( 6984 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6985 ) -> t.Optional[exp.Var]: 6986 start = self._curr 6987 if not start: 6988 return None 6989 6990 option = start.text.upper() 6991 continuations = options.get(option) 6992 6993 index = self._index 6994 self._advance() 6995 for keywords in continuations or []: 6996 if isinstance(keywords, str): 6997 keywords = (keywords,) 6998 6999 if self._match_text_seq(*keywords): 7000 option = f"{option} {' '.join(keywords)}" 7001 break 
7002 else: 7003 if continuations or continuations is None: 7004 if raise_unmatched: 7005 self.raise_error(f"Unknown option {option}") 7006 7007 self._retreat(index) 7008 return None 7009 7010 return exp.var(option) 7011 7012 def _parse_as_command(self, start: Token) -> exp.Command: 7013 while self._curr: 7014 self._advance() 7015 text = self._find_sql(start, self._prev) 7016 size = len(start.text) 7017 self._warn_unsupported() 7018 return exp.Command(this=text[:size], expression=text[size:]) 7019 7020 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7021 settings = [] 7022 7023 self._match_l_paren() 7024 kind = self._parse_id_var() 7025 7026 if self._match(TokenType.L_PAREN): 7027 while True: 7028 key = self._parse_id_var() 7029 value = self._parse_primary() 7030 7031 if not key and value is None: 7032 break 7033 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7034 self._match(TokenType.R_PAREN) 7035 7036 self._match_r_paren() 7037 7038 return self.expression( 7039 exp.DictProperty, 7040 this=this, 7041 kind=kind.this if kind else None, 7042 settings=settings, 7043 ) 7044 7045 def _parse_dict_range(self, this: str) -> exp.DictRange: 7046 self._match_l_paren() 7047 has_min = self._match_text_seq("MIN") 7048 if has_min: 7049 min = self._parse_var() or self._parse_primary() 7050 self._match_text_seq("MAX") 7051 max = self._parse_var() or self._parse_primary() 7052 else: 7053 max = self._parse_var() or self._parse_primary() 7054 min = exp.Literal.number(0) 7055 self._match_r_paren() 7056 return self.expression(exp.DictRange, this=this, min=min, max=max) 7057 7058 def _parse_comprehension( 7059 self, this: t.Optional[exp.Expression] 7060 ) -> t.Optional[exp.Comprehension]: 7061 index = self._index 7062 expression = self._parse_column() 7063 if not self._match(TokenType.IN): 7064 self._retreat(index - 1) 7065 return None 7066 iterator = self._parse_column() 7067 condition = self._parse_assignment() if self._match_text_seq("IF") 
else None 7068 return self.expression( 7069 exp.Comprehension, 7070 this=this, 7071 expression=expression, 7072 iterator=iterator, 7073 condition=condition, 7074 ) 7075 7076 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7077 if self._match(TokenType.HEREDOC_STRING): 7078 return self.expression(exp.Heredoc, this=self._prev.text) 7079 7080 if not self._match_text_seq("$"): 7081 return None 7082 7083 tags = ["$"] 7084 tag_text = None 7085 7086 if self._is_connected(): 7087 self._advance() 7088 tags.append(self._prev.text.upper()) 7089 else: 7090 self.raise_error("No closing $ found") 7091 7092 if tags[-1] != "$": 7093 if self._is_connected() and self._match_text_seq("$"): 7094 tag_text = tags[-1] 7095 tags.append("$") 7096 else: 7097 self.raise_error("No closing $ found") 7098 7099 heredoc_start = self._curr 7100 7101 while self._curr: 7102 if self._match_text_seq(*tags, advance=False): 7103 this = self._find_sql(heredoc_start, self._prev) 7104 self._advance(len(tags)) 7105 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7106 7107 self._advance() 7108 7109 self.raise_error(f"No closing {''.join(tags)} found") 7110 return None 7111 7112 def _find_parser( 7113 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7114 ) -> t.Optional[t.Callable]: 7115 if not self._curr: 7116 return None 7117 7118 index = self._index 7119 this = [] 7120 while True: 7121 # The current token might be multiple words 7122 curr = self._curr.text.upper() 7123 key = curr.split(" ") 7124 this.append(curr) 7125 7126 self._advance() 7127 result, trie = in_trie(trie, key) 7128 if result == TrieResult.FAILED: 7129 break 7130 7131 if result == TrieResult.EXISTS: 7132 subparser = parsers[" ".join(this)] 7133 return subparser 7134 7135 self._retreat(index) 7136 return None 7137 7138 def _match(self, token_type, advance=True, expression=None): 7139 if not self._curr: 7140 return None 7141 7142 if self._curr.token_type == token_type: 7143 if advance: 7144 self._advance() 7145 
self._add_comments(expression) 7146 return True 7147 7148 return None 7149 7150 def _match_set(self, types, advance=True): 7151 if not self._curr: 7152 return None 7153 7154 if self._curr.token_type in types: 7155 if advance: 7156 self._advance() 7157 return True 7158 7159 return None 7160 7161 def _match_pair(self, token_type_a, token_type_b, advance=True): 7162 if not self._curr or not self._next: 7163 return None 7164 7165 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7166 if advance: 7167 self._advance(2) 7168 return True 7169 7170 return None 7171 7172 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7173 if not self._match(TokenType.L_PAREN, expression=expression): 7174 self.raise_error("Expecting (") 7175 7176 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7177 if not self._match(TokenType.R_PAREN, expression=expression): 7178 self.raise_error("Expecting )") 7179 7180 def _match_texts(self, texts, advance=True): 7181 if ( 7182 self._curr 7183 and self._curr.token_type != TokenType.STRING 7184 and self._curr.text.upper() in texts 7185 ): 7186 if advance: 7187 self._advance() 7188 return True 7189 return None 7190 7191 def _match_text_seq(self, *texts, advance=True): 7192 index = self._index 7193 for text in texts: 7194 if ( 7195 self._curr 7196 and self._curr.token_type != TokenType.STRING 7197 and self._curr.text.upper() == text 7198 ): 7199 self._advance() 7200 else: 7201 self._retreat(index) 7202 return None 7203 7204 if not advance: 7205 self._retreat(index) 7206 7207 return True 7208 7209 def _replace_lambda( 7210 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7211 ) -> t.Optional[exp.Expression]: 7212 if not node: 7213 return node 7214 7215 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7216 7217 for column in node.find_all(exp.Column): 7218 typ = lambda_types.get(column.parts[0].name) 7219 if typ 
is not None: 7220 dot_or_id = column.to_dot() if column.table else column.this 7221 7222 if typ: 7223 dot_or_id = self.expression( 7224 exp.Cast, 7225 this=dot_or_id, 7226 to=typ, 7227 ) 7228 7229 parent = column.parent 7230 7231 while isinstance(parent, exp.Dot): 7232 if not isinstance(parent.parent, exp.Dot): 7233 parent.replace(dot_or_id) 7234 break 7235 parent = parent.parent 7236 else: 7237 if column is node: 7238 node = dot_or_id 7239 else: 7240 column.replace(dot_or_id) 7241 return node 7242 7243 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7244 start = self._prev 7245 7246 # Not to be confused with TRUNCATE(number, decimals) function call 7247 if self._match(TokenType.L_PAREN): 7248 self._retreat(self._index - 2) 7249 return self._parse_function() 7250 7251 # Clickhouse supports TRUNCATE DATABASE as well 7252 is_database = self._match(TokenType.DATABASE) 7253 7254 self._match(TokenType.TABLE) 7255 7256 exists = self._parse_exists(not_=False) 7257 7258 expressions = self._parse_csv( 7259 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7260 ) 7261 7262 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7263 7264 if self._match_text_seq("RESTART", "IDENTITY"): 7265 identity = "RESTART" 7266 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7267 identity = "CONTINUE" 7268 else: 7269 identity = None 7270 7271 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7272 option = self._prev.text 7273 else: 7274 option = None 7275 7276 partition = self._parse_partition() 7277 7278 # Fallback case 7279 if self._curr: 7280 return self._parse_as_command(start) 7281 7282 return self.expression( 7283 exp.TruncateTable, 7284 expressions=expressions, 7285 is_database=is_database, 7286 exists=exists, 7287 cluster=cluster, 7288 identity=identity, 7289 option=option, 7290 partition=partition, 7291 ) 7292 7293 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7294 
this = self._parse_ordered(self._parse_opclass) 7295 7296 if not self._match(TokenType.WITH): 7297 return this 7298 7299 op = self._parse_var(any_token=True) 7300 7301 return self.expression(exp.WithOperator, this=this, op=op) 7302 7303 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7304 self._match(TokenType.EQ) 7305 self._match(TokenType.L_PAREN) 7306 7307 opts: t.List[t.Optional[exp.Expression]] = [] 7308 while self._curr and not self._match(TokenType.R_PAREN): 7309 if self._match_text_seq("FORMAT_NAME", "="): 7310 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7311 # so we parse it separately to use _parse_field() 7312 prop = self.expression( 7313 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7314 ) 7315 opts.append(prop) 7316 else: 7317 opts.append(self._parse_property()) 7318 7319 self._match(TokenType.COMMA) 7320 7321 return opts 7322 7323 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7324 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7325 7326 options = [] 7327 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7328 option = self._parse_var(any_token=True) 7329 prev = self._prev.text.upper() 7330 7331 # Different dialects might separate options and values by white space, "=" and "AS" 7332 self._match(TokenType.EQ) 7333 self._match(TokenType.ALIAS) 7334 7335 param = self.expression(exp.CopyParameter, this=option) 7336 7337 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7338 TokenType.L_PAREN, advance=False 7339 ): 7340 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7341 param.set("expressions", self._parse_wrapped_options()) 7342 elif prev == "FILE_FORMAT": 7343 # T-SQL's external file format case 7344 param.set("expression", self._parse_field()) 7345 else: 7346 param.set("expression", self._parse_unquoted_field()) 7347 7348 options.append(param) 7349 self._match(sep) 7350 7351 return options 7352 7353 
    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        """
        Parses credential-related clauses of a COPY statement: STORAGE_INTEGRATION,
        CREDENTIALS and ENCRYPTION (Snowflake) plus IAM_ROLE and REGION (Redshift).

        Note: the Credentials node is created up-front, so an "empty" node is
        returned even when none of the clauses are present.
        """
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        # Parses a single file location in COPY; kept as a separate hook,
        # presumably so dialects can override it -- confirm before relying on that.
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        """
        Parses a COPY statement (COPY INTO ... FROM/TO ...).

        Falls back to a raw exp.Command when tokens remain after the structured
        parse, i.e. when the syntax isn't fully supported.
        """
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        # kind is True for COPY ... FROM (or neither keyword), False for COPY ... TO
        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        # NORMALIZE(<string> [, <form>]) -- the form argument only parses after a comma.
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        """
        Parses star projections: either a COLUMNS(...) call (marked for unpacking)
        or a plain star with optional EXCEPT/EXCLUDE, REPLACE and RENAME modifiers.
        """
        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        )

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        """
        Parses one privilege in a GRANT statement, e.g. ``SELECT (col1, col2)``,
        as a variable (the keyword sequence) plus an optional column list.
        """
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        # Optional ROLE / GROUP qualifier followed by the principal identifier.
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant(self) -> exp.Grant | exp.Command:
        """
        Parses a GRANT statement into exp.Grant, falling back to a raw
        exp.Command when the securable can't be parsed or tokens remain.
        """
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        # OVERLAY(<this> PLACING <expression> FROM <from> [FOR <for>])
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1360 def __init__( 1361 self, 1362 error_level: t.Optional[ErrorLevel] = None, 1363 error_message_context: int = 100, 1364 max_errors: int = 3, 1365 dialect: DialectType = None, 1366 ): 1367 from sqlglot.dialects import Dialect 1368 1369 self.error_level = error_level or ErrorLevel.IMMEDIATE 1370 self.error_message_context = error_message_context 1371 self.max_errors = max_errors 1372 self.dialect = Dialect.get_or_raise(dialect) 1373 self.reset()
1385 def parse( 1386 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1387 ) -> t.List[t.Optional[exp.Expression]]: 1388 """ 1389 Parses a list of tokens and returns a list of syntax trees, one tree 1390 per parsed SQL statement. 1391 1392 Args: 1393 raw_tokens: The list of tokens. 1394 sql: The original SQL string, used to produce helpful debug messages. 1395 1396 Returns: 1397 The list of the produced syntax trees. 1398 """ 1399 return self._parse( 1400 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1401 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of produced syntax trees.
1403 def parse_into( 1404 self, 1405 expression_types: exp.IntoType, 1406 raw_tokens: t.List[Token], 1407 sql: t.Optional[str] = None, 1408 ) -> t.List[t.Optional[exp.Expression]]: 1409 """ 1410 Parses a list of tokens into a given Expression type. If a collection of Expression 1411 types is given instead, this method will try to parse the token list into each one 1412 of them, stopping at the first for which the parsing succeeds. 1413 1414 Args: 1415 expression_types: The expression type(s) to try and parse the token list into. 1416 raw_tokens: The list of tokens. 1417 sql: The original SQL string, used to produce helpful debug messages. 1418 1419 Returns: 1420 The target Expression. 1421 """ 1422 errors = [] 1423 for expression_type in ensure_list(expression_types): 1424 parser = self.EXPRESSION_PARSERS.get(expression_type) 1425 if not parser: 1426 raise TypeError(f"No parser registered for {expression_type}") 1427 1428 try: 1429 return self._parse(parser, raw_tokens, sql) 1430 except ParseError as e: 1431 e.errors[0]["into_expression"] = expression_type 1432 errors.append(e) 1433 1434 raise ParseError( 1435 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1436 errors=merge_errors(errors), 1437 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1477 def check_errors(self) -> None: 1478 """Logs or raises any found errors, depending on the chosen error level setting.""" 1479 if self.error_level == ErrorLevel.WARN: 1480 for error in self.errors: 1481 logger.error(str(error)) 1482 elif self.error_level == ErrorLevel.RAISE and self.errors: 1483 raise ParseError( 1484 concat_messages(self.errors, self.max_errors), 1485 errors=merge_errors(self.errors), 1486 )
Logs or raises any found errors, depending on the chosen error level setting.
1488 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1489 """ 1490 Appends an error in the list of recorded errors or raises it, depending on the chosen 1491 error level setting. 1492 """ 1493 token = token or self._curr or self._prev or Token.string("") 1494 start = token.start 1495 end = token.end + 1 1496 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1497 highlight = self.sql[start:end] 1498 end_context = self.sql[end : end + self.error_message_context] 1499 1500 error = ParseError.new( 1501 f"{message}. Line {token.line}, Col: {token.col}.\n" 1502 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1503 description=message, 1504 line=token.line, 1505 col=token.col, 1506 start_context=start_context, 1507 highlight=highlight, 1508 end_context=end_context, 1509 ) 1510 1511 if self.error_level == ErrorLevel.IMMEDIATE: 1512 raise error 1513 1514 self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
1516 def expression( 1517 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1518 ) -> E: 1519 """ 1520 Creates a new, validated Expression. 1521 1522 Args: 1523 exp_class: The expression class to instantiate. 1524 comments: An optional list of comments to attach to the expression. 1525 kwargs: The arguments to set for the expression along with their respective values. 1526 1527 Returns: 1528 The target expression. 1529 """ 1530 instance = exp_class(**kwargs) 1531 instance.add_comments(comments) if comments else self._add_comments(instance) 1532 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1539 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1540 """ 1541 Validates an Expression, making sure that all its mandatory arguments are set. 1542 1543 Args: 1544 expression: The expression to validate. 1545 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1546 1547 Returns: 1548 The validated expression. 1549 """ 1550 if self.error_level != ErrorLevel.IGNORE: 1551 for error_message in expression.error_messages(args): 1552 self.raise_error(error_message) 1553 1554 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.