sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_unless_query,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    sha256_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_var,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

    from sqlglot.optimizer.annotate_types import TypeAnnotator

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

        if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
            expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
    expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP))
    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 2:
        return exp.Time.from_arg_list(args)
    return exp.TimeFromParts.from_arg_list(args)


def _build_datetime(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTimestamp.from_arg_list(args)
    if len(args) == 2:
        return exp.Datetime.from_arg_list(args)
    return exp.TimestampFromParts.from_arg_list(args)


def _build_regexp_extract(args: t.List) -> exp.RegexpExtract:
    try:
        group = re.compile(args[1].name).groups == 1
    except re.error:
        group = False

    return exp.RegexpExtract(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position=seq_get(args, 2),
        occurrence=seq_get(args, 3),
        group=exp.Literal.number(1) if group else None,
    )


def _str_to_datetime_sql(
    self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime
) -> str:
    this = self.sql(expression, "this")
    dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP"

    if expression.args.get("safe"):
        fmt = self.format_time(
            expression,
            self.dialect.INVERSE_FORMAT_MAPPING,
            self.dialect.INVERSE_FORMAT_TRIE,
        )
        return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})"

    fmt = self.format_time(expression)
    return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone"))


def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E:
    """
    Many BigQuery math functions such as CEIL, FLOOR etc follow this return type convention:
    +---------+---------+---------+------------+---------+
    |  INPUT  |  INT64  | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    | OUTPUT  | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    """
    self._annotate_args(expression)

    this: exp.Expression = expression.this

    self._set_type(
        expression,
        exp.DataType.Type.DOUBLE if this.is_type(*exp.DataType.INTEGER_TYPES) else this.type,
    )
    return expression


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False
    HEX_LOWERCASE = True
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = True

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E6S": "%S.%f",
        "%e": "%-d",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    # All set operations require either a DISTINCT or ALL specifier
    SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None)

    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        **{
            expr_type: lambda self, e: _annotate_math_functions(self, e)
            for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round)
        },
        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
    }

    def normalize_identifier(self, expression: E) -> E:
        if (
            isinstance(expression, exp.Identifier)
            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
        ):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTEINT": TokenType.INT,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
        KEYWORDS.pop("/*+")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME": _build_datetime,
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": _build_regexp_extract,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
            "FORMAT_DATETIME": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToTimestamp(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    text = ""
                    while self._is_connected() and self._curr.token_type != TokenType.DOT:
                        self._advance()
                        text += self._prev.text
                    table_name += text

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts):
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(
                    this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
                )
                table.meta["quoted_table"] = True

            return table

        def _parse_column(self) -> t.Optional[exp.Expression]:
            column = super()._parse_column()
            if isinstance(column, exp.Column):
                parts = column.parts
                if any("." in p.name for p in parts):
                    catalog, db, table, this, *rest = (
                        exp.to_identifier(p, quoted=True)
                        for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                    )

                    if rest and this:
                        this = exp.Dot.build([this, *rest])  # type: ignore

                    column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                    column.meta["quoted_column"] = True

            return column

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

        def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
            unnest = super()._parse_unnest(with_alias=with_alias)

            if not unnest:
                return None

            unnest_expr = seq_get(unnest.expressions, 0)
            if unnest_expr:
                from sqlglot.optimizer.annotate_types import annotate_types

                unnest_expr = annotate_types(unnest_expr)

                # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields,
                # in contrast to other dialects such as DuckDB which flattens only the array by default
                if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any(
                    array_elem.is_type(exp.DataType.Type.STRUCT)
                    for array_elem in unnest_expr._type.expressions
                ):
                    unnest.set("explode_array", True)

            return unnest

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"
        HEX_FUNC = "TO_HEX"
        WITH_PROPERTIES_PREFIX = "OPTIONS"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    transforms.unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: _str_to_datetime_sql,
            exp.StrToTime: _str_to_datetime_sql,
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimestampFromParts: rename_func("DATETIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.TsOrDsToTimestamp: rename_func("DATETIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Uuid: lambda *_: "GENERATE_UUID()",
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.ROWVERSION: "BYTES",
            exp.DataType.Type.UUID: "STRING",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # WINDOW comes after QUALIFY
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause
        AFTER_HAVING_MODIFIER_TRANSFORMS = {
            "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"],
            "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"],
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
            "between", "by", "case", "cast", "collate", "contains", "create", "cross",
            "cube", "current", "default", "define", "desc", "distinct", "else", "end",
            "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
            "following", "for", "from", "full", "group", "grouping", "groups", "hash",
            "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
            "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
            "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
            "outer", "over", "partition", "preceding", "proto", "qualify", "range",
            "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
            "struct", "tablesample", "then", "to", "treat", "true", "unbounded", "union",
            "unnest", "using", "when", "where", "window", "with", "within",
        }

        def mod_sql(self, expression: exp.Mod) -> str:
            this = expression.this
            expr = expression.expression
            return self.func(
                "MOD",
                this.unnest() if isinstance(this, exp.Paren) else this,
                expr.unnest() if isinstance(expr, exp.Paren) else expr,
            )

        def column_parts(self, expression: exp.Column) -> str:
            if expression.meta.get("quoted_column"):
                # If a column reference is of the form `dataset.table`.name, we need
                # to preserve the quoted table path, otherwise the reference breaks
                table_parts = ".".join(p.name for p in expression.parts[:-1])
                table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
                return f"{table_path}.{self.sql(expression, 'this')}"

            return super().column_parts(expression)

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            if isinstance(expression.this, exp.TsOrDsToTimestamp):
                func_name = "FORMAT_DATETIME"
            else:
                func_name = "FORMAT_DATE"
            this = (
                expression.this
                if isinstance(expression.this, (exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
                else expression
            )
            return self.func(func_name, self.format_time(expression), this.this)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
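As a quick orientation, the sketch below shows how this dialect is typically exercised through the public sqlglot API. The exact output strings are indicative and may vary slightly across sqlglot versions.

    import sqlglot

    # Parse BigQuery SQL into sqlglot's canonical AST. TIMESTAMP_MILLIS(..) becomes
    # exp.UnixToTime with a millisecond scale (see the Parser FUNCTIONS table above).
    expression = sqlglot.parse_one("SELECT TIMESTAMP_MILLIS(1700000000000)", read="bigquery")

    # Generate SQL back for BigQuery, or for another dialect.
    print(expression.sql(dialect="bigquery"))  # SELECT TIMESTAMP_MILLIS(1700000000000)
    print(expression.sql(dialect="duckdb"))    # e.g. SELECT EPOCH_MS(1700000000000)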
class BigQuery(Dialect):
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
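The BigQuery dialect overrides this attribute, since DATE_TRUNC(week) starts weeks on Sunday there. A quick check (a minimal sketch, assuming a recent sqlglot release where the override is -1):

    from sqlglot.dialects import BigQuery

    # BigQuery weeks start on Sunday, hence the -1 override (value may differ across versions).
    print(BigQuery.WEEK_OFFSET)  # -1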
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG).
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS (SELECT 1 AS id, 2 AS my_id) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the whole query, except:
- BigQuery, which forwards the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- Clickhouse, which forwards the alias across the entire query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
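This divergence can be observed by qualifying the query under the BigQuery dialect (a minimal sketch using the optimizer's qualify pass; the exact quoting of the output may vary by version):

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    sql = """
    WITH data AS (SELECT 1 AS id, 2 AS my_id)
    SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
    """

    # Under BigQuery, the projection alias is expanded into GROUP BY/HAVING before
    # columns are qualified, so both clauses end up referring to "id".
    print(qualify(sqlglot.parse_one(sql, read="bigquery"), dialect="bigquery").sql("bigquery"))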
Whether alias reference expansion before qualification should only happen for the GROUP BY clause.
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime formats.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
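BigQuery is one of the dialects where the keyword is mandatory, so transpiling a bare UNION into it spells the implicit DISTINCT out (a minimal sketch; the comment shows the expected shape of the output):

    import sqlglot

    # Postgres' bare UNION implies DISTINCT; BigQuery must state it explicitly.
    print(sqlglot.transpile("SELECT 1 UNION SELECT 2", read="postgres", write="bigquery")[0])
    # SELECT 1 UNION DISTINCT SELECT 2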
    def normalize_identifier(self, expression: E) -> E:
        if (
            isinstance(expression, exp.Identifier)
            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
        ):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

        return expression
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system, for example they may always be case-sensitive in Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
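For instance (a minimal sketch using the public Dialect API):

    from sqlglot import exp
    from sqlglot.dialects.dialect import Dialect

    bigquery = Dialect.get_or_raise("bigquery")

    # An unqualified, unquoted identifier is treated as case-insensitive and lowercased.
    print(bigquery.normalize_identifier(exp.to_identifier("FoO")).name)  # foo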
Mapping of an escaped sequence (\n) to its unescaped version (here, a literal newline character).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- COPY_PARAMS_ARE_CSV
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- get_or_raise
- format_time
- settings
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTEINT": TokenType.INT,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
        KEYWORDS.pop("/*+")
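For example, backtick identifiers and raw strings tokenize as expected (a minimal sketch):

    import sqlglot

    # Backticks delimit identifiers; r'...' is a raw string literal in BigQuery.
    sql = r"SELECT r'\n' FROM `dataset.tbl`"
    for token in sqlglot.tokenize(sql, read="bigquery"):
        print(token.token_type, token.text)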
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME": _build_datetime,
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": _build_regexp_extract,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
            "FORMAT_DATETIME": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToTimestamp(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    text = ""
                    while self._is_connected() and self._curr.token_type != TokenType.DOT:
                        self._advance()
                        text += self._prev.text
                    table_name += text

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts):
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(
                    this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
                )
                table.meta["quoted_table"] = True

            return table

        def _parse_column(self) -> t.Optional[exp.Expression]:
            column = super()._parse_column()
            if isinstance(column, exp.Column):
                parts = column.parts
                if any("." in p.name for p in parts):
                    catalog, db, table, this, *rest = (
                        exp.to_identifier(p, quoted=True)
                        for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                    )

                    if rest and this:
                        this = exp.Dot.build([this, *rest])  # type: ignore

                    column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                    column.meta["quoted_column"] = True

            return column

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

        def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
            unnest = super()._parse_unnest(with_alias=with_alias)

            if not unnest:
                return None

            unnest_expr = seq_get(unnest.expressions, 0)
            if unnest_expr:
                from sqlglot.optimizer.annotate_types import annotate_types

                unnest_expr = annotate_types(unnest_expr)

                # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields,
                # in contrast to other dialects such as DuckDB which flattens only the array by default
                if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any(
                    array_elem.is_type(exp.DataType.Type.STRUCT)
                    for array_elem in unnest_expr._type.expressions
                ):
                    unnest.set("explode_array", True)

            return unnest
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
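As a concrete illustration of the BigQuery-specific table parsing above, dash-separated project names are stitched back into a single identifier (a minimal sketch; the printed parts reflect the parsed tree):

    import sqlglot
    from sqlglot import exp

    # The dashes in my-project are reassembled into one catalog identifier.
    table = sqlglot.parse_one("SELECT * FROM my-project.dataset.tbl", read="bigquery").find(exp.Table)
    print(table.catalog, table.db, table.name)  # my-project dataset tbl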
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"
        HEX_FUNC = "TO_HEX"
        WITH_PROPERTIES_PREFIX = "OPTIONS"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    transforms.unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: _str_to_datetime_sql,
            exp.StrToTime: _str_to_datetime_sql,
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimestampFromParts: rename_func("DATETIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.TsOrDsToTimestamp: rename_func("DATETIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Uuid: lambda *_: "GENERATE_UUID()",
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.ROWVERSION: "BYTES",
            exp.DataType.Type.UUID: "STRING",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # WINDOW comes after QUALIFY
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause
        AFTER_HAVING_MODIFIER_TRANSFORMS = {
            "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"],
            "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"],
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
            "between", "by", "case", "cast", "collate", "contains", "create", "cross",
            "cube", "current", "default", "define", "desc", "distinct", "else", "end",
            "enum", "escape", "except", "exclude", "exists", "extract", "false",
            "fetch", "following", "for", "from", "full", "group", "grouping", "groups",
            "hash", "having", "if", "ignore", "in", "inner", "intersect", "interval",
            "into", "is", "join", "lateral", "left", "like", "limit", "lookup",
            "merge", "natural", "new", "no", "not", "null", "nulls", "of", "on", "or",
            "order", "outer", "over", "partition", "preceding", "proto", "qualify",
            "range", "recursive", "respect", "right", "rollup", "rows", "select",
            "set", "some", "struct", "tablesample", "then", "to", "treat", "true",
            "unbounded", "union", "unnest", "using", "when", "where", "window",
            "with", "within",
        }

        def mod_sql(self, expression: exp.Mod) -> str:
            this = expression.this
            expr = expression.expression
            return self.func(
                "MOD",
                this.unnest() if isinstance(this, exp.Paren) else this,
                expr.unnest() if isinstance(expr, exp.Paren) else expr,
            )

        def column_parts(self, expression: exp.Column) -> str:
            if expression.meta.get("quoted_column"):
                # If a column reference is of the form `dataset.table`.name, we need
                # to preserve the quoted table path, otherwise the reference breaks
                table_parts = ".".join(p.name for p in expression.parts[:-1])
                table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
                return f"{table_path}.{self.sql(expression, 'this')}"

            return super().column_parts(expression)

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            if isinstance(expression.this, exp.TsOrDsToTimestamp):
                func_name = "FORMAT_DATETIME"
            else:
                func_name = "FORMAT_DATE"
            this = (
                expression.this
                if isinstance(expression.this, (exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
                else expression
            )
            return self.func(func_name, self.format_time(expression), this.this)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
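For example, TYPE_MAPPING and trycast_sql combine when transpiling into BigQuery (a minimal sketch):

    import sqlglot

    # TRY_CAST becomes SAFE_CAST and INT maps to INT64 under the BigQuery generator.
    print(sqlglot.transpile("SELECT TRY_CAST(x AS INT)", write="bigquery")[0])
    # SELECT SAFE_CAST(x AS INT64)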
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are:
  - False (default): Never quote, except in cases where it's mandatory by the dialect.
  - True or 'always': Always quote.
  - 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are:
  - "upper" or True (default): Convert names to uppercase.
  - "lower": Convert names to lowercase.
  - False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
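Generator options flow through transpile, e.g. for pretty-printing (a minimal sketch):

    import sqlglot

    print(sqlglot.transpile("SELECT 1 AS x, 2 AS y", write="bigquery", pretty=True)[0])
    # SELECT
    #   1 AS x,
    #   2 AS y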
        def column_parts(self, expression: exp.Column) -> str:
            if expression.meta.get("quoted_column"):
                # If a column reference is of the form `dataset.table`.name, we need
                # to preserve the quoted table path, otherwise the reference breaks
                table_parts = ".".join(p.name for p in expression.parts[:-1])
                table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
                return f"{table_path}.{self.sql(expression, 'this')}"

            return super().column_parts(expression)
        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)
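The distinction is preserved on round-trips (a minimal sketch; both statements should come back unchanged):

    import sqlglot

    # A fully-quoted path stays one identifier; dotted quoting stays split.
    print(sqlglot.transpile("SELECT * FROM `x.y`", read="bigquery", write="bigquery")[0])
    print(sqlglot.transpile("SELECT * FROM `x`.`y`", read="bigquery", write="bigquery")[0])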
        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            if isinstance(expression.this, exp.TsOrDsToTimestamp):
                func_name = "FORMAT_DATETIME"
            else:
                func_name = "FORMAT_DATE"
            this = (
                expression.this
                if isinstance(expression.this, (exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
                else expression
            )
            return self.func(func_name, self.format_time(expression), this.this)
        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)
        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"
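For example, the OFFSET/ORDINAL wrappers are folded into the Bracket node on parse and reconstructed here on generation, so they round-trip (a minimal sketch):

    import sqlglot

    # SAFE_OFFSET parses into offset=0, safe=True and is regenerated verbatim.
    print(sqlglot.transpile("SELECT a[SAFE_OFFSET(0)] FROM t", read="bigquery", write="bigquery")[0])
    # SELECT a[SAFE_OFFSET(0)] FROM t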
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql