sqlglot.dialects.redshift
from __future__ import annotations

import typing as t

from sqlglot import exp, transforms
from sqlglot.dialects.dialect import (
    NormalizationStrategy,
    concat_to_dpipe_sql,
    concat_ws_to_dpipe_sql,
    date_delta_sql,
    generatedasidentitycolumnconstraint_sql,
    json_extract_segments,
    no_tablesample_sql,
    rename_func,
)
from sqlglot.dialects.postgres import Postgres
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


def _build_date_delta(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        expr = expr_type(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))
        if expr_type is exp.TsOrDsAdd:
            expr.set("return_type", exp.DataType.build("TIMESTAMP"))

        return expr

    return _builder


class Redshift(Postgres):
    # https://docs.aws.amazon.com/redshift/latest/dg/r_names.html
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    SUPPORTS_USER_DEFINED_TYPES = False
    INDEX_OFFSET = 0

    TIME_FORMAT = "'YYYY-MM-DD HH:MI:SS'"
    TIME_MAPPING = {
        **Postgres.TIME_MAPPING,
        "MON": "%b",
        "HH": "%H",
    }

    class Parser(Postgres.Parser):
        FUNCTIONS = {
            **Postgres.Parser.FUNCTIONS,
            "ADD_MONTHS": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                unit=exp.var("month"),
                return_type=exp.DataType.build("TIMESTAMP"),
            ),
            "DATEADD": _build_date_delta(exp.TsOrDsAdd),
            "DATE_ADD": _build_date_delta(exp.TsOrDsAdd),
            "DATEDIFF": _build_date_delta(exp.TsOrDsDiff),
            "DATE_DIFF": _build_date_delta(exp.TsOrDsDiff),
            "GETDATE": exp.CurrentTimestamp.from_arg_list,
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "STRTOL": exp.FromBase.from_arg_list,
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **Postgres.Parser.NO_PAREN_FUNCTION_PARSERS,
            "APPROXIMATE": lambda self: self._parse_approximate_count(),
            "SYSDATE": lambda self: self.expression(exp.CurrentTimestamp, transaction=True),
        }

        SUPPORTS_IMPLICIT_UNNEST = True

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
        ) -> t.Optional[exp.Expression]:
            # Redshift supports UNPIVOTing SUPER objects, e.g. `UNPIVOT foo.obj[0] AS val AT attr`
            unpivot = self._match(TokenType.UNPIVOT)
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            return self.expression(exp.Pivot, this=table, unpivot=True) if unpivot else table

        def _parse_convert(
            self, strict: bool, safe: t.Optional[bool] = None
        ) -> t.Optional[exp.Expression]:
            to = self._parse_types()
            self._match(TokenType.COMMA)
            this = self._parse_bitwise()
            return self.expression(exp.TryCast, this=this, to=to, safe=safe)

        def _parse_approximate_count(self) -> t.Optional[exp.ApproxDistinct]:
            index = self._index - 1
            func = self._parse_function()

            if isinstance(func, exp.Count) and isinstance(func.this, exp.Distinct):
                return self.expression(exp.ApproxDistinct, this=seq_get(func.this.expressions, 0))
            self._retreat(index)
            return None

    class Tokenizer(Postgres.Tokenizer):
        BIT_STRINGS = []
        HEX_STRINGS = []
        STRING_ESCAPES = ["\\", "'"]

        KEYWORDS = {
            **Postgres.Tokenizer.KEYWORDS,
            "HLLSKETCH": TokenType.HLLSKETCH,
            "SUPER": TokenType.SUPER,
            "TOP": TokenType.TOP,
            "UNLOAD": TokenType.COMMAND,
            "VARBYTE": TokenType.VARBINARY,
        }
        KEYWORDS.pop("VALUES")

        # Redshift allows # to appear as a table identifier prefix
        SINGLE_TOKENS = Postgres.Tokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("#")

    class Generator(Postgres.Generator):
        LOCKING_READS_SUPPORTED = False
        QUERY_HINTS = False
        VALUES_AS_TABLE = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = True
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = False
        MULTI_ARG_DISTINCT = True

        TYPE_MAPPING = {
            **Postgres.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "VARBYTE",
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.TIMETZ: "TIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "VARBYTE",
        }

        TRANSFORMS = {
            **Postgres.Generator.TRANSFORMS,
            exp.Concat: concat_to_dpipe_sql,
            exp.ConcatWs: concat_ws_to_dpipe_sql,
            exp.ApproxDistinct: lambda self,
            e: f"APPROXIMATE COUNT(DISTINCT {self.sql(e, 'this')})",
            exp.CurrentTimestamp: lambda self, e: (
                "SYSDATE" if e.args.get("transaction") else "GETDATE()"
            ),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DistKeyProperty: lambda self, e: self.func("DISTKEY", e.this),
            exp.DistStyleProperty: lambda self, e: self.naked_property(e),
            exp.FromBase: rename_func("STRTOL"),
            exp.GeneratedAsIdentityColumnConstraint: generatedasidentitycolumnconstraint_sql,
            exp.JSONExtract: json_extract_segments("JSON_EXTRACT_PATH_TEXT"),
            exp.JSONExtractScalar: json_extract_segments("JSON_EXTRACT_PATH_TEXT"),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.ParseJSON: rename_func("JSON_PARSE"),
            exp.Select: transforms.preprocess(
                [transforms.eliminate_distinct_on, transforms.eliminate_semi_and_anti_joins]
            ),
            exp.SortKeyProperty: lambda self,
            e: f"{'COMPOUND ' if e.args['compound'] else ''}SORTKEY({self.format_args(*e.this)})",
            exp.StartsWith: lambda self,
            e: f"{self.sql(e.this)} LIKE {self.sql(e.expression)} || '%'",
            exp.TableSample: no_tablesample_sql,
            exp.TsOrDsAdd: date_delta_sql("DATEADD"),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.UnixToTime: lambda self,
            e: f"(TIMESTAMP 'epoch' + {self.sql(e.this)} * INTERVAL '1 SECOND')",
        }

        # Postgres maps exp.Pivot to no_pivot_sql, but Redshift supports pivots
        TRANSFORMS.pop(exp.Pivot)

        # Redshift uses the POW | POWER (expr1, expr2) syntax instead of expr1 ^ expr2 (postgres)
        TRANSFORMS.pop(exp.Pow)

        # Redshift supports ANY_VALUE(..)
        TRANSFORMS.pop(exp.AnyValue)

        # Redshift supports LAST_DAY(..)
        TRANSFORMS.pop(exp.LastDay)

        RESERVED_KEYWORDS = {*Postgres.Generator.RESERVED_KEYWORDS, "snapshot", "type"}

        def unnest_sql(self, expression: exp.Unnest) -> str:
            args = expression.expressions
            num_args = len(args)

            if num_args > 1:
                self.unsupported(f"Unsupported number of arguments in UNNEST: {num_args}")
                return ""

            arg = self.sql(seq_get(args, 0))
            alias = self.expressions(expression.args.get("alias"), key="columns")
            return f"{arg} AS {alias}" if alias else arg

        def with_properties(self, properties: exp.Properties) -> str:
            """Redshift doesn't have `WITH` as part of its with_properties, so we remove it"""
            return self.properties(properties, prefix=" ", suffix="")

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            if expression.is_type(exp.DataType.Type.JSON):
                # Redshift doesn't support a JSON type, so casting to it is treated as a noop
                return self.sql(expression, "this")

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def datatype_sql(self, expression: exp.DataType) -> str:
            """
            Redshift converts the `TEXT` data type to `VARCHAR(255)` by default when people more
            generally mean VARCHAR of max length, which is `VARCHAR(max)` in Redshift. Therefore,
            if we get a `TEXT` data type without precision, we convert it to `VARCHAR(max)`; if it
            does have precision, we just convert `TEXT` to `VARCHAR`.
            """
            if expression.is_type("text"):
                expression.set("this", exp.DataType.Type.VARCHAR)
                precision = expression.args.get("expressions")

                if not precision:
                    expression.append("expressions", exp.var("MAX"))

            return super().datatype_sql(expression)
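For orientation, here is a minimal usage sketch (not part of the module source) showing the dialect through sqlglot's top-level API; the printed SQL is indicative and may vary slightly across sqlglot versions.

import sqlglot

# GETDATE is registered in Redshift.Parser.FUNCTIONS as exp.CurrentTimestamp,
# so it is re-emitted idiomatically by the target dialect.
print(sqlglot.transpile("SELECT GETDATE()", read="redshift", write="postgres")[0])
# e.g. SELECT CURRENT_TIMESTAMP

# exp.ApproxDistinct is rendered with Redshift's APPROXIMATE COUNT(DISTINCT ...) form.
print(sqlglot.transpile("SELECT APPROX_COUNT_DISTINCT(x) FROM t", read="duckdb", write="redshift")[0])
# e.g. SELECT APPROXIMATE COUNT(DISTINCT x) FROM t

# Redshift has no JSON type, so cast_sql drops a cast to JSON as a no-op.
print(sqlglot.transpile("SELECT CAST(x AS JSON) FROM t", write="redshift")[0])
# e.g. SELECT x FROM t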
NORMALIZATION_STRATEGY: Specifies the strategy according to which identifiers should be normalized.
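A small sketch of what CASE_INSENSITIVE normalization implies in practice, using sqlglot's identifier normalizer (assuming the sqlglot.optimizer.normalize_identifiers helper available in recent releases):

from sqlglot import parse_one
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

# Under a case-insensitive strategy even quoted identifiers can be lowercased;
# a case-sensitive dialect would leave "Foo" untouched because of the quotes.
expr = parse_one('SELECT "Foo", BAR FROM Baz', read="redshift")
print(normalize_identifiers(expr, dialect="redshift").sql(dialect="redshift"))
# e.g. SELECT "foo", bar FROM baz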
TIME_MAPPING: Associates this dialect's time formats with their equivalent Python strftime formats.
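A hedged sketch of the mapping in action: format tokens in a TO_CHAR call are rewritten into the target dialect's notation (the STRFTIME rendering below assumes DuckDB's generator and may differ between versions).

import sqlglot

# 'YYYY', 'MM', 'DD', 'HH24', 'MI' and 'SS' are translated through the
# dialects' TIME_MAPPINGs into Python-style strftime tokens.
sql = "SELECT TO_CHAR(created_at, 'YYYY-MM-DD HH24:MI:SS') FROM t"
print(sqlglot.transpile(sql, read="redshift", write="duckdb")[0])
# e.g. SELECT STRFTIME(created_at, '%Y-%m-%d %H:%M:%S') FROM t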
Mapping of an escaped sequence (`\\n`) to its unescaped version (`\n`).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- SAFE_DIVISION
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
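A minimal sketch of passing these parser options through sqlglot's top-level helpers (ErrorLevel lives in sqlglot.errors; output is indicative):

import sqlglot
from sqlglot.errors import ErrorLevel

# With ErrorLevel.WARN the parser logs problems and returns a best-effort tree
# instead of raising. Note the leading # on the table name, which this
# dialect's Tokenizer permits as an identifier prefix.
expr = sqlglot.parse_one("SELECT * FROM #staging", read="redshift", error_level=ErrorLevel.WARN)
print(expr.sql(dialect="redshift"))
# e.g. SELECT * FROM #staging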
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
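A short sketch of forwarding generator options through transpile(); output is indicative:

import sqlglot

# SYSDATE parses to exp.CurrentTimestamp(transaction=True) and LISTAGG to
# exp.GroupConcat, both of which round-trip through the transforms above.
sql = "SELECT SYSDATE, LISTAGG(name, ',') FROM users"
print(sqlglot.transpile(sql, read="redshift", write="redshift", pretty=True)[0])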
with_properties: Redshift doesn't have `WITH` as part of its with_properties, so we remove it.
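For example (a sketch; the exact property rendering may vary by version), DISTSTYLE/DISTKEY/SORTKEY are emitted bare rather than inside a WITH (...) clause:

import sqlglot

# Properties are appended space-separated after the column schema.
ddl = "CREATE TABLE t (a INT) DISTSTYLE KEY DISTKEY(a) SORTKEY(a)"
print(sqlglot.transpile(ddl, read="redshift", write="redshift")[0])
# e.g. CREATE TABLE t (a INTEGER) DISTSTYLE KEY DISTKEY(a) SORTKEY(a)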
datatype_sql: Redshift converts the `TEXT` data type to `VARCHAR(255)` by default when people more generally mean VARCHAR of max length, which is `VARCHAR(max)` in Redshift. Therefore, if we get a `TEXT` data type without precision, we convert it to `VARCHAR(max)`; if it does have precision, we just convert `TEXT` to `VARCHAR`.
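A sketch of both branches (outputs indicative):

import sqlglot

# TEXT without precision widens to VARCHAR(MAX) ...
print(sqlglot.transpile("CREATE TABLE t (c TEXT)", read="postgres", write="redshift")[0])
# e.g. CREATE TABLE t (c VARCHAR(MAX))

# ... while TEXT with a precision keeps that precision as VARCHAR(n).
print(sqlglot.transpile("SELECT CAST(x AS TEXT(25))", write="redshift")[0])
# e.g. SELECT CAST(x AS VARCHAR(25))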
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTS_TO_NUMBER
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- NAMED_PLACEHOLDER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- sqlglot.dialects.postgres.Postgres.Generator
- SINGLE_STRING_INTERVAL
- RENAME_TABLE_WITH_DB
- JOIN_HINTS
- TABLE_HINTS
- PARAMETER_TOKEN
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_SEED_KEYWORD
- SUPPORTS_SELECT_INTO
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- SUPPORTED_JSON_PATH_PARTS
- PROPERTIES_LOCATION
- bracket_sql
- matchagainst_sql