-
Notifications
You must be signed in to change notification settings - Fork 117
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
SNOW-1829870: Allow structured types to be enabled by default #2727
base: main
Are you sure you want to change the base?
Changes from all commits
d9bf2cb
ec43e1a
7f3a5fd
2e0dce9
ed232de
0dd7b91
13c1424
a787e74
b32806f
c3db223
1c262d7
869931f
0caef58
2380040
995e519
1b89027
4fc61d4
26fd29e
9295e11
fcd16d7
77a57a6
4769169
af5af87
dea741b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,12 +11,12 @@ | |
from enum import Enum | ||
from typing import Generic, List, Optional, Type, TypeVar, Union, Dict, Any | ||
|
||
import snowflake.snowpark.context as context | ||
import snowflake.snowpark._internal.analyzer.expression as expression | ||
import snowflake.snowpark._internal.proto.generated.ast_pb2 as proto | ||
|
||
# Use correct version from here: | ||
from snowflake.snowpark._internal.utils import installed_pandas, pandas, quote_name | ||
import snowflake.snowpark.context as context | ||
|
||
# TODO: connector installed_pandas is broken. If pyarrow is not installed, but pandas is this function returns the wrong answer. | ||
# The core issue is that in the connector detection of both pandas/arrow are mixed, which is wrong. | ||
|
@@ -334,16 +334,22 @@ class ArrayType(DataType): | |
def __init__( | ||
self, | ||
element_type: Optional[DataType] = None, | ||
structured: bool = False, | ||
structured: Optional[bool] = None, | ||
) -> None: | ||
self.structured = structured | ||
self.element_type = element_type if element_type else StringType() | ||
if context._should_use_structured_type_semantics(): | ||
self.structured = ( | ||
structured if structured is not None else element_type is not None | ||
) | ||
self.element_type = element_type | ||
else: | ||
Comment on lines
+343
to
+344
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If element_type is None than this is a semi-structured array column and could contain anything. |
||
self.structured = structured or False | ||
self.element_type = element_type if element_type else StringType() | ||
|
||
def __repr__(self) -> str: | ||
return f"ArrayType({repr(self.element_type) if self.element_type else ''})" | ||
|
||
def _as_nested(self) -> "ArrayType": | ||
if not context._should_use_structured_type_semantics: | ||
if not context._should_use_structured_type_semantics(): | ||
return self | ||
element_type = self.element_type | ||
if isinstance(element_type, (ArrayType, MapType, StructType)): | ||
|
@@ -378,6 +384,10 @@ def json_value(self) -> Dict[str, Any]: | |
|
||
def _fill_ast(self, ast: proto.SpDataType) -> None: | ||
ast.sp_array_type.structured = self.structured | ||
if self.element_type is None: | ||
raise NotImplementedError( | ||
"SNOW-1862700: AST does not support empty element_type." | ||
) | ||
self.element_type._fill_ast(ast.sp_array_type.ty) | ||
|
||
|
||
|
@@ -388,20 +398,36 @@ def __init__( | |
self, | ||
key_type: Optional[DataType] = None, | ||
value_type: Optional[DataType] = None, | ||
structured: bool = False, | ||
structured: Optional[bool] = None, | ||
) -> None: | ||
self.structured = structured | ||
self.key_type = key_type if key_type else StringType() | ||
self.value_type = value_type if value_type else StringType() | ||
if context._should_use_structured_type_semantics(): | ||
if (key_type is None and value_type is not None) or ( | ||
key_type is not None and value_type is None | ||
): | ||
raise ValueError( | ||
"Must either set both key_type and value_type or leave both unset." | ||
) | ||
self.structured = ( | ||
structured if structured is not None else key_type is not None | ||
) | ||
self.key_type = key_type | ||
self.value_type = value_type | ||
else: | ||
self.structured = structured or False | ||
self.key_type = key_type if key_type else StringType() | ||
self.value_type = value_type if value_type else StringType() | ||
|
||
def __repr__(self) -> str: | ||
return f"MapType({repr(self.key_type) if self.key_type else ''}, {repr(self.value_type) if self.value_type else ''})" | ||
type_str = "" | ||
if self.key_type and self.value_type: | ||
type_str = f"{repr(self.key_type)}, {repr(self.value_type)}" | ||
return f"MapType({type_str})" | ||
|
||
def is_primitive(self): | ||
return False | ||
|
||
def _as_nested(self) -> "MapType": | ||
if not context._should_use_structured_type_semantics: | ||
if not context._should_use_structured_type_semantics(): | ||
return self | ||
value_type = self.value_type | ||
if isinstance(value_type, (ArrayType, MapType, StructType)): | ||
|
@@ -447,6 +473,10 @@ def valueType(self): | |
|
||
def _fill_ast(self, ast: proto.SpDataType) -> None: | ||
ast.sp_map_type.structured = self.structured | ||
if self.key_type is None or self.value_type is None: | ||
raise NotImplementedError( | ||
"SNOW-1862700: AST does not support empty key or value type." | ||
) | ||
self.key_type._fill_ast(ast.sp_map_type.key_ty) | ||
self.value_type._fill_ast(ast.sp_map_type.value_ty) | ||
|
||
|
@@ -578,7 +608,7 @@ def __init__( | |
|
||
@property | ||
def name(self) -> str: | ||
if self._is_column or not context._should_use_structured_type_semantics: | ||
if self._is_column or not context._should_use_structured_type_semantics(): | ||
return self.column_identifier.name | ||
else: | ||
return self._name | ||
|
@@ -593,7 +623,7 @@ def name(self, n: Union[ColumnIdentifier, str]) -> None: | |
self.column_identifier = ColumnIdentifier(n) | ||
|
||
def _as_nested(self) -> "StructField": | ||
if not context._should_use_structured_type_semantics: | ||
if not context._should_use_structured_type_semantics(): | ||
return self | ||
datatype = self.datatype | ||
if isinstance(datatype, (ArrayType, MapType, StructType)): | ||
|
@@ -651,9 +681,17 @@ class StructType(DataType): | |
"""Represents a table schema or structured column. Contains :class:`StructField` for each field.""" | ||
|
||
def __init__( | ||
self, fields: Optional[List["StructField"]] = None, structured=False | ||
self, | ||
fields: Optional[List["StructField"]] = None, | ||
structured: Optional[bool] = None, | ||
) -> None: | ||
self.structured = structured | ||
if context._should_use_structured_type_semantics(): | ||
self.structured = ( | ||
structured if structured is not None else fields is not None | ||
) | ||
else: | ||
self.structured = structured or False | ||
|
||
self.fields = [] | ||
for field in fields or []: | ||
self.add(field) | ||
|
@@ -683,7 +721,7 @@ def add( | |
return self | ||
|
||
def _as_nested(self) -> "StructType": | ||
if not context._should_use_structured_type_semantics: | ||
if not context._should_use_structured_type_semantics(): | ||
return self | ||
return StructType( | ||
[field._as_nested() for field in self.fields], self.structured | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is there an example to illustrate how the code change would work? I'm wondering changing StringType to None would cause any change in non-structured type cases
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See here:
snowpark-python/src/snowflake/snowpark/types.py
Lines 405 to 406 in 0dd7b91
The
None
here passes responsibility to the type initializer which still defaults to StringType when dealing with non-structured cases.