Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SNOW-1853342: Add support for contains_null to ArrayType #2773

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
- Added support for `DataFrame.map`.
- Added support for `DataFrame.from_dict` and `DataFrame.from_records`.
- Added support for mixed case field names in struct type columns.
- Added support for the `contains_null` parameter of `ArrayType`.
- Added support for `SeriesGroupBy.unique`.

#### Improvements
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def schema_expression(data_type: DataType, is_nullable: bool) -> str:
return "to_timestamp('2020-09-16 06:30:00')"
if isinstance(data_type, ArrayType):
if data_type.structured:
element = schema_expression(data_type.element_type, is_nullable)
element = schema_expression(data_type.element_type, data_type.contains_null)
return f"to_array({element}) :: {convert_sp_to_sf_type(data_type)}"
return "to_array(0)"
if isinstance(data_type, MapType):
Expand Down
4 changes: 3 additions & 1 deletion src/snowflake/snowpark/_internal/type_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ def convert_metadata_to_sp_type(
return ArrayType(
convert_metadata_to_sp_type(metadata.fields[0], max_string_size),
structured=True,
contains_null=metadata.fields[0]._is_nullable,
)
elif column_type_name == "MAP":
assert (
Expand Down Expand Up @@ -285,7 +286,8 @@ def convert_sp_to_sf_type(datatype: DataType) -> str:
return "BINARY"
if isinstance(datatype, ArrayType):
if datatype.structured:
return f"ARRAY({convert_sp_to_sf_type(datatype.element_type)})"
nullable = "" if datatype.contains_null else " NOT NULL"
return f"ARRAY({convert_sp_to_sf_type(datatype.element_type)}{nullable})"
else:
return "ARRAY"
if isinstance(datatype, MapType):
Expand Down
2 changes: 2 additions & 0 deletions src/snowflake/snowpark/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,9 +335,11 @@ def __init__(
self,
element_type: Optional[DataType] = None,
structured: bool = False,
contains_null: bool = True,
) -> None:
self.structured = structured
self.element_type = element_type if element_type else StringType()
self.contains_null = contains_null

def __repr__(self) -> str:
    """Return ``ArrayType(<repr of element_type>)``.

    The parentheses are left empty when ``element_type`` is falsy.
    """
    inner = repr(self.element_type) if self.element_type else ""
    return f"ArrayType({inner})"
Expand Down
31 changes: 29 additions & 2 deletions tests/integ/scala/test_datatype_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -997,6 +997,33 @@ def test_structured_type_print_schema(
)


@pytest.mark.skipif(
    "config.getoption('local_testing_mode', default=False)",
    reason="local testing does not fully support structured types yet.",
)
def test_structured_array_contains_null(
    structured_type_session, structured_type_support
):
    """Check that structured-array element nullability shows up in the schema.

    Selects one column cast to ``ARRAY(INT NOT NULL)`` and one cast to
    ``ARRAY(INT)``, then compares the resulting DataFrame schema against
    structured ``ArrayType(LongType())`` fields with ``contains_null`` set to
    ``False`` and ``True`` respectively.
    """
    if not structured_type_support:
        pytest.skip("Test requires structured type support.")

    # SNOW-1862947: add a DDL test once save-as-table is supported.
    query = (
        "select [1, 2, 3] :: ARRAY(INT NOT NULL) as A, [1, 2, 3] :: ARRAY(INT) as A_N"
    )
    array_df = structured_type_session.sql(query)

    # Column A forbids NULL elements; column A_N allows them.
    non_nullable = ArrayType(LongType(), structured=True, contains_null=False)
    nullable = ArrayType(LongType(), structured=True, contains_null=True)
    expected_schema = StructType(
        [
            StructField("A", non_nullable),
            StructField("A_N", nullable),
        ]
    )

    assert array_df.schema == expected_schema


@pytest.mark.skipif(
"config.getoption('local_testing_mode', default=False)",
reason="local testing does not fully support structured types yet.",
Expand Down Expand Up @@ -1112,11 +1139,11 @@ def test_structured_type_schema_expression(
non_null_union = non_null_table.union(non_null_table)
assert non_null_union._plan.schema_query == (
"( SELECT object_construct_keep_null('a' :: STRING (16777216), 0 :: DOUBLE) :: "
'MAP(STRING(16777216), DOUBLE) AS "MAP", to_array(0 :: DOUBLE) :: ARRAY(DOUBLE) AS "ARR",'
'MAP(STRING(16777216), DOUBLE) AS "MAP", to_array(NULL :: DOUBLE) :: ARRAY(DOUBLE) AS "ARR",'
" object_construct_keep_null('FIELD1', 'a' :: STRING (16777216), 'FIELD2', 0 :: "
'DOUBLE) :: OBJECT(FIELD1 STRING(16777216), FIELD2 DOUBLE) AS "OBJ") UNION ( SELECT '
"object_construct_keep_null('a' :: STRING (16777216), 0 :: DOUBLE) :: "
'MAP(STRING(16777216), DOUBLE) AS "MAP", to_array(0 :: DOUBLE) :: ARRAY(DOUBLE) AS "ARR", '
'MAP(STRING(16777216), DOUBLE) AS "MAP", to_array(NULL :: DOUBLE) :: ARRAY(DOUBLE) AS "ARR", '
"object_construct_keep_null('FIELD1', 'a' :: STRING (16777216), 'FIELD2', 0 :: "
'DOUBLE) :: OBJECT(FIELD1 STRING(16777216), FIELD2 DOUBLE) AS "OBJ")'
)
Expand Down
9 changes: 9 additions & 0 deletions tests/unit/test_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -963,6 +963,15 @@ def test_convert_sp_to_sf_type():
)
assert convert_sp_to_sf_type(BinaryType()) == "BINARY"
assert convert_sp_to_sf_type(ArrayType()) == "ARRAY"
assert (
convert_sp_to_sf_type(ArrayType(IntegerType(), structured=True)) == "ARRAY(INT)"
)
assert (
convert_sp_to_sf_type(
ArrayType(IntegerType(), structured=True, contains_null=False)
)
== "ARRAY(INT NOT NULL)"
)
assert convert_sp_to_sf_type(MapType()) == "OBJECT"
assert convert_sp_to_sf_type(StructType()) == "OBJECT"
assert convert_sp_to_sf_type(VariantType()) == "VARIANT"
Expand Down
Loading