Skip to content

Commit

Permalink
Merge pull request #107 from dsgibbons/fix/empty-list-validation
Browse files: browse the repository at this point in the history.
fix: empty list validation
  • Loading branch information
thomasaarholt authored Oct 21, 2024
2 parents d102584 + edef5ac commit 66430b6
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/patito/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,8 @@ def _find_errors( # noqa: C901
.select(column)
# Remove those rows that do not contain lists at all
.filter(pl.col(column).is_not_null())
# Remove empty lists
.filter(pl.col(column).list.len() > 0)
# Convert lists of N items to N individual rows
.explode(column)
# Calculate how many nulls are present in lists
Expand Down
29 changes: 29 additions & 0 deletions tests/test_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,35 @@ class NestedPositiveStructModel(pt.Model):
NestedPositiveStructModel.validate(bad_df)


def test_empty_list_validation() -> None:
    """Check that list columns holding empty lists pass model validation.

    The test has no explicit assertions: it succeeds when none of the
    ``validate`` calls below raises.
    """

    class TestModel(pt.Model):
        list_field: list[str]

    # A populated list sitting next to an empty one.
    partly_empty = pl.DataFrame({"list_field": [["a", "b"], []]})
    TestModel.validate(partly_empty)

    # Every list is empty; the column schema is passed explicitly so that
    # the frame still has the correct list-of-strings schema.
    flat_schema = {"list_field": pl.List(pl.String)}
    fully_empty = pl.DataFrame({"list_field": [[], []]}, schema=flat_schema)
    TestModel.validate(fully_empty)

    class NestedTestModel(pt.Model):
        nested_list_field: list[list[str]]

    # Same two scenarios, one nesting level deeper: first a populated outer
    # list next to an empty outer list ...
    nested_partly_empty = pl.DataFrame(
        {"nested_list_field": [[["a", "b"], ["c"]], []]}
    )
    NestedTestModel.validate(nested_partly_empty)

    # ... then only empty outer lists, again with an explicit schema.
    nested_schema = {"nested_list_field": pl.List(pl.List(pl.String))}
    nested_fully_empty = pl.DataFrame(
        {"nested_list_field": [[], []]},
        schema=nested_schema,
    )
    NestedTestModel.validate(nested_fully_empty)


def test_list_struct_validation() -> None:
"""Test validation of model with list of structs column."""

Expand Down

0 comments on commit 66430b6

Please sign in to comment.