Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions crates/polars-plan/src/plans/aexpr/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,26 @@ impl AExpr {
Sum(expr) => {
let mut field = ctx.arena.get(*expr).to_field_impl(ctx)?;
let dt = match field.dtype() {
String | Binary | BinaryOffset | List(_) => {
polars_bail!(
InvalidOperation: "`sum` operation not supported for dtype `{}`",
field.dtype()
)
},
#[cfg(feature = "dtype-array")]
Array(_, _) => {
polars_bail!(
InvalidOperation: "`sum` operation not supported for dtype `{}`",
field.dtype()
)
},
#[cfg(feature = "dtype-struct")]
Struct(_) => {
polars_bail!(
InvalidOperation: "`sum` operation not supported for dtype `{}`",
field.dtype()
)
},
Boolean => Some(IDX_DTYPE),
UInt8 | Int8 | Int16 | UInt16 => Some(Int64),
_ => None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,11 @@ def test_list_eval_in_group_by_schema(ldf: pl.LazyFrame, expr: pl.Expr) -> None:
pl.col("a").first().list.len()
).collect().to_series()[0] == 0

# skip sum on struct types
dtype = ldf.collect_schema()["a"]
assert isinstance(dtype, pl.List)
skip = skip or ("sum" in str(expr) and isinstance(dtype.inner, pl.Struct))

for q in [q_select, q_group_by, q_over]:
if not skip:
assert q.collect_schema() == q.collect().schema
Expand Down
13 changes: 12 additions & 1 deletion py-polars/tests/unit/operations/test_group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@
import polars as pl
import polars.selectors as cs
from polars import Expr
from polars.exceptions import ColumnNotFoundError
from polars.exceptions import (
ColumnNotFoundError,
InvalidOperationError,
)
from polars.meta import get_index_type
from polars.testing import assert_frame_equal, assert_series_equal
from polars.testing.parametric import column, dataframes, series
Expand Down Expand Up @@ -2909,3 +2912,11 @@ def test_agg_first_last_non_null_25405() -> None:
}
)
assert_frame_equal(result.collect(), expected)


def test_group_by_sum_on_strings_should_error_24659() -> None:
    """Check that `sum` on a string column in a group-by aggregation is rejected.

    Builds a two-row frame with a single string column named ``str``, groups
    it by the literal ``1`` and requests a `sum` aggregation of that column.
    The call is expected to raise ``InvalidOperationError`` with a message
    matching the "`sum` ... operation not supported for dtype ... str" pattern.

    The numeric suffix in the test name presumably refers to the upstream
    issue this guards against -- confirm against the tracker.
    """
    frame = pl.DataFrame({"str": ["a", "b"]})
    expected_message = r"`sum`.*operation not supported for dtype.*str"

    # Only the group-by/agg call sits inside the context manager, so the
    # assertion targets the aggregation itself.
    with pytest.raises(InvalidOperationError, match=expected_message):
        frame.group_by(1).agg(pl.col.str.sum())
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ def test_streaming_group_by_types() -> None:
pl.col("person_name").first().alias("str_first"),
pl.col("person_name").last().alias("str_last"),
pl.col("person_name").mean().alias("str_mean"),
pl.col("person_name").sum().alias("str_sum"),
pl.col("bool").first().alias("bool_first"),
pl.col("bool").last().alias("bool_first"),
]
Expand Down
Loading