Skip to content

Commit 49adf41

Browse files
wtn and claude authored
fix: Make sum on strings error in group_by context (#25456)
Co-authored-by: Claude <[email protected]>
1 parent 13086ab commit 49adf41

File tree

4 files changed: 37 additions (+37) and 2 deletions (-2)

crates/polars-plan/src/plans/aexpr/schema.rs

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -148,6 +148,26 @@ impl AExpr {
148148
Sum(expr) => {
149149
let mut field = ctx.arena.get(*expr).to_field_impl(ctx)?;
150150
let dt = match field.dtype() {
151+
String | Binary | BinaryOffset | List(_) => {
152+
polars_bail!(
153+
InvalidOperation: "`sum` operation not supported for dtype `{}`",
154+
field.dtype()
155+
)
156+
},
157+
#[cfg(feature = "dtype-array")]
158+
Array(_, _) => {
159+
polars_bail!(
160+
InvalidOperation: "`sum` operation not supported for dtype `{}`",
161+
field.dtype()
162+
)
163+
},
164+
#[cfg(feature = "dtype-struct")]
165+
Struct(_) => {
166+
polars_bail!(
167+
InvalidOperation: "`sum` operation not supported for dtype `{}`",
168+
field.dtype()
169+
)
170+
},
151171
Boolean => Some(IDX_DTYPE),
152172
UInt8 | Int8 | Int16 | UInt16 => Some(Int64),
153173
_ => None,

py-polars/tests/unit/operations/namespaces/list/test_eval.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -209,6 +209,11 @@ def test_list_eval_in_group_by_schema(ldf: pl.LazyFrame, expr: pl.Expr) -> None:
209209
pl.col("a").first().list.len()
210210
).collect().to_series()[0] == 0
211211

212+
# skip sum on struct types
213+
dtype = ldf.collect_schema()["a"]
214+
assert isinstance(dtype, pl.List)
215+
skip = skip or ("sum" in str(expr) and isinstance(dtype.inner, pl.Struct))
216+
212217
for q in [q_select, q_group_by, q_over]:
213218
if not skip:
214219
assert q.collect_schema() == q.collect().schema

py-polars/tests/unit/operations/test_group_by.py

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,10 @@
1313
import polars as pl
1414
import polars.selectors as cs
1515
from polars import Expr
16-
from polars.exceptions import ColumnNotFoundError
16+
from polars.exceptions import (
17+
ColumnNotFoundError,
18+
InvalidOperationError,
19+
)
1720
from polars.meta import get_index_type
1821
from polars.testing import assert_frame_equal, assert_series_equal
1922
from polars.testing.parametric import column, dataframes, series
@@ -2909,3 +2912,11 @@ def test_agg_first_last_non_null_25405() -> None:
29092912
}
29102913
)
29112914
assert_frame_equal(result.collect(), expected)
2915+
2916+
2917+
def test_group_by_sum_on_strings_should_error_24659() -> None:
2918+
with pytest.raises(
2919+
InvalidOperationError,
2920+
match=r"`sum`.*operation not supported for dtype.*str",
2921+
):
2922+
pl.DataFrame({"str": ["a", "b"]}).group_by(1).agg(pl.col.str.sum())

py-polars/tests/unit/streaming/test_streaming_group_by.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -117,7 +117,6 @@ def test_streaming_group_by_types() -> None:
117117
pl.col("person_name").first().alias("str_first"),
118118
pl.col("person_name").last().alias("str_last"),
119119
pl.col("person_name").mean().alias("str_mean"),
120-
pl.col("person_name").sum().alias("str_sum"),
121120
pl.col("bool").first().alias("bool_first"),
122121
pl.col("bool").last().alias("bool_first"),
123122
]

0 commit comments

Comments
 (0)