Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
6900283
ADD `concat_struct` for pandas_like
msalvany Oct 31, 2025
c15443f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 31, 2025
4bb8a1e
remove TODO comment
msalvany Oct 31, 2025
1761bc7
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 31, 2025
3679569
ADD `concat_struct` for polars
msalvany Oct 31, 2025
934b113
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 31, 2025
36a49a8
ADD concat_struct for arrow
msalvany Oct 31, 2025
88a8fa3
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 31, 2025
eeaa3dc
ADD dry-run test
msalvany Oct 31, 2025
41b201d
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 31, 2025
d9e70d3
removed duplicated raise
msalvany Oct 31, 2025
9cdf7ac
Change `concat_struct` to `struct`in all files
msalvany Nov 2, 2025
c1b6481
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 2, 2025
76b0267
Add `struct = not_implemented()` for dask", duckdb, ibis, pyspark, sq…
msalvany Nov 2, 2025
7e1fc08
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 2, 2025
d8b4af9
remove 'print' from test.py
msalvany Nov 2, 2025
41f4b87
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 2, 2025
661a29c
TypeError for pandas_like namespace
msalvany Nov 3, 2025
ad855a5
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 3, 2025
9fa78ab
test_struct: `assert_equal_data(result, expected)`
msalvany Nov 3, 2025
c9d4b0c
doctstring example test passed
msalvany Nov 3, 2025
75f5b77
add `struct` to api-reference top-level functions
msalvany Nov 3, 2025
561aaf0
TypeError improvement
msalvany Nov 3, 2025
0ecf99f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 3, 2025
ad2bbea
imports update for pandas namespace
msalvany Nov 4, 2025
2475cd1
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 4, 2025
d58cd73
remove folder
msalvany Nov 4, 2025
5539438
pandas_like supporting NaN values test check
msalvany Nov 4, 2025
64db266
updated docstring example in v2 (test passed)
msalvany Nov 4, 2025
04e828a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api-reference/narwhals.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ Here are the top-level functions available in Narwhals.
- scan_csv
- scan_parquet
- show_versions
- struct
- sum
- sum_horizontal
- to_native
Expand Down
2 changes: 2 additions & 0 deletions narwhals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
scan_csv,
scan_parquet,
show_versions,
struct,
sum,
sum_horizontal,
when,
Expand Down Expand Up @@ -169,6 +170,7 @@
"scan_parquet",
"selectors",
"show_versions",
"struct",
"sum",
"sum_horizontal",
"to_native",
Expand Down
15 changes: 15 additions & 0 deletions narwhals/_arrow/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,21 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
context=self,
)

def struct(self, *exprs: ArrowExpr) -> ArrowExpr:
    """Combine the outputs of `exprs` into a single struct-typed expression.

    Every series produced by the expressions becomes one struct field named
    after that series; the resulting series takes the name of the first one.
    """

    def func(df: ArrowDataFrame) -> list[ArrowSeries]:
        evaluated = [s for expr in exprs for s in expr(df)]
        # Each chunked column is merged into one array before being passed
        # to `pc.make_struct`.
        native_arrays = [s._native_series.combine_chunks() for s in evaluated]
        output_name = evaluated[0].name
        field_names = [s.name for s in evaluated]
        packed = pc.make_struct(*native_arrays, field_names=field_names)
        return [self._series(packed, name=output_name, version=self._version)]

    evaluate_names = combine_evaluate_output_names(*exprs)
    alias_names = combine_alias_output_names(*exprs)
    return self._expr._from_callable(
        func=func,
        evaluate_output_names=evaluate_names,
        alias_output_names=alias_names,
        context=self,
    )

def coalesce(self, *exprs: ArrowExpr) -> ArrowExpr:
def func(df: ArrowDataFrame) -> list[ArrowSeries]:
align = self._series._align_full_broadcast
Expand Down
4 changes: 3 additions & 1 deletion narwhals/_dask/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
combine_alias_output_names,
combine_evaluate_output_names,
)
from narwhals._utils import Implementation, zip_strict
from narwhals._utils import Implementation, not_implemented, zip_strict

if TYPE_CHECKING:
from collections.abc import Iterable, Iterator
Expand Down Expand Up @@ -255,6 +255,8 @@ def func(df: DaskLazyFrame) -> list[dx.Series]:
version=self._version,
)

# No `struct` implementation is provided in this namespace; the attribute is
# bound to the `not_implemented()` marker instead.
struct = not_implemented()

def coalesce(self, *exprs: DaskExpr) -> DaskExpr:
def func(df: DaskLazyFrame) -> list[dx.Series]:
series = align_series_full_broadcast(
Expand Down
4 changes: 3 additions & 1 deletion narwhals/_duckdb/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
combine_evaluate_output_names,
)
from narwhals._sql.namespace import SQLNamespace
from narwhals._utils import Implementation
from narwhals._utils import Implementation, not_implemented

if TYPE_CHECKING:
from collections.abc import Iterable
Expand Down Expand Up @@ -119,6 +119,8 @@ def func(df: DuckDBLazyFrame) -> list[Expression]:
version=self._version,
)

# No `struct` implementation is provided in this namespace; the attribute is
# bound to the `not_implemented()` marker instead.
struct = not_implemented()

def mean_horizontal(self, *exprs: DuckDBExpr) -> DuckDBExpr:
def func(cols: Iterable[Expression]) -> Expression:
cols = tuple(cols)
Expand Down
4 changes: 3 additions & 1 deletion narwhals/_ibis/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from narwhals._ibis.selectors import IbisSelectorNamespace
from narwhals._ibis.utils import function, lit, narwhals_to_native_dtype
from narwhals._sql.namespace import SQLNamespace
from narwhals._utils import Implementation
from narwhals._utils import Implementation, not_implemented

if TYPE_CHECKING:
from collections.abc import Iterable, Sequence
Expand Down Expand Up @@ -100,6 +100,8 @@ def func(df: IbisLazyFrame) -> list[ir.Value]:
version=self._version,
)

# No `struct` implementation is provided in this namespace; the attribute is
# bound to the `not_implemented()` marker instead.
struct = not_implemented()

def mean_horizontal(self, *exprs: IbisExpr) -> IbisExpr:
def func(cols: Iterable[ir.Value]) -> ir.Value:
cols = list(cols)
Expand Down
48 changes: 48 additions & 0 deletions narwhals/_pandas_like/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,54 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
context=self,
)

def struct(self, *exprs: PandasLikeExpr) -> PandasLikeExpr:
def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
series_list = [s for _expr in exprs for s in _expr(df)]
df = self.concat(
(s.to_frame() for s in series_list), how="horizontal"
)._native_frame

try:
import pandas as pd
import pyarrow.compute as pc
except ModuleNotFoundError as exc:
msg = "'pyarrow' and 'pandas' are required to use `struct()` in this backend."
raise ModuleNotFoundError(msg) from exc

# Check for consistent types within each column
for col in df.columns:
values = df[col].tolist()
non_null_values = [v for v in values if not pd.isna(v)]
Comment on lines +354 to +355
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

quick note that tolist and iterating over values in Python isn't allowed here, as it's very inefficient - you'll need to look for a way to do this using the pandas api

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey @msalvany - what's the main reason behind this check in the first place? I thought we might be able to get away without it 👀

if not non_null_values:
continue # all nulls, skip
first_type = type(non_null_values[0])
for v in non_null_values[1:]:
if not isinstance(v, first_type):
msg = (
f"unexpected value while building Series of type {first_type.__name__}; "
f"found value of type {type(v).__name__}: {v}\n\n"
f"Hint: ensure all values in each column have the same dtype."
)
raise TypeError(msg)

df_arrow = df.convert_dtypes(dtype_backend="pyarrow")
arrays = [df_arrow[col].array._pa_array for col in df.columns]
struct_array = pc.make_struct(*arrays, field_names=df.columns)
struct_series = struct_array.to_pandas(
types_mapper=lambda x: pd.ArrowDtype(x)
)
result = PandasLikeSeries(
struct_series, implementation=self._implementation, version=self._version
).alias("struct")
return [result]

return self._expr._from_callable(
func=func,
evaluate_output_names=combine_evaluate_output_names(*exprs),
alias_output_names=combine_alias_output_names(*exprs),
context=self,
)

def _if_then_else(
self,
when: NativeSeriesT,
Expand Down
4 changes: 4 additions & 0 deletions narwhals/_polars/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,10 @@ def concat_str(
version=self._version,
)

def struct(self, *exprs: PolarsExpr) -> PolarsExpr:
    """Wrap `pl.struct` applied to the native expressions behind `exprs`."""
    native_fields = [compliant._native_expr for compliant in exprs]
    native_struct = pl.struct(native_fields)
    return self._expr(native_struct, version=self._version)

def when_then(
self, when: PolarsExpr, then: PolarsExpr, otherwise: PolarsExpr | None = None
) -> PolarsExpr:
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_spark_like/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
true_divide,
)
from narwhals._sql.namespace import SQLNamespace
from narwhals._utils import not_implemented

if TYPE_CHECKING:
from collections.abc import Iterable
Expand Down Expand Up @@ -196,3 +197,5 @@ def func(df: SparkLikeLazyFrame) -> list[Column]:
version=self._version,
implementation=self._implementation,
)

# No `struct` implementation is provided in this namespace; the attribute is
# bound to the `not_implemented()` marker instead.
struct = not_implemented()
3 changes: 3 additions & 0 deletions narwhals/_sql/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from narwhals._compliant import LazyNamespace
from narwhals._compliant.typing import NativeExprT, NativeFrameT
from narwhals._sql.typing import SQLExprT, SQLLazyFrameT
from narwhals._utils import not_implemented

if TYPE_CHECKING:
from collections.abc import Iterable
Expand Down Expand Up @@ -86,3 +87,5 @@ def func_with_otherwise(cols: list[NativeExprT]) -> NativeExprT:
return self._expr._from_elementwise_horizontal_op(
func_with_otherwise, then, predicate, otherwise
)

# No `struct` implementation is provided in this namespace; the attribute is
# bound to the `not_implemented()` marker instead.
struct = not_implemented()
40 changes: 40 additions & 0 deletions narwhals/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1587,6 +1587,46 @@ def concat_str(
)


def struct(exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr) -> Expr:
    r"""Pack several columns side by side into a single struct column.

    Arguments:
        exprs: A single expression, or an iterable of expressions, to use as
            struct fields. Strings are treated as column names.
        *more_exprs: Further columns or expressions, given positionally.

    Returns:
        An expression that produces a single struct column containing the given fields.

    Example:
        >>> import pandas as pd
        >>> import narwhals as nw
        >>>
        >>> data = {
        ...     "a": [1, 2, 3],
        ...     "b": ["dogs", "cats", None],
        ...     "c": ["play", "swim", "walk"],
        ... }
        >>> df_native = pd.DataFrame(data)
        >>> (
        ...     nw.from_native(df_native).select(
        ...         nw.struct([nw.col("a") * 2, nw.col("b"), nw.col("c")]).alias(
        ...             "my_struct"
        ...         )
        ...     )
        ... )
        ┌─────────────────────────────────────┐
        |         Narwhals DataFrame          |
        |-------------------------------------|
        |                            my_struct|
        |0  {'a': 2, 'b': 'dogs', 'c': 'play'}|
        |1  {'a': 4, 'b': 'cats', 'c': 'swim'}|
        |2    {'a': 6, 'b': None, 'c': 'walk'}|
        └─────────────────────────────────────┘
    """
    # Flatten the first argument on its own so that a lone expression and an
    # iterable of expressions are handled alike, then flatten the combined list.
    leading = flatten([exprs])
    fields = flatten([*leading, *more_exprs])
    return _expr_with_horizontal_op("struct", *fields)


def coalesce(
exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr | NonNestedLiteral
) -> Expr:
Expand Down
5 changes: 5 additions & 0 deletions narwhals/stable/v1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1163,6 +1163,10 @@ def concat_str(
)


def struct(exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr) -> Expr:
    """Horizontally combine multiple columns into a single struct column."""
    unstable = nw.struct(exprs, *more_exprs)
    return _stableify(unstable)


def format(f_string: str, *args: IntoExpr) -> Expr:
    """Format expressions as a string."""
    unstable = nw.format(f_string, *args)
    return _stableify(unstable)
Expand Down Expand Up @@ -1440,6 +1444,7 @@ def scan_parquet(
"scan_parquet",
"selectors",
"show_versions",
"struct",
"sum",
"sum_horizontal",
"to_native",
Expand Down
40 changes: 40 additions & 0 deletions narwhals/stable/v2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -916,6 +916,45 @@ def concat_str(
)


def struct(exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr) -> Expr:
    r"""Pack several columns side by side into a single struct column.

    Arguments:
        exprs: A single expression, or an iterable of expressions, to use as
            struct fields. Strings are treated as column names.
        *more_exprs: Further columns or expressions, given positionally.

    Returns:
        An expression that produces a single struct column containing the given fields.

    Example:
        >>> import pandas as pd
        >>> import narwhals as nw
        >>>
        >>> data = {
        ...     "a": [1, 2, 3],
        ...     "b": ["dogs", "cats", None],
        ...     "c": ["play", "swim", "walk"],
        ... }
        >>> df_native = pd.DataFrame(data)
        >>> (
        ...     nw.from_native(df_native).select(
        ...         nw.struct([nw.col("a") * 2, nw.col("b"), nw.col("c")]).alias(
        ...             "my_struct"
        ...         )
        ...     )
        ... )
        ┌─────────────────────────────────────┐
        |         Narwhals DataFrame          |
        |-------------------------------------|
        |                            my_struct|
        |0  {'a': 2, 'b': 'dogs', 'c': 'play'}|
        |1  {'a': 4, 'b': 'cats', 'c': 'swim'}|
        |2    {'a': 6, 'b': None, 'c': 'walk'}|
        └─────────────────────────────────────┘
    """
    unstable = nw.struct(exprs, *more_exprs)
    return _stableify(unstable)


def format(f_string: str, *args: IntoExpr) -> Expr:
"""Format expressions as a string.

Expand Down Expand Up @@ -1279,6 +1318,7 @@ def scan_parquet(
"selectors",
"selectors",
"show_versions",
"struct",
"sum",
"sum_horizontal",
"to_native",
Expand Down
32 changes: 32 additions & 0 deletions tests/expr_and_series/struct_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from __future__ import annotations

import pytest

import narwhals as nw
from tests.utils import POLARS_VERSION, Constructor, assert_equal_data

# Skip this whole module when pyarrow cannot be imported.
pytest.importorskip("pyarrow")

# Shared input frame; column "b" deliberately contains a null entry.
data = {"a": [1, 2, 3], "b": ["dogs", "cats", None], "c": ["play", "swim", "walk"]}


def test_struct(constructor: Constructor, *, request: pytest.FixtureRequest) -> None:
    """Check that `nw.struct` packs columns a, b and c into one struct column."""
    backend = str(constructor)

    # Old Polars releases are expected to fail.
    if POLARS_VERSION < (1, 0, 0) and "polars" in backend:
        request.applymarker(pytest.mark.xfail)

    # Backends where `struct` is marked as not implemented.
    for unsupported in ("dask", "duckdb", "ibis", "pyspark", "sqlframe"):
        if unsupported in backend:
            request.applymarker(
                pytest.mark.xfail(reason="Not supported / not implemented")
            )
            break

    frame = nw.from_native(constructor(data))
    fields = [nw.col("a"), nw.col("b"), nw.col("c")]
    result = frame.select(nw.struct(fields).alias("struct"))

    expected_rows = [
        {"a": 1, "b": "dogs", "c": "play"},
        {"a": 2, "b": "cats", "c": "swim"},
        {"a": 3, "b": None, "c": "walk"},
    ]
    assert_equal_data(result, {"struct": expected_rows})