narwhals-dev · msalvany · Oct 31, 2025 · Oct 31, 2025 · Oct 31, 2025 · Oct 31, 2025
diff --git a/docs/api-reference/narwhals.md b/docs/api-reference/narwhals.md
@@ -45,6 +45,7 @@ Here are the top-level functions available in Narwhals.
         - scan_csv
         - scan_parquet
         - show_versions
+        - struct
         - sum
         - sum_horizontal
         - to_native

diff --git a/narwhals/__init__.py b/narwhals/__init__.py
@@ -75,6 +75,7 @@
     scan_csv,
     scan_parquet,
     show_versions,
+    struct,
     sum,
     sum_horizontal,
     when,
@@ -169,6 +170,7 @@
     "scan_parquet",
     "selectors",
     "show_versions",
+    "struct",
     "sum",
     "sum_horizontal",
     "to_native",

diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py
@@ -227,6 +227,37 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
             context=self,
         )
 
+    def struct(self, *exprs: ArrowExpr) -> ArrowExpr:
+        """Combine the outputs of `exprs` into a single struct column.
+
+        Arguments:
+            *exprs: Expressions whose evaluated columns become the struct fields,
+                in order. Each field is named after the column it comes from.
+
+        Returns:
+            An expression producing one struct series named after the first
+            evaluated input column.
+        """
+
+        def func(df: ArrowDataFrame) -> list[ArrowSeries]:
+            # Broadcast length-1 results (e.g. aggregations) to the full length
+            # first, as `coalesce` does: `pc.make_struct` needs equal-length fields.
+            align = self._series._align_full_broadcast
+            series = align(*chain.from_iterable(expr(df) for expr in exprs))
+            arrays = [s._native_series.combine_chunks() for s in series]
+            field_names = [s.name for s in series]
+            struct_array = pc.make_struct(*arrays, field_names=field_names)
+            # The output takes the name of the first input column.
+            name = series[0].name
+            return [self._series(struct_array, name=name, version=self._version)]
+
+        return self._expr._from_callable(
+            func=func,
+            evaluate_output_names=combine_evaluate_output_names(*exprs),
+            alias_output_names=combine_alias_output_names(*exprs),
+            context=self,
+        )
+
     def coalesce(self, *exprs: ArrowExpr) -> ArrowExpr:
         def func(df: ArrowDataFrame) -> list[ArrowSeries]:
             align = self._series._align_full_broadcast

diff --git a/narwhals/_dask/namespace.py b/narwhals/_dask/namespace.py
@@ -22,7 +22,7 @@
     combine_alias_output_names,
     combine_evaluate_output_names,
 )
-from narwhals._utils import Implementation, zip_strict
+from narwhals._utils import Implementation, not_implemented, zip_strict
 
 if TYPE_CHECKING:
     from collections.abc import Iterable, Iterator
@@ -255,6 +255,8 @@ def func(df: DaskLazyFrame) -> list[dx.Series]:
             version=self._version,
         )
 
+    struct = not_implemented()  # `struct` is not implemented for the Dask backend
+
     def coalesce(self, *exprs: DaskExpr) -> DaskExpr:
         def func(df: DaskLazyFrame) -> list[dx.Series]:
             series = align_series_full_broadcast(

diff --git a/narwhals/_duckdb/namespace.py b/narwhals/_duckdb/namespace.py
@@ -26,7 +26,7 @@
     combine_evaluate_output_names,
 )
 from narwhals._sql.namespace import SQLNamespace
-from narwhals._utils import Implementation
+from narwhals._utils import Implementation, not_implemented
 
 if TYPE_CHECKING:
     from collections.abc import Iterable
@@ -119,6 +119,8 @@ def func(df: DuckDBLazyFrame) -> list[Expression]:
             version=self._version,
         )
 
+    struct = not_implemented()  # `struct` is not implemented for the DuckDB backend
+
     def mean_horizontal(self, *exprs: DuckDBExpr) -> DuckDBExpr:
         def func(cols: Iterable[Expression]) -> Expression:
             cols = tuple(cols)

diff --git a/narwhals/_ibis/namespace.py b/narwhals/_ibis/namespace.py
@@ -17,7 +17,7 @@
 from narwhals._ibis.selectors import IbisSelectorNamespace
 from narwhals._ibis.utils import function, lit, narwhals_to_native_dtype
 from narwhals._sql.namespace import SQLNamespace
-from narwhals._utils import Implementation
+from narwhals._utils import Implementation, not_implemented
 
 if TYPE_CHECKING:
     from collections.abc import Iterable, Sequence
@@ -100,6 +100,8 @@ def func(df: IbisLazyFrame) -> list[ir.Value]:
             version=self._version,
         )
 
+    struct = not_implemented()  # `struct` is not implemented for the Ibis backend
+
     def mean_horizontal(self, *exprs: IbisExpr) -> IbisExpr:
         def func(cols: Iterable[ir.Value]) -> ir.Value:
             cols = list(cols)

diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py
@@ -335,6 +335,71 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
             context=self,
         )
 
+    def struct(self, *exprs: PandasLikeExpr) -> PandasLikeExpr:
+        """Combine the outputs of `exprs` into a single PyArrow-backed struct column.
+
+        Arguments:
+            *exprs: Expressions whose evaluated columns become the struct fields.
+
+        Returns:
+            An expression producing one struct series that is named after the
+            first evaluated input column and keeps the index of the input frame.
+
+        Raises:
+            ModuleNotFoundError: If `pandas` or `pyarrow` cannot be imported.
+            TypeError: If the non-null values of a column are not all instances
+                of the type of its first non-null value.
+        """
+
+        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
+            try:
+                import pandas as pd
+                import pyarrow.compute as pc
+            except ModuleNotFoundError as exc:
+                msg = "'pyarrow' and 'pandas' are required to use `struct()` in this backend."
+                raise ModuleNotFoundError(msg) from exc
+
+            series_list = [s for _expr in exprs for s in _expr(df)]
+            native = self.concat(
+                (s.to_frame() for s in series_list), how="horizontal"
+            )._native_frame
+
+            # Check for consistent types within each column
+            for col in native.columns:
+                non_null_values = [v for v in native[col].tolist() if not pd.isna(v)]
+                if not non_null_values:
+                    continue  # all nulls, skip
+                first_type = type(non_null_values[0])
+                for v in non_null_values[1:]:
+                    if not isinstance(v, first_type):
+                        msg = (
+                            f"unexpected value while building Series of type {first_type.__name__}; "
+                            f"found value of type {type(v).__name__}: {v}\n\n"
+                            f"Hint: ensure all values in each column have the same dtype."
+                        )
+                        raise TypeError(msg)
+
+            native_arrow = native.convert_dtypes(dtype_backend="pyarrow")
+            arrays = [native_arrow[col].array._pa_array for col in native.columns]
+            struct_array = pc.make_struct(*arrays, field_names=list(native.columns))
+            struct_series = struct_array.to_pandas(types_mapper=pd.ArrowDtype)
+            # `to_pandas` builds a fresh default index; restore the frame's own index
+            # so the struct column stays aligned with the columns it was built from.
+            struct_series.index = native.index
+            result = PandasLikeSeries(
+                struct_series, implementation=self._implementation, version=self._version
+            )
+            # Name the output after the first input column (as the PyArrow backend
+            # does) instead of a hard-coded "struct".
+            return [result.alias(series_list[0].name)]
+
+        return self._expr._from_callable(
+            func=func,
+            evaluate_output_names=combine_evaluate_output_names(*exprs),
+            alias_output_names=combine_alias_output_names(*exprs),
+            context=self,
+        )
+
     def _if_then_else(
         self,
         when: NativeSeriesT,

diff --git a/narwhals/_polars/namespace.py b/narwhals/_polars/namespace.py
@@ -198,6 +198,11 @@ def concat_str(
             version=self._version,
         )
 
+    def struct(self, *exprs: PolarsExpr) -> PolarsExpr:
+        """Wrap `pl.struct` applied to the native expressions behind `exprs`."""
+        native_struct = pl.struct([item._native_expr for item in exprs])
+        return self._expr(native_struct, version=self._version)
+
     def when_then(
         self, when: PolarsExpr, then: PolarsExpr, otherwise: PolarsExpr | None = None
     ) -> PolarsExpr:

diff --git a/narwhals/_spark_like/namespace.py b/narwhals/_spark_like/namespace.py
@@ -19,6 +19,7 @@
     true_divide,
 )
 from narwhals._sql.namespace import SQLNamespace
+from narwhals._utils import not_implemented
 
 if TYPE_CHECKING:
     from collections.abc import Iterable
@@ -196,3 +197,5 @@ def func(df: SparkLikeLazyFrame) -> list[Column]:
             version=self._version,
             implementation=self._implementation,
         )
+
+    struct = not_implemented()  # `struct` is not implemented for Spark-like backends
diff --git a/narwhals/_sql/namespace.py b/narwhals/_sql/namespace.py
@@ -7,6 +7,7 @@
 from narwhals._compliant import LazyNamespace
 from narwhals._compliant.typing import NativeExprT, NativeFrameT
 from narwhals._sql.typing import SQLExprT, SQLLazyFrameT
+from narwhals._utils import not_implemented
 
 if TYPE_CHECKING:
     from collections.abc import Iterable
@@ -86,3 +87,5 @@ def func_with_otherwise(cols: list[NativeExprT]) -> NativeExprT:
         return self._expr._from_elementwise_horizontal_op(
             func_with_otherwise, then, predicate, otherwise
         )
+
+    struct = not_implemented()  # `struct` is not implemented in the shared SQL namespace
diff --git a/narwhals/functions.py b/narwhals/functions.py
@@ -1587,6 +1587,46 @@ def concat_str(
     )
 
 
+def struct(exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr) -> Expr:
+    """Pack several columns into one struct column, with one field per input.
+
+    Arguments:
+        exprs: Expression(s) to use as struct fields; strings are treated as column names.
+        *more_exprs: Further expressions or column names, given positionally.
+
+    Returns:
+        A single expression evaluating to one struct column holding the given fields.
+
+    Examples:
+        >>> import pandas as pd
+        >>> import narwhals as nw
+        >>>
+        >>> data = {
+        ...     "a": [1, 2, 3],
+        ...     "b": ["dogs", "cats", None],
+        ...     "c": ["play", "swim", "walk"],
+        ... }
+        >>> df_native = pd.DataFrame(data)
+        >>> (
+        ...     nw.from_native(df_native).select(
+        ...         nw.struct([nw.col("a") * 2, nw.col("b"), nw.col("c")]).alias(
+        ...             "my_struct"
+        ...         )
+        ...     )
+        ... )
+        ┌─────────────────────────────────────┐
+        |         Narwhals DataFrame          |
+        |-------------------------------------|
+        |                            my_struct|
+        |0  {'a': 2, 'b': 'dogs', 'c': 'play'}|
+        |1  {'a': 4, 'b': 'cats', 'c': 'swim'}|
+        |2    {'a': 6, 'b': None, 'c': 'walk'}|
+        └─────────────────────────────────────┘
+    """
+    columns = flatten([*flatten([exprs]), *more_exprs])
+    return _expr_with_horizontal_op("struct", *columns)
+
+
 def coalesce(
     exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr | NonNestedLiteral
 ) -> Expr:

diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py
@@ -1163,6 +1163,12 @@ def concat_str(
     )
 
 
+def struct(exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr) -> Expr:
+    """Horizontally combine multiple columns into a single struct column."""
+    combined = nw.struct(exprs, *more_exprs)
+    return _stableify(combined)
+
+
 def format(f_string: str, *args: IntoExpr) -> Expr:
     """Format expressions as a string."""
     return _stableify(nw.format(f_string, *args))
@@ -1440,6 +1446,7 @@ def scan_parquet(
     "scan_parquet",
     "selectors",
     "show_versions",
+    "struct",
     "sum",
     "sum_horizontal",
     "to_native",

diff --git a/narwhals/stable/v2/__init__.py b/narwhals/stable/v2/__init__.py
@@ -916,6 +916,45 @@ def concat_str(
     )
 
 
+def struct(exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr) -> Expr:
+    r"""Horizontally combine multiple columns into a single struct column.
+
+    Arguments:
+        exprs: One or more expressions to combine into a struct. Strings are treated as column names.
+        *more_exprs: Additional columns or expressions, passed as positional arguments.
+
+    Returns:
+        An expression that produces a single struct column containing the given fields.
+
+    Examples:
+        >>> import pandas as pd
+        >>> import narwhals.stable.v2 as nw
+        >>>
+        >>> data = {
+        ...     "a": [1, 2, 3],
+        ...     "b": ["dogs", "cats", None],
+        ...     "c": ["play", "swim", "walk"],
+        ... }
+        >>> df_native = pd.DataFrame(data)
+        >>> (
+        ...     nw.from_native(df_native).select(
+        ...         nw.struct([nw.col("a") * 2, nw.col("b"), nw.col("c")]).alias(
+        ...             "my_struct"
+        ...         )
+        ...     )
+        ... )
+        ┌─────────────────────────────────────┐
+        |         Narwhals DataFrame          |
+        |-------------------------------------|
+        |                            my_struct|
+        |0  {'a': 2, 'b': 'dogs', 'c': 'play'}|
+        |1  {'a': 4, 'b': 'cats', 'c': 'swim'}|
+        |2    {'a': 6, 'b': None, 'c': 'walk'}|
+        └─────────────────────────────────────┘
+    """
+    return _stableify(nw.struct(exprs, *more_exprs))
+
+
 def format(f_string: str, *args: IntoExpr) -> Expr:
     """Format expressions as a string.
 
@@ -1279,6 +1318,7 @@ def scan_parquet(
     "selectors",
     "selectors",
     "show_versions",
+    "struct",
     "sum",
     "sum_horizontal",
     "to_native",

diff --git a/tests/expr_and_series/struct_test.py b/tests/expr_and_series/struct_test.py
@@ -0,0 +1,32 @@
+from __future__ import annotations
+
+import pytest
+
+import narwhals as nw
+from tests.utils import POLARS_VERSION, Constructor, assert_equal_data
+
+pytest.importorskip("pyarrow")
+
+data = {"a": [1, 2, 3], "b": ["dogs", "cats", None], "c": ["play", "swim", "walk"]}
+
+
+def test_struct(constructor: Constructor, *, request: pytest.FixtureRequest) -> None:
+    """Check that `nw.struct` packs columns `a`, `b` and `c` into one struct column."""
+    backend = str(constructor)
+    if "polars" in backend and POLARS_VERSION < (1, 0, 0):
+        request.applymarker(pytest.mark.xfail)
+    unsupported = ("dask", "duckdb", "ibis", "pyspark", "sqlframe")
+    if any(name in backend for name in unsupported):
+        request.applymarker(pytest.mark.xfail(reason="Not supported / not implemented"))
+
+    fields = [nw.col("a"), nw.col("b"), nw.col("c")]
+    result = nw.from_native(constructor(data)).select(nw.struct(fields).alias("struct"))
+    expected = {
+        "struct": [
+            {"a": 1, "b": "dogs", "c": "play"},
+            {"a": 2, "b": "cats", "c": "swim"},
+            {"a": 3, "b": None, "c": "walk"},
+        ]
+    }
+
+    assert_equal_data(result, expected)