Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
234f04e
WIP
FBruzzesi Sep 6, 2025
57590a4
xfail sql-like
FBruzzesi Sep 6, 2025
b4d0d36
additional test and docstrings
FBruzzesi Sep 6, 2025
83fd5eb
got creative with polars
FBruzzesi Sep 10, 2025
cd25427
duckdb implementation
FBruzzesi Sep 10, 2025
ddd6d41
not implemented for old duckdb
FBruzzesi Sep 10, 2025
93c413b
merge main
FBruzzesi Sep 10, 2025
1ef1ad5
pragma no cover old version
FBruzzesi Sep 10, 2025
4164bb7
test: Add a `polars` edge case
dangotbanned Sep 11, 2025
599fb25
fix leading non-alphanum
FBruzzesi Sep 11, 2025
aa40799
pyspark :)
FBruzzesi Sep 11, 2025
26d048b
Merge branch 'main' into feat/str-to-titlecase
FBruzzesi Sep 11, 2025
0bba2d9
use duckdb lambda expression
FBruzzesi Sep 11, 2025
a9963e1
add no cover for entire method since pyspark runs its CI
FBruzzesi Sep 11, 2025
1a4d17a
docstrings update, skip old duckdb
FBruzzesi Sep 11, 2025
02edde4
refactor(suggestion): Lightly sugar `LambdaExpression`
dangotbanned Sep 11, 2025
d93e932
refactor: Rename to `elem`
dangotbanned Sep 11, 2025
7bba817
merge main
FBruzzesi Sep 18, 2025
99952d2
use @requires.backend_version for duckdb
FBruzzesi Sep 18, 2025
19fdcda
enable sqlframe
FBruzzesi Sep 19, 2025
88bb25b
Merge branch 'main' into feat/str-to-titlecase
FBruzzesi Sep 21, 2025
95ffe8e
apply (some) suggestions
FBruzzesi Sep 21, 2025
e55e269
Merge branch 'main' into feat/str-to-titlecase
FBruzzesi Oct 6, 2025
6128b49
refactor test, change table to list in docstring warning
FBruzzesi Oct 6, 2025
515e4a1
Merge branch 'main' into feat/str-to-titlecase
dangotbanned Oct 6, 2025
9a8974d
ci: Exclude coverage for `BACKEND_VERSION <`
dangotbanned Oct 6, 2025
5728b9b
Apply suggestions
FBruzzesi Oct 6, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api-reference/expr_str.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
- to_date
- to_datetime
- to_lowercase
- to_titlecase
- to_uppercase
- zfill
show_source: false
Expand Down
1 change: 1 addition & 0 deletions docs/api-reference/series_str.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
- to_date
- to_datetime
- to_lowercase
- to_titlecase
- to_uppercase
- zfill
show_source: false
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_arrow/series_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ def to_uppercase(self) -> ArrowSeries:
def to_lowercase(self) -> ArrowSeries:
    """Return a new series holding `pc.utf8_lower` applied to the native data."""
    lowered = pc.utf8_lower(self.native)
    return self.with_native(lowered)

def to_titlecase(self) -> ArrowSeries:
    """Return a new series holding `pc.utf8_title` applied to the native data."""
    titled = pc.utf8_title(self.native)
    return self.with_native(titled)

def zfill(self, width: int) -> ArrowSeries:
binary_join: Incomplete = pc.binary_join_element_wise
native = self.native
Expand Down
1 change: 1 addition & 0 deletions narwhals/_compliant/any_namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ def split(self, by: str) -> CompliantT_co: ...
def to_datetime(self, format: str | None) -> CompliantT_co: ...
def to_date(self, format: str | None) -> CompliantT_co: ...
def to_lowercase(self) -> CompliantT_co: ...
def to_titlecase(self) -> CompliantT_co: ...
def to_uppercase(self) -> CompliantT_co: ...
def zfill(self, width: int) -> CompliantT_co: ...

Expand Down
3 changes: 3 additions & 0 deletions narwhals/_compliant/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1156,6 +1156,9 @@ def to_uppercase(self) -> EagerExprT:
def zfill(self, width: int) -> EagerExprT:
    """Forward `zfill` to the series-level `str` namespace, passing `width` through."""
    compliant = self.compliant
    return compliant._reuse_series_namespace("str", "zfill", width=width)

def to_titlecase(self) -> EagerExprT:
    """Forward `to_titlecase` to the series-level `str` namespace."""
    compliant = self.compliant
    return compliant._reuse_series_namespace("str", "to_titlecase")


class EagerExprStructNamespace(
EagerExprNamespace[EagerExprT], StructNamespace[EagerExprT], Generic[EagerExprT]
Expand Down
5 changes: 5 additions & 0 deletions narwhals/_dask/expr_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,11 @@ def to_lowercase(self) -> DaskExpr:
lambda expr: expr.str.lower(), "to_lowercase"
)

def to_titlecase(self) -> DaskExpr:
    """Build an expression that calls `.str.title()` on the native expression.

    The callable is registered through `_with_callable` under the name
    `"to_titlecase"`.
    """
    compliant = self.compliant
    return compliant._with_callable(lambda expr: expr.str.title(), "to_titlecase")

def zfill(self, width: int) -> DaskExpr:
return self.compliant._with_callable(
lambda expr, width: expr.str.zfill(width), "zfill", width=width
Expand Down
27 changes: 25 additions & 2 deletions narwhals/_duckdb/expr_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@

from typing import TYPE_CHECKING

from narwhals._duckdb.utils import F, lit
from narwhals._duckdb.utils import F, col, concat_str, lit
from narwhals._sql.expr_str import SQLExprStringNamespace
from narwhals._utils import not_implemented
from narwhals._utils import not_implemented, requires

if TYPE_CHECKING:
from duckdb import Expression

from narwhals._duckdb.expr import DuckDBExpr


Expand All @@ -27,4 +29,25 @@ def to_date(self, format: str | None) -> DuckDBExpr:
compliant_expr = self.compliant
return compliant_expr.cast(compliant_expr._version.dtypes.Date())

@requires.backend_version((1, 2))
def to_titlecase(self) -> DuckDBExpr:
    """Title-case strings via a lowercase / split / capitalize / re-join pipeline.

    The input is lowercased and split with `regexp_extract_all` into chunks
    matching `[a-z0-9]*[^a-z0-9]*`. Each chunk is rebuilt as
    `upper(array_extract(chunk, 1))` concatenated with `substring(chunk, 2)`,
    and the chunks are then aggregated back into one string with an empty
    separator.
    """
    from narwhals._duckdb.utils import lambda_expr

    def _to_titlecase(expr: Expression) -> Expression:
        lowered = F("lower", expr)
        chunks = F("regexp_extract_all", lowered, lit(r"[a-z0-9]*[^a-z0-9]*"))

        # `elem` is the lambda parameter bound to each chunk of the list.
        elem = col("_")
        head_upper = F("upper", F("array_extract", elem, lit(1)))
        tail = F("substring", elem, lit(2))
        capitalize = lambda_expr(elem, concat_str(head_upper, tail))

        capitalized_chunks = F("list_transform", chunks, capitalize)
        return F("list_aggregate", capitalized_chunks, lit("string_agg"), lit(""))

    return self.compliant._with_elementwise(_to_titlecase)

replace = not_implemented()
19 changes: 18 additions & 1 deletion narwhals/_duckdb/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import duckdb
import duckdb.typing as duckdb_dtypes
from duckdb import Expression
from duckdb.typing import DuckDBPyType

from narwhals._utils import Version, isinstance_or_issubclass, zip_strict
Expand All @@ -13,7 +14,7 @@
if TYPE_CHECKING:
from collections.abc import Mapping, Sequence

from duckdb import DuckDBPyRelation, Expression
from duckdb import DuckDBPyRelation

from narwhals._compliant.typing import CompliantLazyFrameAny
from narwhals._duckdb.dataframe import DuckDBLazyFrame
Expand Down Expand Up @@ -50,6 +51,22 @@
"""Alias for `duckdb.FunctionExpression`."""


def lambda_expr(
    params: str | Expression | tuple[Expression, ...], expr: Expression, /
) -> Expression:
    """Wraps [`duckdb.LambdaExpression`].

    Arguments:
        params: The lambda parameter(s). A single `Expression` is wrapped in a
            one-element tuple; any other value is passed through unchanged.
        expr: The lambda body.

    Returns:
        The constructed `LambdaExpression`.

    Raises:
        NotImplementedError: If `LambdaExpression` cannot be imported from `duckdb`.

    [`duckdb.LambdaExpression`]: https://duckdb.org/docs/stable/sql/functions/lambda
    """
    try:
        from duckdb import LambdaExpression
    except ImportError as exc:  # pragma: no cover
        # NOTE: `from pkg import name` raises plain `ImportError` when `pkg` is
        # installed but does not define `name`; `ModuleNotFoundError` (a subclass)
        # is only raised when the module itself is missing, so catching the
        # subclass alone would let the import failure escape without the hint.
        msg = f"DuckDB>=1.2.0 is required for this operation. Found: DuckDB {duckdb.__version__}"
        raise NotImplementedError(msg) from exc
    args = (params,) if isinstance(params, Expression) else params
    return LambdaExpression(args, expr)


def concat_str(*exprs: Expression, separator: str = "") -> Expression:
"""Concatenate many strings, NULL inputs are skipped.

Expand Down
1 change: 1 addition & 0 deletions narwhals/_ibis/expr_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,4 @@ def fn(expr: ir.StringColumn) -> ir.DateValue:
return self.compliant._with_callable(fn)

replace = not_implemented()
to_titlecase = not_implemented()
3 changes: 3 additions & 0 deletions narwhals/_pandas_like/series_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,5 +88,8 @@ def to_uppercase(self) -> PandasLikeSeries:
def to_lowercase(self) -> PandasLikeSeries:
    """Return a new series holding the native `.str.lower()` result."""
    lowered = self.native.str.lower()
    return self.with_native(lowered)

def to_titlecase(self) -> PandasLikeSeries:
    """Return a new series holding the native `.str.title()` result."""
    titled = self.native.str.title()
    return self.with_native(titled)

def zfill(self, width: int) -> PandasLikeSeries:
    """Return a new series holding the native `.str.zfill(width)` result."""
    padded = self.native.str.zfill(width)
    return self.with_native(padded)
16 changes: 16 additions & 0 deletions narwhals/_polars/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import polars as pl

from narwhals._polars.utils import (
BACKEND_VERSION,
PolarsAnyNamespace,
PolarsCatNamespace,
PolarsDateTimeNamespace,
Expand Down Expand Up @@ -411,6 +412,21 @@ class PolarsExprDateTimeNamespace(
class PolarsExprStringNamespace(
PolarsExprNamespace, PolarsStringNamespace[PolarsExpr, pl.Expr]
):
def to_titlecase(self) -> PolarsExpr:
    """Title-case strings, with a manual pipeline when `BACKEND_VERSION < (1, 5)`.

    Below that version the expression is lowercased, split into chunks matching
    `[a-z0-9]*[^a-z0-9]*`, each chunk is title-cased, and the chunks are joined
    back with an empty separator. Otherwise the native `str.to_titlecase` is
    used directly.
    """
    expr = self.native

    # NOTE: Keep this exact comparison spelling; the coverage `exclude_also`
    # pattern in `pyproject.toml` matches on `BACKEND_VERSION <`.
    if BACKEND_VERSION < (1, 5):
        lowered = expr.str.to_lowercase()
        chunks = lowered.str.extract_all(r"[a-z0-9]*[^a-z0-9]*")
        titled_chunks = chunks.list.eval(pl.element().str.to_titlecase())
        return self.compliant._with_native(titled_chunks.list.join(""))

    return self.compliant._with_native(expr.str.to_titlecase())

@requires.backend_version((0, 20, 5))
def zfill(self, width: int) -> PolarsExpr:
backend_version = self.compliant._backend_version
Expand Down
5 changes: 5 additions & 0 deletions narwhals/_polars/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,11 @@ class PolarsSeriesDateTimeNamespace(
class PolarsSeriesStringNamespace(
PolarsSeriesNamespace, PolarsStringNamespace[PolarsSeries, pl.Series]
):
def to_titlecase(self) -> PolarsSeries:
    """Title-case the series by routing through the expression implementation.

    The series is converted to a frame, the expression-level
    `str.to_titlecase` is selected on its column, and that column is
    extracted again.
    """
    column_name = self.name
    plx = self.__narwhals_namespace__()
    titled = plx.col(column_name).str.to_titlecase()
    return self.to_frame().select(titled).get_column(column_name)

def zfill(self, width: int) -> PolarsSeries:
name = self.name
ns = self.__narwhals_namespace__()
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_polars/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,9 @@ class PolarsStringNamespace(PolarsAnyNamespace[CompliantT, NativeT_co]):
_accessor: ClassVar[Accessor] = "str"

# NOTE: Use `abstractmethod` if we have defs to implement, but also `Method` usage
@abc.abstractmethod
def to_titlecase(self) -> CompliantT: ...

@abc.abstractmethod
def zfill(self, width: int) -> CompliantT: ...

Expand Down
30 changes: 29 additions & 1 deletion narwhals/_spark_like/expr_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@

from narwhals._spark_like.utils import strptime_to_pyspark_format
from narwhals._sql.expr_str import SQLExprStringNamespace
from narwhals._utils import _is_naive_format, not_implemented
from narwhals._utils import _is_naive_format, not_implemented, requires

if TYPE_CHECKING:
from sqlframe.base.column import Column

from narwhals._spark_like.expr import SparkLikeExpr


Expand All @@ -33,4 +35,30 @@ def to_date(self, format: str | None) -> SparkLikeExpr:
lambda expr: F.to_date(expr, format=strptime_to_pyspark_format(format))
)

def to_titlecase(self) -> SparkLikeExpr:
impl = self.compliant._implementation
sqlframe_required_version = (3, 43, 1)
if (
impl.is_sqlframe()
and (version := impl._backend_version()) < sqlframe_required_version
): # pragma: no cover
required_str = requires._unparse_version(sqlframe_required_version)
found_str = requires._unparse_version(version)
msg = (
f"`str.to_titlecase` is only available in 'sqlframe>={required_str}', "
f"found version {found_str!r}."
)
raise NotImplementedError(msg)
Comment on lines +38 to +51
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess this would be a case that #3059 would be nice for?

This'll do for now though 😄


def _to_titlecase(expr: Column) -> Column:
    """Lowercase `expr`, split it into chunks, `initcap` each chunk and re-join."""
    F = self.compliant._F
    pattern = F.lit(r"[a-z0-9]*[^a-z0-9]*")
    chunks = F.regexp_extract_all(F.lower(expr), regexp=pattern, idx=0)
    capitalized_chunks = F.transform(chunks, f=F.initcap)
    return F.array_join(capitalized_chunks, delimiter="")

return self.compliant._with_elementwise(_to_titlecase)

replace = not_implemented()
56 changes: 56 additions & 0 deletions narwhals/expr_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,62 @@ def to_lowercase(self) -> ExprT:
lambda plx: self._expr._to_compliant_expr(plx).str.to_lowercase()
)

def to_titlecase(self) -> ExprT:
    """Modify strings to their titlecase equivalent.

    Notes:
        This is a form of case transform where the first letter of each word is
        capitalized, with the rest of the word in lowercase.

    Warning:
        Different backends might follow different rules to determine what a "word" is:

        - duckdb, polars and spark-like use non-**alphanumeric** characters to
            define the word boundaries.
        - pandas-like, pyarrow and dask use non-**alphabetic** characters to define
            the word boundaries, matching the behavior of
            [`str.title`](https://docs.python.org/3/library/stdtypes.html#str.title).

        We can observe the difference with the string `"with123numbers"`:

        - non-**alphanumeric** -> `"With123numbers"`
            - notice lowercase **n** after the digits
        - non-**alphabetic** -> `"With123Numbers"`
            - notice uppercase **N** after the digits

    Examples:
        >>> import polars as pl
        >>> import narwhals as nw
        >>> df_native = pl.DataFrame(
        ...     {
        ...         "quotes": [
        ...             "'e.t. phone home'",
        ...             "you talkin' to me?",
        ...             "to infinity,and BEYOND!",
        ...         ]
        ...     }
        ... )
        >>> df = nw.from_native(df_native)
        >>> df.with_columns(quotes_title=nw.col("quotes").str.to_titlecase())
        ┌─────────────────────────────────────────────────────┐
        |                 Narwhals DataFrame                  |
        |-----------------------------------------------------|
        |shape: (3, 2)                                        |
        |┌─────────────────────────┬─────────────────────────┐|
        |│ quotes                  ┆ quotes_title            │|
        |│ ---                     ┆ ---                     │|
        |│ str                     ┆ str                     │|
        |╞═════════════════════════╪═════════════════════════╡|
        |│ 'e.t. phone home'       ┆ 'E.T. Phone Home'       │|
        |│ you talkin' to me?      ┆ You Talkin' To Me?      │|
        |│ to infinity,and BEYOND! ┆ To Infinity,And Beyond! │|
        |└─────────────────────────┴─────────────────────────┘|
        └─────────────────────────────────────────────────────┘
    """
    narwhals_expr = self._expr
    return narwhals_expr._with_elementwise(
        lambda plx: narwhals_expr._to_compliant_expr(plx).str.to_titlecase()
    )

def zfill(self, width: int) -> ExprT:
"""Transform string to zero-padded variant.

Expand Down
46 changes: 46 additions & 0 deletions narwhals/series_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,52 @@ def to_date(self, format: str | None = None) -> SeriesT:
self._narwhals_series._compliant_series.str.to_date(format=format)
)

def to_titlecase(self) -> SeriesT:
    """Modify strings to their titlecase equivalent.

    Notes:
        This is a form of case transform where the first letter of each word is
        capitalized, with the rest of the word in lowercase.

    Warning:
        Different backends might follow different rules to determine what a "word" is:

        - polars uses **non-alphanumeric** characters to define the word boundaries.
        - pandas-like and pyarrow use **non-alphabetic** characters to define
            the word boundaries, matching the behavior of
            [`str.title`](https://docs.python.org/3/library/stdtypes.html#str.title).

        As an example of such difference, in the former case the string `"with123numbers"`
        is mapped to `"With123numbers"` (notice lowercase **n** after the digits), while
        in the latter to `"With123Numbers"` (notice uppercase **N** after the digits).

    Examples:
        >>> import pyarrow as pa
        >>> import narwhals as nw
        >>> s_native = pa.chunked_array(
        ...     [
        ...         [
        ...             "'e.t. phone home'",
        ...             "you talkin' to me?",
        ...             "to infinity,and BEYOND!",
        ...         ]
        ...     ]
        ... )
        >>> s = nw.from_native(s_native, series_only=True)
        >>> s.str.to_titlecase().to_native()  # doctest: +ELLIPSIS
        <pyarrow.lib.ChunkedArray object at ...>
        [
          [
            "'E.T. Phone Home'",
            "You Talkin' To Me?",
            "To Infinity,And Beyond!"
          ]
        ]
    """
    series = self._narwhals_series
    titled = series._compliant_series.str.to_titlecase()
    return series._with_compliant(titled)

def zfill(self, width: int) -> SeriesT:
r"""Pad strings with zeros on the left.

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ exclude_also = [
'request.applymarker\(pytest.mark.xfail',
'backend_version <',
'.*._backend_version\(\) <',
'BACKEND_VERSION <',
'if ".*" in str\(constructor',
'pytest.skip\(',
'assert_never\(',
Expand Down
Loading
Loading