Skip to content

Commit 5a0a6dd

Browse files
authored
Merge pull request #20037 from rapidsai/branch-25.10
Forward-merge branch-25.10 into branch-25.12
2 parents ee84ea4 + c2cb7bd commit 5a0a6dd

File tree

9 files changed: 193 additions and 759 deletions.

docs/cudf/source/user_guide/api_docs/dataframe.rst

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -246,6 +246,14 @@ Time Series-related
246246
DataFrame.shift
247247
DataFrame.resample
248248

249+
250+
Metadata
251+
~~~~~~~~
252+
.. autosummary::
253+
:toctree: api/
254+
255+
DataFrame.attrs
256+
249257
Serialization / IO / conversion
250258
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
251259
.. autosummary::

docs/cudf/source/user_guide/api_docs/series.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,13 @@ Time Series-related
236236
Series.shift
237237
Series.resample
238238

239+
Metadata
240+
~~~~~~~~
241+
.. autosummary::
242+
:toctree: api/
243+
244+
Series.attrs
245+
239246
Accessors
240247
---------
241248

python/cudf/cudf/core/accessors/base_accessor.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,13 +81,16 @@ def _return_or_inplace(
8181
return idx
8282
else:
8383
return self._parent._constructor_expanddim._from_data(
84-
data=table, index=self._parent.index
84+
data=table,
85+
index=self._parent.index,
86+
attrs=self._parent.attrs,
8587
)
8688
elif isinstance(self._parent, cudf.Series):
8789
return cudf.Series._from_column(
8890
new_col,
8991
name=self._parent.name,
9092
index=self._parent.index if retain_index else None,
93+
attrs=self._parent.attrs,
9194
)
9295
elif isinstance(self._parent, cudf.Index):
9396
return cudf.Index._from_column(new_col, name=self._parent.name)

python/cudf/cudf/core/dataframe.py

Lines changed: 55 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from __future__ import annotations
44

5+
import copy
56
import functools
67
import inspect
78
import itertools
@@ -1002,7 +1003,7 @@ def __init__(
10021003

10031004
second_index = None
10041005
second_columns = None
1005-
1006+
attrs = None
10061007
if isinstance(data, (DataFrame, pd.DataFrame)):
10071008
if isinstance(data, pd.DataFrame):
10081009
cols = {
@@ -1017,6 +1018,7 @@ def __init__(
10171018
col_accessor = data._data
10181019
index, second_index = data.index, index
10191020
second_columns = columns
1021+
attrs = data.attrs
10201022
elif isinstance(data, (Series, pd.Series)):
10211023
if isinstance(data, pd.Series):
10221024
data = Series(data, nan_as_null=nan_as_null)
@@ -1197,7 +1199,7 @@ def __init__(
11971199
label_dtype=second_columns.dtype,
11981200
)
11991201

1200-
super().__init__(col_accessor, index=index)
1202+
super().__init__(col_accessor, index=index, attrs=attrs)
12011203
if second_index is not None:
12021204
reindexed = self.reindex(index=second_index, copy=False)
12031205
self._data = reindexed._data
@@ -1207,13 +1209,14 @@ def __init__(
12071209
self._data = self.astype(dtype)._data
12081210

12091211
@classmethod
1210-
def _from_data(
1212+
def _from_data( # type: ignore[override]
12111213
cls,
12121214
data: MutableMapping,
12131215
index: Index | None = None,
12141216
columns: Any = None,
1217+
attrs: dict | None = None,
12151218
) -> Self:
1216-
out = super()._from_data(data=data, index=index)
1219+
out = super()._from_data(data=data, index=index, attrs=attrs)
12171220
if columns is not None:
12181221
out.columns = columns
12191222
return out
@@ -1370,10 +1373,10 @@ def _getitem_preprocessed(
13701373
inputs.
13711374
"""
13721375
if col_is_scalar:
1373-
series = Series._from_data(ca, index=self.index)
1376+
series = Series._from_data(ca, index=self.index, attrs=self.attrs)
13741377
return series._getitem_preprocessed(spec)
13751378
if ca.names != self._column_names:
1376-
frame = self._from_data(ca, index=self.index)
1379+
frame = self._from_data(ca, index=self.index, attrs=self.attrs)
13771380
else:
13781381
frame = self
13791382
if isinstance(spec, indexing_utils.MapIndexer):
@@ -1405,6 +1408,7 @@ def _getitem_preprocessed(
14051408
)
14061409
result.index = result_index
14071410
result.name = new_name
1411+
result._attrs = frame.attrs
14081412
return result
14091413
except TypeError:
14101414
if get_option("mode.pandas_compatible"):
@@ -1501,7 +1505,9 @@ def __getitem__(self, arg):
15011505
and all(n == "" for n in out._column_names[0])
15021506
)
15031507
):
1504-
out = self._constructor_sliced._from_data(out._data)
1508+
out = self._constructor_sliced._from_data(
1509+
out._data, attrs=self.attrs
1510+
)
15051511
out._data.multiindex = False
15061512
out.index = self.index
15071513
out.name = arg
@@ -3436,17 +3442,16 @@ def reset_index(
34363442
allow_duplicates: bool = False,
34373443
names: Hashable | Sequence[Hashable] | None = None,
34383444
):
3445+
data, index = self._reset_index(
3446+
level=level,
3447+
drop=drop,
3448+
col_level=col_level,
3449+
col_fill=col_fill,
3450+
allow_duplicates=allow_duplicates,
3451+
names=names,
3452+
)
34393453
return self._mimic_inplace(
3440-
DataFrame._from_data(
3441-
*self._reset_index(
3442-
level=level,
3443-
drop=drop,
3444-
col_level=col_level,
3445-
col_fill=col_fill,
3446-
allow_duplicates=allow_duplicates,
3447-
names=names,
3448-
)
3449-
),
3454+
DataFrame._from_data(data=data, index=index, attrs=self.attrs),
34503455
inplace=inplace,
34513456
)
34523457

@@ -4328,6 +4333,7 @@ def transpose(self) -> Self:
43284333
result = type(self)._from_data(
43294334
ColumnAccessor(dict(enumerate(result_columns)), verify=False),
43304335
index=Index(index),
4336+
attrs=self.attrs,
43314337
)
43324338
# Set the old index as the new column names
43334339
result.columns = self.index
@@ -5071,7 +5077,7 @@ def _func(x): # pragma: no cover
50715077
apply_sr = Series._from_column(col)
50725078
result[name] = apply_sr.apply(_func)._column
50735079

5074-
return DataFrame._from_data(result, index=self.index)
5080+
return DataFrame._from_data(result, index=self.index, attrs=self.attrs)
50755081

50765082
@_performance_tracking
50775083
@applyutils.doc_applychunks()
@@ -5667,6 +5673,7 @@ def to_pandas(
56675673

56685674
out_df = pd.DataFrame(out_data, index=out_index)
56695675
out_df.columns = self._data.to_pandas_index
5676+
out_df.attrs = self.attrs
56705677

56715678
return out_df
56725679

@@ -5720,6 +5727,7 @@ def from_pandas(cls, dataframe, nan_as_null=no_default):
57205727
df = cls._from_data(data, index)
57215728
# Checks duplicate columns and sets column metadata
57225729
df.columns = dataframe.columns
5730+
df._attrs = copy.deepcopy(dataframe.attrs)
57235731
return df
57245732
else:
57255733
raise TypeError(
@@ -6310,7 +6318,10 @@ def quantile(
63106318
if q_is_number:
63116319
result = result.transpose()
63126320
return Series._from_column(
6313-
result._columns[0], name=q, index=result.index
6321+
result._columns[0],
6322+
name=q,
6323+
index=result.index,
6324+
attrs=self.attrs,
63146325
)
63156326
elif method == "single":
63166327
# Ensure that qs is non-scalar so that we always get a column back.
@@ -6328,7 +6339,7 @@ def quantile(
63286339
if len(res) == 0:
63296340
res = column_empty(row_count=len(qs), dtype=ser.dtype)
63306341
result[k] = res
6331-
result = DataFrame._from_data(result)
6342+
result = DataFrame._from_data(result, attrs=self.attrs)
63326343

63336344
if q_is_number and numeric_only:
63346345
result = result.fillna(np.nan).iloc[0]
@@ -6478,7 +6489,7 @@ def make_false_column_like_self():
64786489
)
64796490

64806491
# TODO: Update this logic to properly preserve MultiIndex columns.
6481-
return DataFrame._from_data(result, self.index)
6492+
return DataFrame._from_data(result, self.index, attrs=self.attrs)
64826493

64836494
#
64846495
# Stats
@@ -6590,6 +6601,7 @@ def count(self, axis=0, numeric_only=False):
65906601
]
65916602
),
65926603
index=Index(self._column_names),
6604+
attrs=self.attrs,
65936605
)
65946606

65956607
_SUPPORT_AXIS_LOOKUP = {
@@ -6639,12 +6651,14 @@ def _reduce(
66396651
)
66406652
source = self._get_columns_by_label(numeric_cols)
66416653
if source.empty:
6642-
return Series(
6654+
res = Series(
66436655
index=self._data.to_pandas_index[:0]
66446656
if axis == 0
66456657
else source.index,
66466658
dtype="float64",
66476659
)
6660+
res._attrs = self._attrs
6661+
return res
66486662
if (
66496663
axis == 2
66506664
and op in {"kurtosis", "skew"}
@@ -6746,7 +6760,7 @@ def _reduce(
67466760
new_dtype = get_dtype_of_same_kind(common_dtype, res_dtype)
67476761
res = res.astype(new_dtype)
67486762

6749-
return Series._from_column(res, index=idx)
6763+
return Series._from_column(res, index=idx, attrs=self.attrs)
67506764

67516765
@_performance_tracking
67526766
def _scan(
@@ -7028,10 +7042,13 @@ def _apply_cupy_method_axis_1(self, method, *args, **kwargs):
70287042
result = as_column(result, dtype=result_dtype)
70297043
if mask is not None:
70307044
result = result.set_mask(mask._column.as_mask())
7031-
return Series._from_column(result, index=self.index)
7045+
return Series._from_column(
7046+
result, index=self.index, attrs=self.attrs
7047+
)
70327048
else:
70337049
result_df = DataFrame(result, index=self.index)
70347050
result_df._set_columns_like(prepared._data)
7051+
result_df._attrs = self.attrs
70357052
return result_df
70367053

70377054
@_performance_tracking
@@ -7652,7 +7669,9 @@ def unnamed_group_generator():
76527669

76537670
# Construct the resulting dataframe / series
76547671
if not has_unnamed_levels:
7655-
result = Series._from_column(stacked[0], index=new_index)
7672+
result = Series._from_column(
7673+
stacked[0], index=new_index, attrs=self.attrs
7674+
)
76567675
else:
76577676
if unnamed_level_values.nlevels == 1:
76587677
unnamed_level_values = unnamed_level_values.get_level_values(0)
@@ -7677,7 +7696,9 @@ def unnamed_group_generator():
76777696
unnamed_level_values.names,
76787697
)
76797698

7680-
result = DataFrame._from_data(data, index=new_index)
7699+
result = DataFrame._from_data(
7700+
data, index=new_index, attrs=self.attrs
7701+
)
76817702

76827703
if not future_stack and dropna:
76837704
return result.dropna(how="all")
@@ -7724,6 +7745,7 @@ def cov(self, min_periods=None, ddof: int = 1, numeric_only: bool = False):
77247745
cols = self._data.to_pandas_index
77257746
df = DataFrame(cupy.asfortranarray(cov), index=cols)
77267747
df._set_columns_like(self._data)
7748+
df._attrs = self.attrs
77277749
return df
77287750

77297751
def corr(
@@ -7770,6 +7792,7 @@ def corr(
77707792
cols = self._data.to_pandas_index
77717793
df = DataFrame(cupy.asfortranarray(corr), index=cols)
77727794
df._set_columns_like(self._data)
7795+
df._attrs = self.attrs
77737796
return df
77747797

77757798
@_performance_tracking
@@ -8080,9 +8103,13 @@ def nunique(self, axis=0, dropna: bool = True) -> Series:
80808103
if axis != 0:
80818104
raise NotImplementedError("axis parameter is not supported yet.")
80828105
counts = [col.distinct_count(dropna=dropna) for col in self._columns]
8083-
return self._constructor_sliced(
8084-
counts, index=self._data.to_pandas_index
8106+
res = self._constructor_sliced(
8107+
counts,
8108+
index=self._data.to_pandas_index,
8109+
dtype="float64" if len(counts) == 0 else None,
80858110
)
8111+
res._attrs = self.attrs
8112+
return res
80868113

80878114
def _sample_axis_1(
80888115
self,

0 commit comments

Comments
 (0)