 
 from __future__ import annotations
 
+import copy
 import functools
 import inspect
 import itertools
@@ -1002,7 +1003,7 @@ def __init__(
 
         second_index = None
         second_columns = None
-
+        attrs = None
         if isinstance(data, (DataFrame, pd.DataFrame)):
             if isinstance(data, pd.DataFrame):
                 cols = {
@@ -1017,6 +1018,7 @@ def __init__(
                 col_accessor = data._data
             index, second_index = data.index, index
             second_columns = columns
+            attrs = data.attrs
         elif isinstance(data, (Series, pd.Series)):
             if isinstance(data, pd.Series):
                 data = Series(data, nan_as_null=nan_as_null)
@@ -1197,7 +1199,7 @@ def __init__(
                 label_dtype=second_columns.dtype,
             )
 
-        super().__init__(col_accessor, index=index)
+        super().__init__(col_accessor, index=index, attrs=attrs)
         if second_index is not None:
             reindexed = self.reindex(index=second_index, copy=False)
             self._data = reindexed._data
@@ -1207,13 +1209,14 @@ def __init__(
             self._data = self.astype(dtype)._data
 
     @classmethod
-    def _from_data(
+    def _from_data(  # type: ignore[override]
         cls,
         data: MutableMapping,
         index: Index | None = None,
         columns: Any = None,
+        attrs: dict | None = None,
     ) -> Self:
-        out = super()._from_data(data=data, index=index)
+        out = super()._from_data(data=data, index=index, attrs=attrs)
         if columns is not None:
             out.columns = columns
         return out
@@ -1370,10 +1373,10 @@ def _getitem_preprocessed(
         inputs.
         """
         if col_is_scalar:
-            series = Series._from_data(ca, index=self.index)
+            series = Series._from_data(ca, index=self.index, attrs=self.attrs)
             return series._getitem_preprocessed(spec)
         if ca.names != self._column_names:
-            frame = self._from_data(ca, index=self.index)
+            frame = self._from_data(ca, index=self.index, attrs=self.attrs)
         else:
             frame = self
         if isinstance(spec, indexing_utils.MapIndexer):
@@ -1405,6 +1408,7 @@ def _getitem_preprocessed(
                 )
                 result.index = result_index
                 result.name = new_name
+                result._attrs = frame.attrs
                 return result
             except TypeError:
                 if get_option("mode.pandas_compatible"):
@@ -1501,7 +1505,9 @@ def __getitem__(self, arg):
                     and all(n == "" for n in out._column_names[0])
                 )
             ):
-                out = self._constructor_sliced._from_data(out._data)
+                out = self._constructor_sliced._from_data(
+                    out._data, attrs=self.attrs
+                )
                 out._data.multiindex = False
                 out.index = self.index
                 out.name = arg
@@ -3436,17 +3442,16 @@ def reset_index(
         allow_duplicates: bool = False,
         names: Hashable | Sequence[Hashable] | None = None,
     ):
+        data, index = self._reset_index(
+            level=level,
+            drop=drop,
+            col_level=col_level,
+            col_fill=col_fill,
+            allow_duplicates=allow_duplicates,
+            names=names,
+        )
         return self._mimic_inplace(
-            DataFrame._from_data(
-                *self._reset_index(
-                    level=level,
-                    drop=drop,
-                    col_level=col_level,
-                    col_fill=col_fill,
-                    allow_duplicates=allow_duplicates,
-                    names=names,
-                )
-            ),
+            DataFrame._from_data(data=data, index=index, attrs=self.attrs),
             inplace=inplace,
         )
 
@@ -4328,6 +4333,7 @@ def transpose(self) -> Self:
         result = type(self)._from_data(
             ColumnAccessor(dict(enumerate(result_columns)), verify=False),
             index=Index(index),
+            attrs=self.attrs,
         )
         # Set the old index as the new column names
         result.columns = self.index
@@ -5071,7 +5077,7 @@ def _func(x): # pragma: no cover
             apply_sr = Series._from_column(col)
             result[name] = apply_sr.apply(_func)._column
 
-        return DataFrame._from_data(result, index=self.index)
+        return DataFrame._from_data(result, index=self.index, attrs=self.attrs)
 
     @_performance_tracking
     @applyutils.doc_applychunks()
@@ -5667,6 +5673,7 @@ def to_pandas(
 
         out_df = pd.DataFrame(out_data, index=out_index)
         out_df.columns = self._data.to_pandas_index
+        out_df.attrs = self.attrs
 
         return out_df
 
@@ -5720,6 +5727,7 @@ def from_pandas(cls, dataframe, nan_as_null=no_default):
             df = cls._from_data(data, index)
             # Checks duplicate columns and sets column metadata
             df.columns = dataframe.columns
+            df._attrs = copy.deepcopy(dataframe.attrs)
             return df
         else:
             raise TypeError(
@@ -6310,7 +6318,10 @@ def quantile(
             if q_is_number:
                 result = result.transpose()
                 return Series._from_column(
-                    result._columns[0], name=q, index=result.index
+                    result._columns[0],
+                    name=q,
+                    index=result.index,
+                    attrs=self.attrs,
                 )
         elif method == "single":
             # Ensure that qs is non-scalar so that we always get a column back.
@@ -6328,7 +6339,7 @@ def quantile(
                 if len(res) == 0:
                     res = column_empty(row_count=len(qs), dtype=ser.dtype)
                 result[k] = res
-            result = DataFrame._from_data(result)
+            result = DataFrame._from_data(result, attrs=self.attrs)
 
         if q_is_number and numeric_only:
             result = result.fillna(np.nan).iloc[0]
@@ -6478,7 +6489,7 @@ def make_false_column_like_self():
             )
 
         # TODO: Update this logic to properly preserve MultiIndex columns.
-        return DataFrame._from_data(result, self.index)
+        return DataFrame._from_data(result, self.index, attrs=self.attrs)
 
     #
     # Stats
@@ -6590,6 +6601,7 @@ def count(self, axis=0, numeric_only=False):
                 ]
             ),
             index=Index(self._column_names),
+            attrs=self.attrs,
         )
 
     _SUPPORT_AXIS_LOOKUP = {
@@ -6639,12 +6651,14 @@ def _reduce(
                 )
                 source = self._get_columns_by_label(numeric_cols)
                 if source.empty:
-                    return Series(
+                    res = Series(
                         index=self._data.to_pandas_index[:0]
                         if axis == 0
                         else source.index,
                         dtype="float64",
                     )
+                    res._attrs = self._attrs
+                    return res
             if (
                 axis == 2
                 and op in {"kurtosis", "skew"}
@@ -6746,7 +6760,7 @@ def _reduce(
             new_dtype = get_dtype_of_same_kind(common_dtype, res_dtype)
             res = res.astype(new_dtype)
 
-        return Series._from_column(res, index=idx)
+        return Series._from_column(res, index=idx, attrs=self.attrs)
 
     @_performance_tracking
     def _scan(
@@ -7028,10 +7042,13 @@ def _apply_cupy_method_axis_1(self, method, *args, **kwargs):
             result = as_column(result, dtype=result_dtype)
             if mask is not None:
                 result = result.set_mask(mask._column.as_mask())
-            return Series._from_column(result, index=self.index)
+            return Series._from_column(
+                result, index=self.index, attrs=self.attrs
+            )
         else:
             result_df = DataFrame(result, index=self.index)
             result_df._set_columns_like(prepared._data)
+            result_df._attrs = self.attrs
             return result_df
 
     @_performance_tracking
@@ -7652,7 +7669,9 @@ def unnamed_group_generator():
 
         # Construct the resulting dataframe / series
         if not has_unnamed_levels:
-            result = Series._from_column(stacked[0], index=new_index)
+            result = Series._from_column(
+                stacked[0], index=new_index, attrs=self.attrs
+            )
         else:
             if unnamed_level_values.nlevels == 1:
                 unnamed_level_values = unnamed_level_values.get_level_values(0)
@@ -7677,7 +7696,9 @@ def unnamed_group_generator():
                 unnamed_level_values.names,
             )
 
-            result = DataFrame._from_data(data, index=new_index)
+            result = DataFrame._from_data(
+                data, index=new_index, attrs=self.attrs
+            )
 
         if not future_stack and dropna:
             return result.dropna(how="all")
@@ -7724,6 +7745,7 @@ def cov(self, min_periods=None, ddof: int = 1, numeric_only: bool = False):
         cols = self._data.to_pandas_index
         df = DataFrame(cupy.asfortranarray(cov), index=cols)
         df._set_columns_like(self._data)
+        df._attrs = self.attrs
         return df
 
     def corr(
@@ -7770,6 +7792,7 @@ def corr(
         cols = self._data.to_pandas_index
         df = DataFrame(cupy.asfortranarray(corr), index=cols)
         df._set_columns_like(self._data)
+        df._attrs = self.attrs
         return df
 
     @_performance_tracking
@@ -8080,9 +8103,13 @@ def nunique(self, axis=0, dropna: bool = True) -> Series:
         if axis != 0:
             raise NotImplementedError("axis parameter is not supported yet.")
         counts = [col.distinct_count(dropna=dropna) for col in self._columns]
-        return self._constructor_sliced(
-            counts, index=self._data.to_pandas_index
+        res = self._constructor_sliced(
+            counts,
+            index=self._data.to_pandas_index,
+            dtype="float64" if len(counts) == 0 else None,
         )
+        res._attrs = self.attrs
+        return res
 
     def _sample_axis_1(
         self,
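
The hunks above all thread pandas-style `attrs` metadata through DataFrame construction, indexing, reductions, and pandas round-trips. Below is a minimal usage sketch of the intended behavior, assuming a cudf build that includes this change; the column names and metadata values are illustrative, not taken from this PR's tests.

import pandas as pd
import cudf

pdf = pd.DataFrame({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]})
pdf.attrs = {"source": "sensor-1"}  # pandas metadata dict

gdf = cudf.DataFrame.from_pandas(pdf)  # from_pandas() deep-copies dataframe.attrs
assert gdf.attrs == {"source": "sensor-1"}

reset = gdf.reset_index()       # _from_data(..., attrs=self.attrs) carries metadata along
roundtrip = reset.to_pandas()   # to_pandas() re-attaches attrs to the pandas frame
assert roundtrip.attrs == {"source": "sensor-1"}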