Skip to content

Commit

Permalink
Use Series._from_column more consistently to avoid validation (rapidsai#16716)
Browse files Browse the repository at this point in the history

This modifies call sites where `Series._from_column` provides the same logic as the existing code, or where exactly one column is produced, so that `Series._from_column` is valid to use.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: rapidsai#16716
  • Loading branch information
mroeschke authored Sep 3, 2024
1 parent 0097b45 commit e18b537
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 21 deletions.
2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/text.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -86,4 +86,4 @@ def read_text(object filepaths_or_buffers,
delim,
c_options))

return {None: Column.from_unique_ptr(move(c_col))}
return Column.from_unique_ptr(move(c_col))
15 changes: 4 additions & 11 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,15 +473,8 @@ def __getitem__(self, arg):
ca = self._frame._data
index = self._frame.index
if col_is_scalar:
s = Series._from_data(
data=ColumnAccessor(
{key: ca._data[key] for key in column_names},
multiindex=ca.multiindex,
level_names=ca.level_names,
verify=False,
),
index=index,
)
name = column_names[0]
s = Series._from_column(ca._data[name], name=name, index=index)
return s._getitem_preprocessed(row_spec)
if column_names != list(self._frame._column_names):
frame = self._frame._from_data(
Expand Down Expand Up @@ -7770,8 +7763,8 @@ def interleave_columns(self):
"interleave_columns does not support 'category' dtype."
)

return self._constructor_sliced._from_data(
{None: libcudf.reshape.interleave_columns([*self._columns])}
return self._constructor_sliced._from_column(
libcudf.reshape.interleave_columns([*self._columns])
)

@_performance_tracking
Expand Down
14 changes: 6 additions & 8 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -611,9 +611,7 @@ def from_masked_array(cls, data, mask, null_count=None):
4 14
dtype: int64
"""
col = as_column(data).set_mask(mask)
ca = ColumnAccessor({None: col}, verify=False)
return cls._from_data(ca)
return cls._from_column(as_column(data).set_mask(mask))

@_performance_tracking
def __init__(
Expand Down Expand Up @@ -1150,7 +1148,7 @@ def reset_index(
if name is no_default:
name = 0 if self.name is None else self.name
data[name] = data.pop(self.name)
return cudf.core.dataframe.DataFrame._from_data(data, index)
return self._constructor_expanddim._from_data(data, index)
# For ``name`` behavior, see:
# https://github.com/pandas-dev/pandas/issues/44575
# ``name`` has to be ignored when `drop=True`
Expand Down Expand Up @@ -1661,9 +1659,7 @@ def _concat(cls, objs, axis=0, index: bool = True):
if len(objs):
col = col._with_type_metadata(objs[0].dtype)

return cls._from_data(
ColumnAccessor({name: col}, verify=False), index=result_index
)
return cls._from_column(col, name=name, index=result_index)

@property # type: ignore
@_performance_tracking
Expand Down Expand Up @@ -1977,7 +1973,9 @@ def between(self, left, right, inclusive="both") -> Series:
"Inclusive has to be either string of 'both', "
"'left', 'right', or 'neither'."
)
return self._from_data({self.name: lmask & rmask}, self.index)
return self._from_column(
lmask & rmask, name=self.name, index=self.index
)

@_performance_tracking
def all(self, axis=0, bool_only=None, skipna=True, **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/io/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def read_text(
filepath_or_buffer, "read_text"
)

return cudf.Series._from_data(
return cudf.Series._from_column(
libtext.read_text(
filepath_or_buffer,
delimiter=delimiter,
Expand Down

0 comments on commit e18b537

Please sign in to comment.