Add return type annotations to MultiIndex (rapidsai#16696)
Mostly just return type annotations. No logic changes.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Matthew Murray (https://github.com/Matt711)

URL: rapidsai#16696
mroeschke authored Sep 3, 2024
1 parent e18b537 commit a83ac6f
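
The pattern applied throughout the diff below is annotating return types on existing signatures (for example `-> Self`, `-> bool`, `-> cudf.DataFrame`) while leaving the method bodies untouched. A minimal sketch of the idea, not code from the commit (the `Self` import shown is an assumption; on Python 3.11+ it is also available from `typing`):

# Illustrative sketch only: the style of change this commit makes.
from typing_extensions import Self  # assumption: cudf may import Self differently

class MyIndex:
    def copy(self) -> Self:
        # `Self` tells type checkers that a subclass calling copy() gets back
        # an instance of that subclass, not of the base class.
        return type(self)()

    @property
    def nlevels(self) -> int:
        # Built-in return types are annotated directly.
        return 1

Annotations like these do not change runtime behavior, which is consistent with the commit message's "No logic changes."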
Showing 2 changed files with 63 additions and 48 deletions.
2 changes: 2 additions & 0 deletions docs/cudf/source/conf.py
@@ -566,6 +566,8 @@ def on_missing_reference(app, env, node, contnode):
("py:obj", "cudf.Index.to_flat_index"),
("py:obj", "cudf.MultiIndex.to_flat_index"),
("py:meth", "pyarrow.Table.to_pandas"),
("py:class", "pd.DataFrame"),
("py:class", "pandas.core.indexes.frozen.FrozenList"),
("py:class", "pa.Array"),
("py:class", "ScalarLike"),
("py:class", "ParentType"),
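
The two `py:class` entries added above appear to extend conf.py's list of cross-reference targets the docs build is allowed to miss, since the new annotations below (for example on `to_frame` and `codes`) surface `pd.DataFrame` and `FrozenList` in the rendered signatures. As a rough sketch of the general Sphinx mechanism, shown with the built-in `nitpick_ignore` option rather than cudf's exact setup (an assumption):

# Hypothetical conf.py snippet: silence "reference target not found" warnings
# for external types that show up in documented annotations.
nitpick_ignore = [
    ("py:class", "pd.DataFrame"),
    ("py:class", "pandas.core.indexes.frozen.FrozenList"),
]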
109 changes: 61 additions & 48 deletions python/cudf/cudf/core/multiindex.py
@@ -247,7 +247,7 @@ def to_series(self, index=None, name=None):
)

@_performance_tracking
def astype(self, dtype, copy: bool = True):
def astype(self, dtype, copy: bool = True) -> Self:
if not is_object_dtype(dtype):
raise TypeError(
"Setting a MultiIndex dtype to anything other than object is "
@@ -256,7 +256,7 @@ def astype(self, dtype, copy: bool = True):
return self

@_performance_tracking
def rename(self, names, inplace=False):
def rename(self, names, inplace: bool = False) -> Self | None:
"""
Alter MultiIndex level names
@@ -303,7 +303,9 @@ def rename(self, names, inplace=False):
return self.set_names(names, level=None, inplace=inplace)

@_performance_tracking
def set_names(self, names, level=None, inplace=False):
def set_names(
self, names, level=None, inplace: bool = False
) -> Self | None:
names_is_list_like = is_list_like(names)
level_is_list_like = is_list_like(level)

@@ -345,7 +347,7 @@ def _from_data(
cls,
data: MutableMapping,
name: Any = None,
) -> MultiIndex:
) -> Self:
"""
Use when you have a ColumnAccessor-like mapping but no codes and levels.
"""
@@ -394,7 +396,7 @@ def copy(
names=None,
deep=False,
name=None,
):
) -> Self:
"""Returns copy of MultiIndex object.
Returns a copy of `MultiIndex`. The `levels` and `codes` value can be
@@ -457,7 +459,7 @@ def copy(
)

@_performance_tracking
def __repr__(self):
def __repr__(self) -> str:
max_seq_items = pd.get_option("display.max_seq_items") or len(self)

if len(self) > max_seq_items:
@@ -503,7 +505,7 @@ def __repr__(self):
@property # type: ignore
@_external_only_api("Use ._codes instead")
@_performance_tracking
def codes(self):
def codes(self) -> pd.core.indexes.frozen.FrozenList:
"""
Returns the codes of the underlying MultiIndex.
@@ -531,7 +533,7 @@ def get_slice_bound(self, label, side):

@property # type: ignore
@_performance_tracking
def nlevels(self):
def nlevels(self) -> int:
"""Integer number of levels in this MultiIndex."""
return self._num_columns

@@ -590,7 +592,7 @@ def _get_level_label(self, level):
return self.names[level]

@_performance_tracking
def isin(self, values, level=None):
def isin(self, values, level=None) -> cp.ndarray:
"""Return a boolean array where the index values are in values.
Compute boolean array of whether each index value is found in
@@ -864,7 +866,7 @@ def _validate_indexer(
| slice
| tuple[Any, ...]
| list[tuple[Any, ...]],
):
) -> None:
if isinstance(indexer, numbers.Number):
return
if isinstance(indexer, tuple):
@@ -900,12 +902,12 @@ def __eq__(self, other):

@property # type: ignore
@_performance_tracking
def size(self):
def size(self) -> int:
# The size of a MultiIndex is only dependent on the number of rows.
return self._num_rows

@_performance_tracking
def take(self, indices):
def take(self, indices) -> Self:
if isinstance(indices, cudf.Series) and indices.has_nulls:
raise ValueError("Column must have no nulls.")
obj = super().take(indices)
@@ -957,7 +959,12 @@ def __getitem__(self, index):
return result

@_performance_tracking
def to_frame(self, index=True, name=no_default, allow_duplicates=False):
def to_frame(
self,
index: bool = True,
name=no_default,
allow_duplicates: bool = False,
) -> cudf.DataFrame:
"""
Create a DataFrame with the levels of the MultiIndex as columns.
@@ -1034,7 +1041,7 @@ def to_frame(self, index=True, name=no_default, allow_duplicates=False):
)

@_performance_tracking
def get_level_values(self, level):
def get_level_values(self, level) -> cudf.Index:
"""
Return the values at the requested level
@@ -1067,30 +1074,30 @@ def get_level_values(self, level):
)
return level_values

def _is_numeric(self):
def _is_numeric(self) -> bool:
return False

def _is_boolean(self):
def _is_boolean(self) -> bool:
return False

def _is_integer(self):
def _is_integer(self) -> bool:
return False

def _is_floating(self):
def _is_floating(self) -> bool:
return False

def _is_object(self):
def _is_object(self) -> bool:
return False

def _is_categorical(self):
def _is_categorical(self) -> bool:
return False

def _is_interval(self):
def _is_interval(self) -> bool:
return False

@classmethod
@_performance_tracking
def _concat(cls, objs):
def _concat(cls, objs) -> Self:
source_data = [o.to_frame(index=False) for o in objs]

# TODO: Verify if this is really necessary or if we can rely on
@@ -1100,17 +1107,19 @@ def _concat(cls, objs):
for obj in source_data[1:]:
obj.columns = colnames

source_data = cudf.DataFrame._concat(source_data)
source_df = cudf.DataFrame._concat(source_data)
try:
# Only set names if all objs have the same names
(names,) = {o.names for o in objs} - {None}
except ValueError:
names = [None] * source_data._num_columns
return cudf.MultiIndex.from_frame(source_data, names=names)
names = [None] * source_df._num_columns
return cudf.MultiIndex.from_frame(source_df, names=names)

@classmethod
@_performance_tracking
def from_tuples(cls, tuples, sortorder: int | None = None, names=None):
def from_tuples(
cls, tuples, sortorder: int | None = None, names=None
) -> Self:
"""
Convert list of tuples to MultiIndex.
@@ -1153,7 +1162,7 @@ def from_tuples(cls, tuples, sortorder: int | None = None, names=None):
return cls.from_pandas(pdi)

@_performance_tracking
def to_numpy(self):
def to_numpy(self) -> np.ndarray:
return self.values_host

def to_flat_index(self):
@@ -1167,7 +1176,7 @@ def to_flat_index(self):

@property # type: ignore
@_performance_tracking
def values_host(self):
def values_host(self) -> np.ndarray:
"""
Return a numpy representation of the MultiIndex.
@@ -1195,7 +1204,7 @@ def values_host(self):

@property # type: ignore
@_performance_tracking
def values(self):
def values(self) -> cp.ndarray:
"""
Return a CuPy representation of the MultiIndex.
@@ -1236,7 +1245,7 @@ def from_frame(
df: pd.DataFrame | cudf.DataFrame,
sortorder: int | None = None,
names=None,
):
) -> Self:
"""
Make a MultiIndex from a DataFrame.
@@ -1303,7 +1312,9 @@ def from_frame(

@classmethod
@_performance_tracking
def from_product(cls, iterables, sortorder: int | None = None, names=None):
def from_product(
cls, iterables, sortorder: int | None = None, names=None
) -> Self:
"""
Make a MultiIndex from the cartesian product of multiple iterables.
@@ -1355,7 +1366,7 @@ def from_arrays(
arrays,
sortorder=None,
names=None,
) -> MultiIndex:
) -> Self:
"""
Convert arrays to MultiIndex.
@@ -1410,7 +1421,7 @@ def from_arrays(
)

@_performance_tracking
def _poplevels(self, level):
def _poplevels(self, level) -> None | MultiIndex | cudf.Index:
"""
Remove and return the specified levels from self.
@@ -1461,7 +1472,7 @@ def _poplevels(self, level):
return popped

@_performance_tracking
def swaplevel(self, i=-2, j=-1):
def swaplevel(self, i=-2, j=-1) -> Self:
"""
Swap level i with level j.
Calling this method does not change the ordering of the values.
@@ -1512,7 +1523,7 @@ def swaplevel(self, i=-2, j=-1):
return midx

@_performance_tracking
def droplevel(self, level=-1):
def droplevel(self, level=-1) -> MultiIndex | cudf.Index:
"""
Removes the specified levels from the MultiIndex.
@@ -1598,7 +1609,9 @@ def to_pandas(

@classmethod
@_performance_tracking
def from_pandas(cls, multiindex: pd.MultiIndex, nan_as_null=no_default):
def from_pandas(
cls, multiindex: pd.MultiIndex, nan_as_null=no_default
) -> Self:
"""
Convert from a Pandas MultiIndex
@@ -1633,11 +1646,11 @@ def from_pandas(cls, multiindex: pd.MultiIndex, nan_as_null=no_default):

@cached_property # type: ignore
@_performance_tracking
def is_unique(self):
def is_unique(self) -> bool:
return len(self) == len(self.unique())

@property
def dtype(self):
def dtype(self) -> np.dtype:
return np.dtype("O")

@_performance_tracking
@@ -1706,7 +1719,7 @@ def is_monotonic_decreasing(self) -> bool:
)

@_performance_tracking
def fillna(self, value):
def fillna(self, value) -> Self:
"""
Fill null values with the specified value.
@@ -1758,7 +1771,7 @@ def nunique(self, dropna: bool = True) -> int:
mi = self.dropna(how="all") if dropna else self
return len(mi.unique())

def _clean_nulls_from_index(self):
def _clean_nulls_from_index(self) -> Self:
"""
Convert all na values(if any) in MultiIndex object
to `<NA>` as a preprocessing step to `__repr__` methods.
@@ -1769,20 +1782,20 @@ def _clean_nulls_from_index(self):
)

@_performance_tracking
def memory_usage(self, deep=False):
def memory_usage(self, deep: bool = False) -> int:
usage = sum(col.memory_usage for col in self._columns)
usage += sum(level.memory_usage(deep=deep) for level in self._levels)
usage += sum(code.memory_usage for code in self._codes)
return usage

@_performance_tracking
def difference(self, other, sort=None):
def difference(self, other, sort=None) -> Self:
if hasattr(other, "to_pandas"):
other = other.to_pandas()
return cudf.from_pandas(self.to_pandas().difference(other, sort))

@_performance_tracking
def append(self, other):
def append(self, other) -> Self:
"""
Append a collection of MultiIndex objects together
@@ -2000,7 +2013,7 @@ def get_loc(self, key):
mask[true_inds] = True
return mask

def _get_reconciled_name_object(self, other) -> MultiIndex:
def _get_reconciled_name_object(self, other) -> Self:
"""
If the result of a set operation will be self,
return self, unless the names change, in which
@@ -2026,7 +2039,7 @@ def _maybe_match_names(self, other):
]

@_performance_tracking
def union(self, other, sort=None):
def union(self, other, sort=None) -> Self:
if not isinstance(other, MultiIndex):
msg = "other must be a MultiIndex or a list of tuples"
try:
@@ -2050,7 +2063,7 @@ def union(self, other, sort=None):
return self._union(other, sort=sort)

@_performance_tracking
def _union(self, other, sort=None):
def _union(self, other, sort=None) -> Self:
# TODO: When to_frame is refactored to return a
# deep copy in future, we should push most of the common
# logic between MultiIndex._union & BaseIndex._union into
@@ -2076,7 +2089,7 @@ def _union(self, other, sort=None):
return midx

@_performance_tracking
def _intersection(self, other, sort=None):
def _intersection(self, other, sort=None) -> Self:
if self.names != other.names:
deep = True
col_names = list(range(0, self.nlevels))
@@ -2167,7 +2180,7 @@ def _columns_for_reset_index(
else:
yield from self._split_columns_by_levels(levels, in_levels=True)

def repeat(self, repeats, axis=None):
def repeat(self, repeats, axis=None) -> Self:
return self._from_data(
self._data._from_columns_like_self(
super()._repeat([*self._columns], repeats, axis)
