Skip to content

Commit

Permalink
Merge pull request #6098 from rapidsai/branch-24.10
Browse files Browse the repository at this point in the history
Forward-merge branch-24.10 into branch-24.12
  • Loading branch information
GPUtester authored Oct 3, 2024
2 parents c78a748 + 65a02f6 commit 82483ac
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 58 deletions.
11 changes: 9 additions & 2 deletions python/cuml/cuml/cluster/kmeans.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ from cuml.internals.safe_imports import cpu_only_import
np = cpu_only_import('numpy')
from cuml.internals.safe_imports import gpu_only_import
rmm = gpu_only_import('rmm')
from cuml.internals.safe_imports import safe_import_from, return_false
import typing

IF GPUBUILD == 1:
Expand All @@ -46,7 +47,10 @@ from cuml.common import input_to_cuml_array
from cuml.internals.api_decorators import device_interop_preparation
from cuml.internals.api_decorators import enable_device_interop

from sklearn.utils._openmp_helpers import _openmp_effective_n_threads
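# sklearn may not be installed, so the helper is resolved through
# safe_import_from with a fallback (alt=return_false) instead of the direct
# `from sklearn.utils._openmp_helpers import _openmp_effective_n_threads`.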
_openmp_effective_n_threads = safe_import_from(
"sklearn.utils._openmp_helpers", "_openmp_effective_n_threads", alt=return_false
)


class KMeans(UniversalBase,
Expand Down Expand Up @@ -235,7 +239,10 @@ class KMeans(UniversalBase,
self.cluster_centers_ = None

# For sklearn interoperability
self._n_threads = _openmp_effective_n_threads()
if _openmp_effective_n_threads():
self._n_threads = _openmp_effective_n_threads()
else:
self._n_threads = 1

# cuPy does not allow comparing with string. See issue #2372
init_str = init if isinstance(init, str) else None
Expand Down
15 changes: 8 additions & 7 deletions python/cuml/cuml/internals/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1251,13 +1251,14 @@ def array_to_memory_order(arr, default="C"):
return arr.order
except AttributeError:
pass
try:
array_interface = arr.__cuda_array_interface__
except AttributeError:
try:
array_interface = arr.__array_interface__
except AttributeError:
return array_to_memory_order(CumlArray.from_input(arr, order="K"))
array_interface = getattr(
arr,
"__cuda_array_interface__",
getattr(arr, "__array_interface__", False),
)
if not array_interface:
return array_to_memory_order(CumlArray.from_input(arr, order="K"))

strides = array_interface.get("strides", None)
if strides is None:
try:
Expand Down
106 changes: 57 additions & 49 deletions python/cuml/cuml/model_selection/_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,8 +265,18 @@ def train_test_split(
string"
)

x_order = array_to_memory_order(X)
X_arr, X_row, *_ = input_to_cuml_array(X, order=x_order)
all_numeric = True
if isinstance(X, cudf.DataFrame):
all_numeric = all(
cudf.api.types.is_numeric_dtype(X[col]) for col in X.columns
)

if all_numeric:
x_order = array_to_memory_order(X)
X_arr, X_row, *_ = input_to_cuml_array(X, order=x_order)
else:
x_order = "F"
X_arr, X_row = X, X.shape[0]
if y is not None:
y_order = array_to_memory_order(y)
y_arr, y_row, *_ = input_to_cuml_array(y, order=y_order)
Expand Down Expand Up @@ -363,55 +373,53 @@ def train_test_split(
train_indices = range(0, train_size)
test_indices = range(-1 * test_size, 0)

# Gather from indices
X_train = X_arr[train_indices]
X_test = X_arr[test_indices]
if y is not None:
y_train = y_arr[train_indices]
y_test = y_arr[test_indices]

# Coerce output to original input type
if ty := determine_df_obj_type(X):
x_type = ty
else:
x_type = determine_array_type(X)

if ty := determine_df_obj_type(y):
y_type = ty
else:
y_type = determine_array_type(y)

if x_type in ("series", "dataframe"):
X_train = output_to_df_obj_like(X_train, X, x_type)
X_test = output_to_df_obj_like(X_test, X, x_type)

if determine_array_type(X.index) == "pandas":
if isinstance(train_indices, cp.ndarray):
train_indices = train_indices.get()
if isinstance(test_indices, cp.ndarray):
test_indices = test_indices.get()
if all_numeric:
# Gather from indices
X_train = X_arr[train_indices]
X_test = X_arr[test_indices]
if y is not None:
y_train = y_arr[train_indices]
y_test = y_arr[test_indices]

# Coerce output to original input type
x_type = determine_df_obj_type(X) or determine_array_type(X)
if y is not None:
y_type = determine_df_obj_type(y) or determine_array_type(y)

def _process_df_objs(
    df, df_type, df_train, df_test, train_indices, test_indices
):
    """Coerce one pair of train/test splits to the requested output type.

    Parameters
    ----------
    df
        The original, unsplit input that the splits were taken from.
    df_type
        Target output type. ``"series"`` and ``"dataframe"`` take the
        dataframe-object path; any other value is forwarded to
        ``to_output``.
    df_train, df_test
        The gathered train and test splits.
    train_indices, test_indices
        Row selectors used to produce the splits; reused here to slice
        ``df.index``.

    Returns
    -------
    tuple
        ``(df_train, df_test)`` after conversion.
    """
    # Array-like outputs: no index to restore, just convert and return.
    if df_type not in {"series", "dataframe"}:
        return df_train.to_output(df_type), df_test.to_output(df_type)

    train_out = output_to_df_obj_like(df_train, df, df_type)
    test_out = output_to_df_obj_like(df_test, df, df_type)

    # When the index is classified as "pandas", CuPy index arrays are
    # copied to host with ``.get()`` before being used to slice it.
    if determine_array_type(df.index) == "pandas":
        train_indices, test_indices = (
            idx.get() if isinstance(idx, cp.ndarray) else idx
            for idx in (train_indices, test_indices)
        )

    # Carry the original row labels over to both splits.
    train_out.index = df.index[train_indices]
    test_out.index = df.index[test_indices]
    return train_out, test_out

X_train, X_test = _process_df_objs(
X, x_type, X_train, X_test, train_indices, test_indices
)
if y is not None:
y_train, y_test = _process_df_objs(
y, y_type, y_train, y_test, train_indices, test_indices
)

X_train.index = X.index[train_indices]
X_test.index = X.index[test_indices]
else:
X_train = X_train.to_output(x_type)
X_test = X_test.to_output(x_type)

if y_type in ("series", "dataframe"):
y_train = output_to_df_obj_like(y_train, y, y_type)
y_test = output_to_df_obj_like(y_test, y, y_type)

if determine_array_type(y.index) == "pandas":
if isinstance(train_indices, cp.ndarray):
train_indices = train_indices.get()
if isinstance(test_indices, cp.ndarray):
test_indices = test_indices.get()

y_train.index = y.index[train_indices]
y_test.index = y.index[test_indices]
elif y_type is not None:
y_train = y_train.to_output(y_type)
y_test = y_test.to_output(y_type)
X_train = X_arr.iloc[train_indices]
X_test = X_arr.iloc[test_indices]
if y is not None:
y_train = y_arr[train_indices]
y_test = y_arr[test_indices]

if y is not None:
return X_train, X_test, y_train, y_test
Expand Down

0 comments on commit 82483ac

Please sign in to comment.