Skip to content

Commit

Permalink
passing tests
Browse files Browse the repository at this point in the history
  • Loading branch information
divyegala committed Oct 11, 2024
1 parent 592168e commit e51419d
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 15 deletions.
4 changes: 3 additions & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ dependencies:
- fmt>=11.0.2,<12
- gcc_linux-64=11.*
- graphviz
- hdbscan>=0.8.38,<0.8.39
- hypothesis>=6.0,<7
- ipykernel
- ipython
Expand All @@ -51,6 +50,7 @@ dependencies:
- numpydoc
- nvcc_linux-64=11.8
- packaging
- pip
- pydata-sphinx-theme!=0.14.2
- pylibraft==24.12.*,>=0.0.0a0
- pynndescent
Expand Down Expand Up @@ -78,4 +78,6 @@ dependencies:
- sysroot_linux-64==2.17
- treelite==4.3.0
- umap-learn==0.5.6
- pip:
- hdbscan @ git+https://github.com/scikit-learn-contrib/hdbscan.git@master
name: all_cuda-118_arch-x86_64
4 changes: 3 additions & 1 deletion conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ dependencies:
- fmt>=11.0.2,<12
- gcc_linux-64=11.*
- graphviz
- hdbscan>=0.8.38,<0.8.39
- hypothesis>=6.0,<7
- ipykernel
- ipython
Expand All @@ -47,6 +46,7 @@ dependencies:
- numpy>=1.23,<3.0a0
- numpydoc
- packaging
- pip
- pydata-sphinx-theme!=0.14.2
- pylibraft==24.12.*,>=0.0.0a0
- pynndescent
Expand Down Expand Up @@ -74,4 +74,6 @@ dependencies:
- sysroot_linux-64==2.17
- treelite==4.3.0
- umap-learn==0.5.6
- pip:
- hdbscan @ git+https://github.com/scikit-learn-contrib/hdbscan.git@master
name: all_cuda-125_arch-x86_64
10 changes: 9 additions & 1 deletion dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,6 @@ dependencies:
packages:
- *cython
- dask-ml
- hdbscan>=0.8.38,<0.8.39
- hypothesis>=6.0,<7
- nltk
- numpydoc
Expand All @@ -527,6 +526,15 @@ dependencies:
- umap-learn==0.5.6
- pynndescent
- setuptools # Needed on Python 3.12 for dask-glm, which requires pkg_resources but Python 3.12 doesn't have setuptools by default
- output_types: conda
packages:
- pip
- pip:
- hdbscan @ git+https://github.com/scikit-learn-contrib/hdbscan.git@master
- output_types: pyproject
packages:
- hdbscan @ git+https://github.com/scikit-learn-contrib/hdbscan.git@master

test_notebooks:
common:
- output_types: [conda, requirements]
Expand Down
22 changes: 14 additions & 8 deletions python/cuml/cuml/cluster/hdbscan/prediction.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021-2023, NVIDIA CORPORATION.
# Copyright (c) 2021-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -166,19 +166,25 @@ def all_points_membership_vectors(clusterer, batch_size=4096):

# trained on gpu
if not hasattr(clusterer, "_cpu_model"):
# the reference HDBSCAN implementation uses @property
# for attributes without setters available for them,
# so they can't be transferred from the GPU model
# to the CPU model
raise ValueError("Inferring on CPU is not supported yet when the "
"model has been trained on GPU")
clusterer.import_cpu_model()
clusterer.build_cpu_model()
clusterer.gpu_to_cpu()
# These attributes have to be reassigned to the CPU model
# as the raw arrays because the reference HDBSCAN implementation
# reconstructs the objects from the raw arrays
clusterer._cpu_model.condensed_tree_ = \
clusterer.condensed_tree_._raw_tree
clusterer._cpu_model.single_linkage_tree_ = \
clusterer.single_linkage_tree_._linkage
clusterer._cpu_model.minimum_spanning_tree_ = \
clusterer.minimum_spanning_tree_._mst

# NOTE: found after a long debugging session -- this method does not
# work on CPU unless raw_data is replaced by a copy of itself (cause unknown)
clusterer._cpu_model.prediction_data_.raw_data = \
clusterer._cpu_model.prediction_data_.raw_data.copy()
return cpu_all_points_membership_vectors(clusterer._cpu_model)

# gpu infer, cpu/gpu train
elif device_type == DeviceType.device:
# trained on cpu
if hasattr(clusterer, "_cpu_model"):
Expand Down
5 changes: 2 additions & 3 deletions python/cuml/cuml/tests/test_device_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -932,9 +932,6 @@ def test_nn_methods(train_device, infer_device):
@pytest.mark.parametrize("infer_device", ["cpu", "gpu"])
def test_hdbscan_methods(train_device, infer_device):

if train_device == "gpu" and infer_device == "cpu":
pytest.skip("Can't transfer attributes to cpu for now")

ref_model = refHDBSCAN(
prediction_data=True,
approx_min_span_tree=False,
Expand All @@ -951,11 +948,13 @@ def test_hdbscan_methods(train_device, infer_device):
ref_membership = cpu_all_points_membership_vectors(ref_model)
ref_labels, ref_probs = cpu_approximate_predict(ref_model, X_test_blob)

gen_min_span_tree = train_device == "gpu" and infer_device == "cpu"
model = HDBSCAN(
prediction_data=True,
approx_min_span_tree=False,
max_cluster_size=0,
min_cluster_size=30,
gen_min_span_tree=gen_min_span_tree,
)
with using_device_type(train_device):
trained_labels = model.fit_predict(X_train_blob)
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ classifiers = [
test = [
"cython>=3.0.0",
"dask-ml",
"hdbscan>=0.8.38,<0.8.39",
"hdbscan @ git+https://github.com/scikit-learn-contrib/hdbscan.git@master",
"hypothesis>=6.0,<7",
"nltk",
"numpydoc",
Expand Down

0 comments on commit e51419d

Please sign in to comment.