Commit

Merge branch 'branch-24.10' into fix-dask-estimator-serialization-prior-training
dantegd authored Sep 26, 2024
2 parents eec940b + f818527 commit 809daea
Showing 33 changed files with 309 additions and 132 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -9,7 +9,7 @@ repos:
files: python/.*
args: [--config, python/cuml/pyproject.toml]
- repo: https://github.com/PyCQA/flake8
rev: 5.0.4
rev: 7.1.1
hooks:
- id: flake8
args: [--config=python/cuml/.flake8]
2 changes: 1 addition & 1 deletion BUILD.md
@@ -18,7 +18,7 @@ To install cuML from source, ensure the following dependencies are met:
It is recommended to use conda for environment/package management. If doing so, development environment .yaml files are located in `conda/environments/all_*.yaml`. These files contain most of the dependencies mentioned above (notable exceptions are `gcc` and `zlib`). To create a development environment named `cuml_dev`, you can use the following commands:

```bash
conda create -n cuml_dev python=3.11
conda create -n cuml_dev python=3.12
conda env update -n cuml_dev --file=conda/environments/all_cuda-118_arch-x86_64.yaml
conda activate cuml_dev
```
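After activating the environment, one quick sanity check is that the interpreter falls inside the range cuML now supports; the bounds in this sketch mirror the `python>=3.10,<3.13` pin used throughout the updated environment files.

```python
# Minimal check that the active cuml_dev interpreter is in the supported range.
# The bounds mirror the python>=3.10,<3.13 pin in conda/environments/all_*.yaml.
import sys

supported = (3, 10) <= sys.version_info[:2] < (3, 13)
print(f"Python {sys.version.split()[0]} supported: {supported}")
```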
2 changes: 1 addition & 1 deletion ci/release/update-version.sh
@@ -24,7 +24,7 @@ NEXT_MINOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[2]}')
NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR}

# Need to distutils-normalize the original version
NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))")
NEXT_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_SHORT_TAG}'))")

echo "Preparing release $CURRENT_TAG => $NEXT_FULL_TAG"

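The updated one-liner stops reaching into setuptools' vendored copy of `packaging` and imports the standalone `packaging` library directly. A minimal sketch of the normalization it performs, assuming `packaging` is installed; the version strings are illustrative:

```python
# PEP 440 normalization via the standalone packaging library.
# Leading zeros and pre-release spellings are canonicalized, e.g. "24.06" -> "24.6".
from packaging.version import Version

for raw in ["24.10", "24.06", "24.10.00a0"]:
    print(raw, "->", Version(raw))
```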
10 changes: 3 additions & 7 deletions ci/test_wheel.sh
@@ -35,13 +35,9 @@ rapids-logger "pytest cuml single GPU"
-k 'test_sparse_pca_inputs' \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cuml-sparse-pca.xml"

# Temporarily disabled for CUDA 12.x wheels.
# Reference issue: https://github.com/rapidsai/cuml/issues/6050
if [[ "${RAPIDS_PY_CUDA_SUFFIX}" == "cu11" ]]; then
rapids-logger "pytest cuml-dask"
./ci/run_cuml_dask_pytests.sh \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cuml-dask.xml"
fi
rapids-logger "pytest cuml-dask"
./ci/run_cuml_dask_pytests.sh \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cuml-dask.xml"

rapids-logger "Test script exiting with value: $EXITCODE"
exit ${EXITCODE}
7 changes: 5 additions & 2 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -20,6 +20,7 @@ dependencies:
- dask-cudf==24.10.*,>=0.0.0a0
- dask-ml
- doxygen=1.9.1
- fmt>=11.0.2,<12
- gcc_linux-64=11.*
- graphviz
- hdbscan>=0.8.38,<0.8.39
@@ -58,7 +59,7 @@ dependencies:
- pytest-cov
- pytest-xdist
- pytest==7.*
- python>=3.10,<3.12
- python>=3.10,<3.13
- raft-dask==24.10.*,>=0.0.0a0
- rapids-build-backend>=0.3.0,<0.4.0.dev0
- rapids-dask-dependency==24.10.*,>=0.0.0a0
@@ -68,13 +69,15 @@ dependencies:
- scikit-learn==1.5
- scipy>=1.8.0
- seaborn
- setuptools
- spdlog>=1.14.1,<1.15
- sphinx-copybutton
- sphinx-markdown-tables
- sphinx<6
- statsmodels
- sysroot_linux-64==2.17
- treelite==4.3.0
- umap-learn==0.5.3
- umap-learn==0.5.6
- pip:
- dask-glm==0.3.0
name: all_cuda-118_arch-x86_64
7 changes: 5 additions & 2 deletions conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -22,6 +22,7 @@ dependencies:
- dask-cudf==24.10.*,>=0.0.0a0
- dask-ml
- doxygen=1.9.1
- fmt>=11.0.2,<12
- gcc_linux-64=11.*
- graphviz
- hdbscan>=0.8.38,<0.8.39
@@ -54,7 +55,7 @@ dependencies:
- pytest-cov
- pytest-xdist
- pytest==7.*
- python>=3.10,<3.12
- python>=3.10,<3.13
- raft-dask==24.10.*,>=0.0.0a0
- rapids-build-backend>=0.3.0,<0.4.0.dev0
- rapids-dask-dependency==24.10.*,>=0.0.0a0
@@ -64,13 +65,15 @@ dependencies:
- scikit-learn==1.5
- scipy>=1.8.0
- seaborn
- setuptools
- spdlog>=1.14.1,<1.15
- sphinx-copybutton
- sphinx-markdown-tables
- sphinx<6
- statsmodels
- sysroot_linux-64==2.17
- treelite==4.3.0
- umap-learn==0.5.3
- umap-learn==0.5.6
- pip:
- dask-glm==0.3.0
name: all_cuda-125_arch-x86_64
2 changes: 2 additions & 0 deletions conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml
@@ -14,6 +14,7 @@ dependencies:
- cuda-version=11.8
- cudatoolkit
- cxx-compiler
- fmt>=11.0.2,<12
- gcc_linux-64=11.*
- libcublas-dev=11.11.3.6
- libcublas=11.11.3.6
@@ -31,6 +32,7 @@ dependencies:
- librmm==24.10.*,>=0.0.0a0
- ninja
- nvcc_linux-64=11.8
- spdlog>=1.14.1,<1.15
- sysroot_linux-64==2.17
- tomli
name: clang_tidy_cuda-118_arch-x86_64
2 changes: 2 additions & 0 deletions conda/environments/cpp_all_cuda-118_arch-x86_64.yaml
@@ -12,6 +12,7 @@ dependencies:
- cuda-version=11.8
- cudatoolkit
- cxx-compiler
- fmt>=11.0.2,<12
- gcc_linux-64=11.*
- libcublas-dev=11.11.3.6
- libcublas=11.11.3.6
@@ -29,5 +30,6 @@ dependencies:
- librmm==24.10.*,>=0.0.0a0
- ninja
- nvcc_linux-64=11.8
- spdlog>=1.14.1,<1.15
- sysroot_linux-64==2.17
name: cpp_all_cuda-118_arch-x86_64
2 changes: 2 additions & 0 deletions conda/environments/cpp_all_cuda-125_arch-x86_64.yaml
@@ -14,6 +14,7 @@ dependencies:
- cuda-profiler-api
- cuda-version=12.5
- cxx-compiler
- fmt>=11.0.2,<12
- gcc_linux-64=11.*
- libcublas-dev
- libcufft-dev
@@ -25,5 +26,6 @@ dependencies:
- libraft==24.10.*,>=0.0.0a0
- librmm==24.10.*,>=0.0.0a0
- ninja
- spdlog>=1.14.1,<1.15
- sysroot_linux-64==2.17
name: cpp_all_cuda-125_arch-x86_64
2 changes: 1 addition & 1 deletion conda/recipes/cuml-cpu/meta.yaml
@@ -35,7 +35,7 @@ requirements:
- pandas
- scikit-learn=1.2
- hdbscan>=0.8.38,<0.8.39
- umap-learn=0.5.3
- umap-learn=0.5.6
- nvtx

tests: # [linux64]
6 changes: 6 additions & 0 deletions conda/recipes/libcuml/conda_build_config.yaml
@@ -19,6 +19,12 @@ c_stdlib_version:
cmake_version:
- ">=3.26.4,!=3.30.0"

fmt_version:
- ">=11.0.2,<12"

spdlog_version:
- ">=1.14.1,<1.15"

treelite_version:
- "=4.3.0"

2 changes: 2 additions & 0 deletions conda/recipes/libcuml/meta.yaml
@@ -68,10 +68,12 @@ requirements:
- libcusolver-dev
- libcusparse-dev
{% endif %}
- fmt {{ fmt_version }}
- libcumlprims ={{ minor_version }}
- libraft ={{ minor_version }}
- libraft-headers ={{ minor_version }}
- librmm ={{ minor_version }}
- spdlog {{ spdlog_version }}
- treelite {{ treelite_version }}

outputs:
21 changes: 21 additions & 0 deletions cpp/include/cuml/manifold/umap.hpp
@@ -84,6 +84,27 @@ void refine(const raft::handle_t& handle,
UMAPParams* params,
float* embeddings);

/**
* Initializes embeddings and performs a UMAP fit on them, which enables
* iterative fitting without callbacks.
*
* @param[in] handle: raft::handle_t
* @param[in] X: pointer to input array
* @param[in] n: n_samples of input array
* @param[in] d: n_features of input array
* @param[in] graph: pointer to raft::sparse::COO object computed using ML::UMAP::get_graph
* @param[in] params: pointer to ML::UMAPParams object
* @param[out] embeddings: pointer to current embedding with shape n * n_components, stores updated
* embeddings on executing refine
*/
void init_and_refine(const raft::handle_t& handle,
float* X,
int n,
int d,
raft::sparse::COO<float, int>* graph,
UMAPParams* params,
float* embeddings);

/**
* Dense fit
*
19 changes: 19 additions & 0 deletions cpp/src/umap/runner.cuh
@@ -247,12 +247,31 @@ void _refine(const raft::handle_t& handle,
value_t* embeddings)
{
cudaStream_t stream = handle.get_stream();
ML::Logger::get().setLevel(params->verbosity);

/**
* Run simplicial set embedding to approximate low-dimensional representation
*/
SimplSetEmbed::run<TPB_X, value_t>(inputs.n, inputs.d, graph, params, embeddings, stream);
}

template <typename value_idx, typename value_t, typename umap_inputs, int TPB_X>
void _init_and_refine(const raft::handle_t& handle,
const umap_inputs& inputs,
UMAPParams* params,
raft::sparse::COO<value_t>* graph,
value_t* embeddings)
{
cudaStream_t stream = handle.get_stream();
ML::Logger::get().setLevel(params->verbosity);

// Initialize embeddings
InitEmbed::run(handle, inputs.n, inputs.d, graph, params, embeddings, stream, params->init);

// Run simplicial set embedding
SimplSetEmbed::run<TPB_X, value_t>(inputs.n, inputs.d, graph, params, embeddings, stream);
}

template <typename value_idx, typename value_t, typename umap_inputs, int TPB_X>
void _fit(const raft::handle_t& handle,
const umap_inputs& inputs,
14 changes: 14 additions & 0 deletions cpp/src/umap/umap.cu
@@ -92,6 +92,20 @@ void refine(const raft::handle_t& handle,
handle, inputs, params, graph, embeddings);
}

void init_and_refine(const raft::handle_t& handle,
float* X,
int n,
int d,
raft::sparse::COO<float>* graph,
UMAPParams* params,
float* embeddings)
{
CUML_LOG_DEBUG("Calling UMAP::init_and_refine() with precomputed KNN");
manifold_dense_inputs_t<float> inputs(X, nullptr, n, d);
UMAPAlgo::_init_and_refine<knn_indices_dense_t, float, manifold_dense_inputs_t<float>, TPB_X>(
handle, inputs, params, graph, embeddings);
}

void fit(const raft::handle_t& handle,
float* X,
float* y,
11 changes: 9 additions & 2 deletions dependencies.yaml
@@ -140,10 +140,12 @@ dependencies:
packages:
- c-compiler
- cxx-compiler
- fmt>=11.0.2,<12
- libcumlprims==24.10.*,>=0.0.0a0
- libraft==24.10.*,>=0.0.0a0
- libraft-headers==24.10.*,>=0.0.0a0
- librmm==24.10.*,>=0.0.0a0
- spdlog>=1.14.1,<1.15
specific:
- output_types: conda
matrices:
@@ -472,8 +474,12 @@ dependencies:
packages:
- python=3.11
- matrix:
py: "3.12"
packages:
- python>=3.10,<3.12
- python=3.12
- matrix:
packages:
- python>=3.10,<3.13
test_libcuml:
common:
- output_types: conda
@@ -509,8 +515,9 @@ dependencies:
- seaborn
- *scikit_learn
- statsmodels
- umap-learn==0.5.3
- umap-learn==0.5.6
- pynndescent
- setuptools # Needed on Python 3.12 for dask-glm, which requires pkg_resources but Python 3.12 doesn't have setuptools by default
- output_types: conda
packages:
- pip
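The inline comment on the new `setuptools` entry explains the motivation: `dask-glm` imports `pkg_resources`, which ships with setuptools and is no longer available by default on Python 3.12. A small sketch of the failure mode the pin guards against:

```python
# Why setuptools is added to the Python 3.12 test environment.
# dask-glm imports pkg_resources at import time; without setuptools installed,
# a bare Python 3.12 environment raises ModuleNotFoundError here.
try:
    import pkg_resources  # noqa: F401  (provided by setuptools)
except ModuleNotFoundError as err:
    raise SystemExit("setuptools is required: dask-glm needs pkg_resources") from err
```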
5 changes: 5 additions & 0 deletions docs/source/api.rst
@@ -506,6 +506,11 @@ UMAP
.. autoclass:: cuml.UMAP
:members:

.. autofunction:: cuml.manifold.umap.fuzzy_simplicial_set

.. autofunction:: cuml.manifold.umap.simplicial_set_embedding


Random Projections
------------------

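The two UMAP functions documented just above (`cuml.manifold.umap.fuzzy_simplicial_set` and `cuml.manifold.umap.simplicial_set_embedding`) expose the graph-construction and embedding stages separately, the Python-facing counterpart of the `init_and_refine` path added on the C++ side. A hypothetical usage sketch; the argument names and return values are assumed to mirror the reference umap-learn API and are not confirmed signatures:

```python
# Hypothetical two-stage UMAP workflow using the newly documented functions.
# Argument names (n_neighbors, n_components, random_state) and return values
# are assumptions based on umap-learn; consult the cuML docs for exact signatures.
import cupy as cp
from cuml.manifold.umap import fuzzy_simplicial_set, simplicial_set_embedding

X = cp.random.random((1000, 32), dtype=cp.float32)

# Stage 1: build the fuzzy simplicial set (a sparse weighted kNN graph).
graph = fuzzy_simplicial_set(X, n_neighbors=15, random_state=42)

# Stage 2: initialize and optimize a low-dimensional embedding of that graph.
embedding = simplicial_set_embedding(X, graph, n_components=2)
print(embedding.shape)  # expected: (1000, 2)
```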
10 changes: 4 additions & 6 deletions python/cuml/cuml/common/kernel_utils.py
@@ -1,5 +1,5 @@
#
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -101,11 +101,9 @@ def cuda_kernel_factory(nvrtc_kernel_str, dtypes, kernel_name=None):
"{%d}" % idx, dtype_strs[idx]
)

kernel_name = f"""{uuid1()
if kernel_name is None
else kernel_name}_{
"".join(dtype_strs).replace(" ", "_")
}"""
kernel_name_prefix = uuid1() if kernel_name is None else kernel_name
kernel_name_suffix = "".join(dtype_strs).replace(" ", "_")
kernel_name = f"{kernel_name_prefix}_{kernel_name_suffix}"

nvrtc_kernel_str = "%s\nvoid %s%s" % (
extern_prefix,
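The refactor above replaces a nested, hard-to-read f-string with three explicit steps; the resulting kernel name is unchanged. A standalone sketch of the naming logic in isolation (the `dtype_strs` values are illustrative placeholders, not the factory's real type strings):

```python
# Standalone sketch of the kernel-name construction after the refactor.
from uuid import uuid1

def build_kernel_name(dtype_strs, kernel_name=None):
    # Fall back to a unique prefix when no explicit kernel name is given.
    kernel_name_prefix = uuid1() if kernel_name is None else kernel_name
    kernel_name_suffix = "".join(dtype_strs).replace(" ", "_")
    return f"{kernel_name_prefix}_{kernel_name_suffix}"

print(build_kernel_name(["float", "int"], kernel_name="axpy"))  # -> axpy_floatint
```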
6 changes: 2 additions & 4 deletions python/cuml/cuml/dask/manifold/umap.py
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -83,9 +83,7 @@ class UMAP(BaseEstimator, DelayedTransformMixin):
In addition to these missing features, you should expect to see
the final embeddings differing between `cuml.umap` and the reference
UMAP. In particular, the reference UMAP uses an approximate kNN
algorithm for large data sizes while cuml.umap always uses exact
kNN.
UMAP.
**Known issue:** If a UMAP model has not yet been fit, it cannot be pickled
4 changes: 2 additions & 2 deletions python/cuml/cuml/fil/fil.pyx
@@ -26,7 +26,7 @@ rmm = gpu_only_import('rmm')

from libcpp cimport bool
from libc.stdint cimport uintptr_t
from libc.stdlib cimport free
from libc.stdlib cimport free as c_free

import cuml.internals
from cuml.internals.array import CumlArray
@@ -545,7 +545,7 @@ cdef class ForestInference_impl():
treelite_params.threads_per_tree = kwargs['threads_per_tree']
if kwargs['compute_shape_str']:
if self.shape_str:
free(self.shape_str)
c_free(self.shape_str)
treelite_params.pforest_shape_str = &self.shape_str
else:
treelite_params.pforest_shape_str = NULL