Commit

Merge branch 'branch-24.10' into fix-dask-estimator-serialization-prior-training
dantegd authored Sep 26, 2024
2 parents eec940b + f818527 commit 809daea
Showing 33 changed files with 309 additions and 132 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -9,7 +9,7 @@ repos:
files: python/.*
args: [--config, python/cuml/pyproject.toml]
- repo: https://github.com/PyCQA/flake8
rev: 5.0.4
rev: 7.1.1
hooks:
- id: flake8
args: [--config=python/cuml/.flake8]
2 changes: 1 addition & 1 deletion BUILD.md
@@ -18,7 +18,7 @@ To install cuML from source, ensure the following dependencies are met:
It is recommended to use conda for environment/package management. If doing so, development environment .yaml files are located in `conda/environments/all_*.yaml`. These files contain most of the dependencies mentioned above (notable exceptions are `gcc` and `zlib`). To create a development environment named `cuml_dev`, you can use the following commands:

```bash
conda create -n cuml_dev python=3.11
conda create -n cuml_dev python=3.12
conda env update -n cuml_dev --file=conda/environments/all_cuda-118_arch-x86_64.yaml
conda activate cuml_dev
```
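After activating the environment, one quick sanity check is that the interpreter falls inside the range cuML now supports; the bounds in this sketch mirror the `python>=3.10,<3.13` pin used throughout the updated environment files.

```python
# Minimal check that the active cuml_dev interpreter is in the supported range.
# The bounds mirror the python>=3.10,<3.13 pin in conda/environments/all_*.yaml.
import sys

supported = (3, 10) <= sys.version_info[:2] < (3, 13)
print(f"Python {sys.version.split()[0]} supported: {supported}")
```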
2 changes: 1 addition & 1 deletion ci/release/update-version.sh
@@ -24,7 +24,7 @@ NEXT_MINOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[2]}')
NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR}

# Need to distutils-normalize the original version
NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))")
NEXT_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_SHORT_TAG}'))")

echo "Preparing release $CURRENT_TAG => $NEXT_FULL_TAG"

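The updated one-liner stops reaching into setuptools' vendored copy of `packaging` and imports the standalone `packaging` library directly. A minimal sketch of the normalization it performs, assuming `packaging` is installed; the version strings are illustrative:

```python
# PEP 440 normalization via the standalone packaging library.
# Leading zeros and pre-release spellings are canonicalized, e.g. "24.06" -> "24.6".
from packaging.version import Version

for raw in ["24.10", "24.06", "24.10.00a0"]:
    print(raw, "->", Version(raw))
```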
10 changes: 3 additions & 7 deletions ci/test_wheel.sh
@@ -35,13 +35,9 @@ rapids-logger "pytest cuml single GPU"
-k 'test_sparse_pca_inputs' \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cuml-sparse-pca.xml"

# Temporarily disabled for CUDA 12.x wheels.
# Reference issue: https://github.com/rapidsai/cuml/issues/6050
if [[ "${RAPIDS_PY_CUDA_SUFFIX}" == "cu11" ]]; then
rapids-logger "pytest cuml-dask"
./ci/run_cuml_dask_pytests.sh \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cuml-dask.xml"
fi
rapids-logger "pytest cuml-dask"
./ci/run_cuml_dask_pytests.sh \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cuml-dask.xml"

rapids-logger "Test script exiting with value: $EXITCODE"
exit ${EXITCODE}
7 changes: 5 additions & 2 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -20,6 +20,7 @@ dependencies:
- dask-cudf==24.10.*,>=0.0.0a0
- dask-ml
- doxygen=1.9.1
- fmt>=11.0.2,<12
- gcc_linux-64=11.*
- graphviz
- hdbscan>=0.8.38,<0.8.39
@@ -58,7 +59,7 @@ dependencies:
- pytest-cov
- pytest-xdist
- pytest==7.*
- python>=3.10,<3.12
- python>=3.10,<3.13
- raft-dask==24.10.*,>=0.0.0a0
- rapids-build-backend>=0.3.0,<0.4.0.dev0
- rapids-dask-dependency==24.10.*,>=0.0.0a0
@@ -68,13 +69,15 @@ dependencies:
- scikit-learn==1.5
- scipy>=1.8.0
- seaborn
- setuptools
- spdlog>=1.14.1,<1.15
- sphinx-copybutton
- sphinx-markdown-tables
- sphinx<6
- statsmodels
- sysroot_linux-64==2.17
- treelite==4.3.0
- umap-learn==0.5.3
- umap-learn==0.5.6
- pip:
- dask-glm==0.3.0
name: all_cuda-118_arch-x86_64
7 changes: 5 additions & 2 deletions conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -22,6 +22,7 @@ dependencies:
- dask-cudf==24.10.*,>=0.0.0a0
- dask-ml
- doxygen=1.9.1
- fmt>=11.0.2,<12
- gcc_linux-64=11.*
- graphviz
- hdbscan>=0.8.38,<0.8.39
@@ -54,7 +55,7 @@ dependencies:
- pytest-cov
- pytest-xdist
- pytest==7.*
- python>=3.10,<3.12
- python>=3.10,<3.13
- raft-dask==24.10.*,>=0.0.0a0
- rapids-build-backend>=0.3.0,<0.4.0.dev0
- rapids-dask-dependency==24.10.*,>=0.0.0a0
@@ -64,13 +65,15 @@ dependencies:
- scikit-learn==1.5
- scipy>=1.8.0
- seaborn
- setuptools
- spdlog>=1.14.1,<1.15
- sphinx-copybutton
- sphinx-markdown-tables
- sphinx<6
- statsmodels
- sysroot_linux-64==2.17
- treelite==4.3.0
- umap-learn==0.5.3
- umap-learn==0.5.6
- pip:
- dask-glm==0.3.0
name: all_cuda-125_arch-x86_64
2 changes: 2 additions & 0 deletions conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml
@@ -14,6 +14,7 @@ dependencies:
- cuda-version=11.8
- cudatoolkit
- cxx-compiler
- fmt>=11.0.2,<12
- gcc_linux-64=11.*
- libcublas-dev=11.11.3.6
- libcublas=11.11.3.6
@@ -31,6 +32,7 @@ dependencies:
- librmm==24.10.*,>=0.0.0a0
- ninja
- nvcc_linux-64=11.8
- spdlog>=1.14.1,<1.15
- sysroot_linux-64==2.17
- tomli
name: clang_tidy_cuda-118_arch-x86_64
2 changes: 2 additions & 0 deletions conda/environments/cpp_all_cuda-118_arch-x86_64.yaml
@@ -12,6 +12,7 @@ dependencies:
- cuda-version=11.8
- cudatoolkit
- cxx-compiler
- fmt>=11.0.2,<12
- gcc_linux-64=11.*
- libcublas-dev=11.11.3.6
- libcublas=11.11.3.6
@@ -29,5 +30,6 @@ dependencies:
- librmm==24.10.*,>=0.0.0a0
- ninja
- nvcc_linux-64=11.8
- spdlog>=1.14.1,<1.15
- sysroot_linux-64==2.17
name: cpp_all_cuda-118_arch-x86_64
2 changes: 2 additions & 0 deletions conda/environments/cpp_all_cuda-125_arch-x86_64.yaml
@@ -14,6 +14,7 @@ dependencies:
- cuda-profiler-api
- cuda-version=12.5
- cxx-compiler
- fmt>=11.0.2,<12
- gcc_linux-64=11.*
- libcublas-dev
- libcufft-dev
@@ -25,5 +26,6 @@ dependencies:
- libraft==24.10.*,>=0.0.0a0
- librmm==24.10.*,>=0.0.0a0
- ninja
- spdlog>=1.14.1,<1.15
- sysroot_linux-64==2.17
name: cpp_all_cuda-125_arch-x86_64
2 changes: 1 addition & 1 deletion conda/recipes/cuml-cpu/meta.yaml
@@ -35,7 +35,7 @@ requirements:
- pandas
- scikit-learn=1.2
- hdbscan>=0.8.38,<0.8.39
- umap-learn=0.5.3
- umap-learn=0.5.6
- nvtx

tests: # [linux64]
6 changes: 6 additions & 0 deletions conda/recipes/libcuml/conda_build_config.yaml
@@ -19,6 +19,12 @@ c_stdlib_version:
cmake_version:
- ">=3.26.4,!=3.30.0"

fmt_version:
- ">=11.0.2,<12"

spdlog_version:
- ">=1.14.1,<1.15"

treelite_version:
- "=4.3.0"

2 changes: 2 additions & 0 deletions conda/recipes/libcuml/meta.yaml
@@ -68,10 +68,12 @@ requirements:
- libcusolver-dev
- libcusparse-dev
{% endif %}
- fmt {{ fmt_version }}
- libcumlprims ={{ minor_version }}
- libraft ={{ minor_version }}
- libraft-headers ={{ minor_version }}
- librmm ={{ minor_version }}
- spdlog {{ spdlog_version }}
- treelite {{ treelite_version }}

outputs:
21 changes: 21 additions & 0 deletions cpp/include/cuml/manifold/umap.hpp
@@ -84,6 +84,27 @@ void refine(const raft::handle_t& handle,
UMAPParams* params,
float* embeddings);

/**
* Initializes embeddings and performs a UMAP fit on them, which enables
* iterative fitting without callbacks.
*
* @param[in] handle: raft::handle_t
* @param[in] X: pointer to input array
* @param[in] n: n_samples of input array
* @param[in] d: n_features of input array
* @param[in] graph: pointer to raft::sparse::COO object computed using ML::UMAP::get_graph
* @param[in] params: pointer to ML::UMAPParams object
* @param[out] embeddings: pointer to current embedding with shape n * n_components, stores updated
* embeddings on executing refine
*/
void init_and_refine(const raft::handle_t& handle,
float* X,
int n,
int d,
raft::sparse::COO<float, int>* graph,
UMAPParams* params,
float* embeddings);

/**
* Dense fit
*
19 changes: 19 additions & 0 deletions cpp/src/umap/runner.cuh
@@ -247,12 +247,31 @@ void _refine(const raft::handle_t& handle,
value_t* embeddings)
{
cudaStream_t stream = handle.get_stream();
ML::Logger::get().setLevel(params->verbosity);

/**
* Run simplicial set embedding to approximate low-dimensional representation
*/
SimplSetEmbed::run<TPB_X, value_t>(inputs.n, inputs.d, graph, params, embeddings, stream);
}

template <typename value_idx, typename value_t, typename umap_inputs, int TPB_X>
void _init_and_refine(const raft::handle_t& handle,
const umap_inputs& inputs,
UMAPParams* params,
raft::sparse::COO<value_t>* graph,
value_t* embeddings)
{
cudaStream_t stream = handle.get_stream();
ML::Logger::get().setLevel(params->verbosity);

// Initialize embeddings
InitEmbed::run(handle, inputs.n, inputs.d, graph, params, embeddings, stream, params->init);

// Run simplicial set embedding
SimplSetEmbed::run<TPB_X, value_t>(inputs.n, inputs.d, graph, params, embeddings, stream);
}

template <typename value_idx, typename value_t, typename umap_inputs, int TPB_X>
void _fit(const raft::handle_t& handle,
const umap_inputs& inputs,
14 changes: 14 additions & 0 deletions cpp/src/umap/umap.cu
@@ -92,6 +92,20 @@ void refine(const raft::handle_t& handle,
handle, inputs, params, graph, embeddings);
}

void init_and_refine(const raft::handle_t& handle,
float* X,
int n,
int d,
raft::sparse::COO<float>* graph,
UMAPParams* params,
float* embeddings)
{
CUML_LOG_DEBUG("Calling UMAP::init_and_refine() with precomputed KNN");
manifold_dense_inputs_t<float> inputs(X, nullptr, n, d);
UMAPAlgo::_init_and_refine<knn_indices_dense_t, float, manifold_dense_inputs_t<float>, TPB_X>(
handle, inputs, params, graph, embeddings);
}

void fit(const raft::handle_t& handle,
float* X,
float* y,
11 changes: 9 additions & 2 deletions dependencies.yaml
@@ -140,10 +140,12 @@ dependencies:
packages:
- c-compiler
- cxx-compiler
- fmt>=11.0.2,<12
- libcumlprims==24.10.*,>=0.0.0a0
- libraft==24.10.*,>=0.0.0a0
- libraft-headers==24.10.*,>=0.0.0a0
- librmm==24.10.*,>=0.0.0a0
- spdlog>=1.14.1,<1.15
specific:
- output_types: conda
matrices:
@@ -472,8 +474,12 @@ dependencies:
packages:
- python=3.11
- matrix:
py: "3.12"
packages:
- python>=3.10,<3.12
- python=3.12
- matrix:
packages:
- python>=3.10,<3.13
test_libcuml:
common:
- output_types: conda
@@ -509,8 +515,9 @@ dependencies:
- seaborn
- *scikit_learn
- statsmodels
- umap-learn==0.5.3
- umap-learn==0.5.6
- pynndescent
- setuptools # Needed on Python 3.12 for dask-glm, which requires pkg_resources but Python 3.12 doesn't have setuptools by default
- output_types: conda
packages:
- pip
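The inline comment on the new `setuptools` entry explains the motivation: `dask-glm` imports `pkg_resources`, which ships with setuptools and is no longer available by default on Python 3.12. A small sketch of the failure mode the pin guards against:

```python
# Why setuptools is added to the Python 3.12 test environment.
# dask-glm imports pkg_resources at import time; without setuptools installed,
# a bare Python 3.12 environment raises ModuleNotFoundError here.
try:
    import pkg_resources  # noqa: F401  (provided by setuptools)
except ModuleNotFoundError as err:
    raise SystemExit("setuptools is required: dask-glm needs pkg_resources") from err
```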
5 changes: 5 additions & 0 deletions docs/source/api.rst
@@ -506,6 +506,11 @@ UMAP
.. autoclass:: cuml.UMAP
:members:

.. autofunction:: cuml.manifold.umap.fuzzy_simplicial_set

.. autofunction:: cuml.manifold.umap.simplicial_set_embedding


Random Projections
------------------

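The two UMAP functions documented just above (`cuml.manifold.umap.fuzzy_simplicial_set` and `cuml.manifold.umap.simplicial_set_embedding`) expose the graph-construction and embedding stages separately, the Python-facing counterpart of the `init_and_refine` path added on the C++ side. A hypothetical usage sketch; the argument names and return values are assumed to mirror the reference umap-learn API and are not confirmed signatures:

```python
# Hypothetical two-stage UMAP workflow using the newly documented functions.
# Argument names (n_neighbors, n_components, random_state) and return values
# are assumptions based on umap-learn; consult the cuML docs for exact signatures.
import cupy as cp
from cuml.manifold.umap import fuzzy_simplicial_set, simplicial_set_embedding

X = cp.random.random((1000, 32), dtype=cp.float32)

# Stage 1: build the fuzzy simplicial set (a sparse weighted kNN graph).
graph = fuzzy_simplicial_set(X, n_neighbors=15, random_state=42)

# Stage 2: initialize and optimize a low-dimensional embedding of that graph.
embedding = simplicial_set_embedding(X, graph, n_components=2)
print(embedding.shape)  # expected: (1000, 2)
```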
10 changes: 4 additions & 6 deletions python/cuml/cuml/common/kernel_utils.py
@@ -1,5 +1,5 @@
#
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -101,11 +101,9 @@ def cuda_kernel_factory(nvrtc_kernel_str, dtypes, kernel_name=None):
"{%d}" % idx, dtype_strs[idx]
)

kernel_name = f"""{uuid1()
if kernel_name is None
else kernel_name}_{
"".join(dtype_strs).replace(" ", "_")
}"""
kernel_name_prefix = uuid1() if kernel_name is None else kernel_name
kernel_name_suffix = "".join(dtype_strs).replace(" ", "_")
kernel_name = f"{kernel_name_prefix}_{kernel_name_suffix}"

nvrtc_kernel_str = "%s\nvoid %s%s" % (
extern_prefix,
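The refactor above replaces a nested, hard-to-read f-string with three explicit steps; the resulting kernel name is unchanged. A standalone sketch of the naming logic in isolation (the `dtype_strs` values are illustrative placeholders, not the factory's real type strings):

```python
# Standalone sketch of the kernel-name construction after the refactor.
from uuid import uuid1

def build_kernel_name(dtype_strs, kernel_name=None):
    # Fall back to a unique prefix when no explicit kernel name is given.
    kernel_name_prefix = uuid1() if kernel_name is None else kernel_name
    kernel_name_suffix = "".join(dtype_strs).replace(" ", "_")
    return f"{kernel_name_prefix}_{kernel_name_suffix}"

print(build_kernel_name(["float", "int"], kernel_name="axpy"))  # -> axpy_floatint
```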
6 changes: 2 additions & 4 deletions python/cuml/cuml/dask/manifold/umap.py
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -83,9 +83,7 @@ class UMAP(BaseEstimator, DelayedTransformMixin):
In addition to these missing features, you should expect to see
the final embeddings differing between `cuml.umap` and the reference
UMAP. In particular, the reference UMAP uses an approximate kNN
algorithm for large data sizes while cuml.umap always uses exact
kNN.
UMAP.
**Known issue:** If a UMAP model has not yet been fit, it cannot be pickled
4 changes: 2 additions & 2 deletions python/cuml/cuml/fil/fil.pyx
@@ -26,7 +26,7 @@ rmm = gpu_only_import('rmm')

from libcpp cimport bool
from libc.stdint cimport uintptr_t
from libc.stdlib cimport free
from libc.stdlib cimport free as c_free

import cuml.internals
from cuml.internals.array import CumlArray
@@ -545,7 +545,7 @@ cdef class ForestInference_impl():
treelite_params.threads_per_tree = kwargs['threads_per_tree']
if kwargs['compute_shape_str']:
if self.shape_str:
free(self.shape_str)
c_free(self.shape_str)
treelite_params.pforest_shape_str = &self.shape_str
else:
treelite_params.pforest_shape_str = NULL