Multiple small fixes and improvements
dantegd authored Feb 23, 2024
1 parent e2cca0b commit 667cef5
Showing 7 changed files with 136 additions and 127 deletions.
2 changes: 1 addition & 1 deletion python/cuvs/cuvs/common/CMakeLists.txt
@@ -14,7 +14,7 @@

# Set the list of Cython files to build
set(cython_sources cydlpack.pyx)
set(linked_libraries cuvs::cuvs)
set(linked_libraries cuvs::cuvs cuvs_c)

# Build all of the Cython targets
rapids_cython_create_modules(
11 changes: 3 additions & 8 deletions python/cuvs/cuvs/common/__init__.py
@@ -13,14 +13,9 @@
# limitations under the License.


from .cagra import Index, IndexParams, SearchParams, build, load, save, search
from .temp_raft import auto_sync_resources


__all__ = [
"Index",
"IndexParams",
"SearchParams",
"build",
"load",
"save",
"search",
"auto_sync_resources"
]
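
With this cleanup, auto_sync_resources is the module's only export. A quick hypothetical session illustrating the trimmed public surface (not part of the diff):

>>> import cuvs.common
>>> cuvs.common.__all__
['auto_sync_resources']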
17 changes: 16 additions & 1 deletion python/cuvs/cuvs/common/temp_raft.py
@@ -18,6 +18,21 @@

# This file has code that will be upstreamed to RAFT

import functools

from pylibraft.common import DeviceResources


_resources_param_string = """
handle : Optional RAFT resource handle for reusing CUDA resources.
If a handle isn't supplied, CUDA resources will be
allocated inside this function and synchronized before the
function exits. If a handle is supplied, you will need to
explicitly synchronize yourself by calling `handle.sync()`
before accessing the output.
""".strip()


def auto_sync_resources(f):
"""
This is identical to auto_sync_handle except for the proposed name change.
@@ -36,6 +51,6 @@ def wrapper(*args, resources=None, **kwargs):
return ret_value

wrapper.__doc__ = wrapper.__doc__.format(
handle_docstring=_HANDLE_PARAM_DOCSTRING
resources_docstring=_resources_param_string
)
return wrapper
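
The middle of the wrapper is elided by the hunk above. A minimal sketch of how the decorator plausibly fits together, assuming only the names visible in this diff (functools, DeviceResources, _resources_param_string, the resources=None keyword); the unseen body is hedged in comments:

import functools

from pylibraft.common import DeviceResources


def auto_sync_resources(f):
    @functools.wraps(f)
    def wrapper(*args, resources=None, **kwargs):
        # Assumption: when no handle is supplied, create one and synchronize
        # it before returning, as _resources_param_string documents.
        sync_after = resources is None
        resources = resources or DeviceResources()
        ret_value = f(*args, resources=resources, **kwargs)
        if sync_after:
            resources.sync()
        return ret_value

    wrapper.__doc__ = wrapper.__doc__.format(
        resources_docstring=_resources_param_string
    )
    return wrapper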
2 changes: 1 addition & 1 deletion python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt
@@ -14,7 +14,7 @@

# Set the list of Cython files to build
set(cython_sources cagra.pyx)
set(linked_libraries cuvs::cuvs)
set(linked_libraries cuvs::cuvs cuvs_c)

# Build all of the Cython targets
rapids_cython_create_modules(
5 changes: 2 additions & 3 deletions python/cuvs/cuvs/neighbors/cagra/__init__.py
@@ -13,12 +13,11 @@
# limitations under the License.


from .cagra import Index, IndexParams, SearchParams, build, load, save, search
from .cagra import Index, IndexParams, SearchParams, build_index

__all__ = [
"Index",
"IndexParams",
"SearchParams",
"build",
"search",
"build_index",
]
4 changes: 2 additions & 2 deletions python/cuvs/cuvs/neighbors/cagra/c/cagra_c.pxd
@@ -73,12 +73,12 @@ cdef extern from "cuvs/neighbors/cagra_c.h" nogil:
cuvsError_t cagraIndexDestroy(cagraIndex_t index)

cuvsError_t cagraBuild(cuvsResources_t res,
cagraIndexParams params,
cagraIndexParams* params,
DLManagedTensor* dataset,
cagraIndex_t index);

cuvsError_t cagraSearch(cuvsResources_t res,
cagraSearchParams params,
cagraSearchParams* params,
cagraIndex_t index,
DLManagedTensor* queries,
DLManagedTensor* neighbors,
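
Both entry points now receive their parameter structs by pointer rather than by value, so no copy crosses the C ABI boundary and the Cython side can hand over the address of a struct it already owns (see the &index_params.params line in cagra.pyx below). A standalone ctypes analogy of that calling pattern, with hypothetical fields:

import ctypes

class CagraIndexParams(ctypes.Structure):
    # Hypothetical fields for illustration; the real cagraIndexParams differs.
    _fields_ = [("intermediate_graph_degree", ctypes.c_size_t),
                ("graph_degree", ctypes.c_size_t)]

params = CagraIndexParams(128, 64)
# A by-pointer API receives the caller's struct directly, without copying:
params_ptr = ctypes.byref(params)  # analogous to &index_params.params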
222 changes: 111 additions & 111 deletions python/cuvs/cuvs/neighbors/cagra/cagra.pyx
@@ -146,7 +146,7 @@ def build_index(IndexParams index_params, dataset, resources=None):
index_params : IndexParams object
dataset : CUDA array interface compliant matrix shape (n_samples, dim)
Supported dtype [float, int8, uint8]
{handle_docstring}
{resources_docstring}
Returns
-------
@@ -187,11 +187,12 @@ def build_index(IndexParams index_params, dataset, resources=None):
cdef Index idx = Index()
cdef cuvsError_t build_status
cdef cydlpack.DLManagedTensor dataset_dlpack = cydlpack.dlpack_c(dataset_ai)
cdef cagra_c.cagraIndexParams* params = &index_params.params

with cuda_interruptible():
build_status = cagra_c.cagraBuild(
deref(resources_),
index_params.params,
params,
&dataset_dlpack,
idx.index
)
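
For orientation, a hedged usage sketch of build_index based only on the signature and dtypes in the docstring above (shapes and values are made up):

import cupy as cp

from cuvs.neighbors import cagra

dataset = cp.random.random_sample((50000, 50), dtype=cp.float32)
# No explicit resources handle is passed, so per @auto_sync_resources a
# handle is created and synchronized before build_index returns.
index = cagra.build_index(cagra.IndexParams(), dataset)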
@@ -363,112 +364,111 @@ cdef class SearchParams:
def rand_xor_mask(self):
return self.params.rand_xor_mask


@auto_sync_resources
@auto_convert_output
def search(SearchParams search_params,
Index index,
queries,
k,
neighbors=None,
distances=None,
resources=None):
"""
Find the k nearest neighbors for each query.
Parameters
----------
search_params : SearchParams
index : Index
Trained CAGRA index.
queries : CUDA array interface compliant matrix shape (n_samples, dim)
Supported dtype [float, int8, uint8]
k : int
The number of neighbors.
neighbors : Optional CUDA array interface compliant matrix shape
(n_queries, k), dtype int64_t. If supplied, neighbor
indices will be written here in-place. (default None)
distances : Optional CUDA array interface compliant matrix shape
(n_queries, k) If supplied, the distances to the
neighbors will be written here in-place. (default None)
{handle_docstring}
Examples
--------
>>> import cupy as cp
>>> from pylibraft.common import DeviceResources
>>> from pylibraft.neighbors import cagra
>>> n_samples = 50000
>>> n_features = 50
>>> n_queries = 1000
>>> dataset = cp.random.random_sample((n_samples, n_features),
... dtype=cp.float32)
>>> # Build index
>>> handle = DeviceResources()
>>> index = cagra.build(cagra.IndexParams(), dataset, handle=handle)
>>> # Search using the built index
>>> queries = cp.random.random_sample((n_queries, n_features),
... dtype=cp.float32)
>>> k = 10
>>> search_params = cagra.SearchParams(
... max_queries=100,
... itopk_size=64
... )
>>> # Using a pooling allocator reduces overhead of temporary array
>>> # creation during search. This is useful if multiple searches
>>> # are performed with the same query size.
>>> distances, neighbors = cagra.search(search_params, index, queries,
... k, handle=handle)
>>> # pylibraft functions are often asynchronous so the
>>> # handle needs to be explicitly synchronized
>>> handle.sync()
>>> neighbors = cp.asarray(neighbors)
>>> distances = cp.asarray(distances)
"""
if not index.trained:
raise ValueError("Index need to be built before calling search.")

if resources is None:
resources = DeviceResources()
cdef device_resources* resources_ = \
<device_resources*><size_t>resources.getHandle()

# todo(dgd): we can make the check of dtype a parameter of wrap_array
# in RAFT to make this a single call
queries_cai = cai_wrapper(queries)
_check_input_array(queries_cai, [np.dtype('float32'), np.dtype('byte'),
np.dtype('ubyte')],
exp_cols=index.dim)

cdef uint32_t n_queries = queries_cai.shape[0]

if neighbors is None:
neighbors = device_ndarray.empty((n_queries, k), dtype='uint32')

neighbors_cai = cai_wrapper(neighbors)
_check_input_array(neighbors_cai, [np.dtype('uint32')],
exp_rows=n_queries, exp_cols=k)

if distances is None:
distances = device_ndarray.empty((n_queries, k), dtype='float32')

distances_cai = cai_wrapper(distances)
_check_input_array(distances_cai, [np.dtype('float32')],
exp_rows=n_queries, exp_cols=k)

cdef cagra_c.cagraSearchParams params = search_params.params
cdef cydlpack.DLManagedTensor queries_dlpack = cydlpack.dlpack_c(queries_cai)
cdef cydlpack.DLManagedTensor neighbors_dlpack = cydlpack.dlpack_c(neighbors_cai)
cdef cydlpack.DLManagedTensor distances_dlpack = cydlpack.dlpack_c(distances_cai)

with cuda_interruptible():
cagra_c.cagraSearch(
<cuvsResources_t> resources_,
params,
index.index,
&queries_dlpack,
&neighbors_dlpack,
&distances_dlpack
)

return (distances, neighbors)
# @auto_sync_resources
# @auto_convert_output
# def search(SearchParams search_params,
# Index index,
# queries,
# k,
# neighbors=None,
# distances=None,
# resources=None):
# """
# Find the k nearest neighbors for each query.

# Parameters
# ----------
# search_params : SearchParams
# index : Index
# Trained CAGRA index.
# queries : CUDA array interface compliant matrix shape (n_samples, dim)
# Supported dtype [float, int8, uint8]
# k : int
# The number of neighbors.
# neighbors : Optional CUDA array interface compliant matrix shape
# (n_queries, k), dtype int64_t. If supplied, neighbor
# indices will be written here in-place. (default None)
# distances : Optional CUDA array interface compliant matrix shape
# (n_queries, k) If supplied, the distances to the
# neighbors will be written here in-place. (default None)
# {resources_docstring}

# Examples
# --------
# >>> import cupy as cp
# >>> from pylibraft.common import DeviceResources
# >>> from pylibraft.neighbors import cagra
# >>> n_samples = 50000
# >>> n_features = 50
# >>> n_queries = 1000
# >>> dataset = cp.random.random_sample((n_samples, n_features),
# ... dtype=cp.float32)
# >>> # Build index
# >>> handle = DeviceResources()
# >>> index = cagra.build(cagra.IndexParams(), dataset, handle=handle)
# >>> # Search using the built index
# >>> queries = cp.random.random_sample((n_queries, n_features),
# ... dtype=cp.float32)
# >>> k = 10
# >>> search_params = cagra.SearchParams(
# ... max_queries=100,
# ... itopk_size=64
# ... )
# >>> # Using a pooling allocator reduces overhead of temporary array
# >>> # creation during search. This is useful if multiple searches
# >>> # are performed with the same query size.
# >>> distances, neighbors = cagra.search(search_params, index, queries,
# ... k, handle=handle)
# >>> # pylibraft functions are often asynchronous so the
# >>> # handle needs to be explicitly synchronized
# >>> handle.sync()
# >>> neighbors = cp.asarray(neighbors)
# >>> distances = cp.asarray(distances)
# """
# if not index.trained:
# raise ValueError("Index need to be built before calling search.")

# if resources is None:
# resources = DeviceResources()
# cdef device_resources* resources_ = \
# <device_resources*><size_t>resources.getHandle()

# # todo(dgd): we can make the check of dtype a parameter of wrap_array
# # in RAFT to make this a single call
# queries_cai = cai_wrapper(queries)
# _check_input_array(queries_cai, [np.dtype('float32'), np.dtype('byte'),
# np.dtype('ubyte')],
# exp_cols=index.dim)

# cdef uint32_t n_queries = queries_cai.shape[0]

# if neighbors is None:
# neighbors = device_ndarray.empty((n_queries, k), dtype='uint32')

# neighbors_cai = cai_wrapper(neighbors)
# _check_input_array(neighbors_cai, [np.dtype('uint32')],
# exp_rows=n_queries, exp_cols=k)

# if distances is None:
# distances = device_ndarray.empty((n_queries, k), dtype='float32')

# distances_cai = cai_wrapper(distances)
# _check_input_array(distances_cai, [np.dtype('float32')],
# exp_rows=n_queries, exp_cols=k)

# cdef cagra_c.cagraSearchParams* params = &search_params.params
# cdef cydlpack.DLManagedTensor queries_dlpack = cydlpack.dlpack_c(queries_cai)
# cdef cydlpack.DLManagedTensor neighbors_dlpack = cydlpack.dlpack_c(neighbors_cai)
# cdef cydlpack.DLManagedTensor distances_dlpack = cydlpack.dlpack_c(distances_cai)

# with cuda_interruptible():
# cagra_c.cagraSearch(
# <cuvsResources_t> resources_,
# params,
# index.index,
# &queries_dlpack,
# &neighbors_dlpack,
# &distances_dlpack
# )

# return (distances, neighbors)
