forked from dusty-nv/jetson-containers
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathvector_index.py
316 lines (249 loc) · 11.7 KB
/
vector_index.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
#!/usr/bin/env python3
import os
import sys
import time
import tqdm
import torch
import numpy as np
import ctypes as C
from faiss_lite import (
cudaKNN,
cudaL2Norm,
cudaAllocMapped,
DistanceMetrics,
assert_cuda
)
from cuda.cudart import (
cudaStreamCreateWithFlags,
cudaStreamNonBlocking,
cudaStreamSynchronize,
)
from .utils import AttrDict, torch_dtype, tqdm_redirect_stdout
class cudaVectorIndex:
"""
Flat vector store that uses FAISS CUDA kernels for KNN search
-- uses zero-copy mapped CUDA memory
-- supports np.float16 and np.float32 vectors
-- returned distances are float32, indexes are int64
It's aimed at storing high-dimensional embeddings,
with a fixed reserve allocation due to their size.
"""
def __init__(self, dim, dtype=np.float32, reserve=1<<30, metric='cosine', max_search_queries=1, **kwargs):
"""
Allocate memory for a vector index.
Parameters:
dim (int) -- dimension of the vectors (i.e. the size of the embedding)
dtype (np.dtype) -- data type of the vectors, either float32 or float16
reserve (int) -- maximum amount of memory (in bytes) to allocate for storing vectors (default 1GB)
metric (str) -- the distance metric to use (recommend 'l2', 'inner_product', or 'cosine')
max_search_queries (int) -- the maximum number of queries to search for at a time
"""
self.shape = (0, dim)
self.dtype = dtype
self.dsize = np.dtype(dtype).itemsize
self.metric = metric
self.stats = AttrDict()
self.reserved_size = reserve
self.reserved = int(self.reserved_size / (dim * self.dsize))
self.max_search_queries = max_search_queries
err, self.stream = cudaStreamCreateWithFlags(cudaStreamNonBlocking)
assert_cuda(err)
self.torch_stream = torch.cuda.ExternalStream(int(self.stream), device='cuda:0')
#torch.cuda.set_stream(self.torch_stream)
print(f"-- creating CUDA stream {self.stream}")
print(f"-- creating CUDA vector index ({self.reserved},{dim}) dtype={dtype} metric={metric}")
self.vectors = cudaAllocMapped((self.reserved, dim), dtype) # inputs
self.queries = cudaAllocMapped((max_search_queries, dim), dtype)
self.indexes = cudaAllocMapped((max_search_queries, self.reserved), np.int64) # outputs
self.distances = cudaAllocMapped((max_search_queries, self.reserved), np.float32)
if metric == 'l2':
self.vector_norms = cudaAllocMapped(self.reserved, np.float32)
def __len__(self):
return self.shape[0]
def size(self):
return self.shape[0] * self.shape[1] * self.dsize
def sync(self):
assert_cuda(cudaStreamSynchronize(self.stream))
def add(self, vector, sync=True):
"""
Add a vector and return its index.
If the L2 metric is being used, its L2 norm will be computed.
If sync=True, the CPU will wait for the CUDA stream to complete.
"""
index = self.shape[0]
if isinstance(vector, np.ndarray):
np.copyto(dst=self.vectors.array[index], src=vector, casting='no')
elif isinstance(vector, torch.Tensor):
with torch.cuda.StreamContext(self.torch_stream):
self.vectors.tensor[index] = vector
else:
raise ValueError(f"vector needs to be a np.ndarray or torch.Tensor (was type {type(vector)})")
if self.metric == 'l2':
assert(cudaL2Norm(
C.cast(self.vectors.ptr + index * self.shape[1] * self.dsize, C.c_void_p),
self.dsize, 1, self.shape[1],
C.cast(self.vector_norms.ptr + index * 4, C.POINTER(C.c_float)),
True, C.c_void_p(int(self.stream)) if self.stream else None
))
if sync:
self.sync()
elif self.metric == 'cosine':
with torch.cuda.StreamContext(self.torch_stream), torch.inference_mode():
torch.nn.functional.normalize(
self.vectors.tensor[index], dim=-1,
out=self.vectors.tensor[index]
)
if sync:
self.sync()
self.shape = (index + 1, self.shape[1])
return index
def search(self, queries, k=1, sync=True, return_tensors='np'):
"""
Returns the indexes and distances of the k-closest vectors using the given distance metric.
Each query should be of the same dimension and dtype that this class was created with.
Returns (indexes, distances) tuple where each of these has shape (queries, K)
The returned distances are always with float32 dtype and the indexes are int64
If return_tensors is 'np' ndarray's will be returned, or 'pt' for PyTorch tensors.
Note that for 'inner_product' and 'cosine' metrics, these are similarity metrics not
distances, so they are in descending order (not ascending). 'cosine' is normalized
between [0,1], where 1.0 is the highest probability of a match.
If sync=True, the CPU will wait for the CUDA stream to complete. Otherwise, the function
is aynchronous and None is returned. In this case, cudaStreamSynchronize() should be called.
"""
if isinstance(queries, list):
queries = np.asarray(queries)
if len(queries.shape) == 1:
queries.shape = (1, queries.shape[0])
if queries.shape[0] > self.max_search_queries:
raise ValueError(f"the number of queries exceeds the max_search_queries of {self.max_search_queries}")
if queries.shape[1] != self.shape[1]:
raise ValueError(f"queries must match the vector dimension ({self.shape[1]})")
if queries.dtype != self.dtype and queries.dtype != torch_dtype(self.dtype):
raise ValueError(f"queries need to use {self.dtype} dtype (was type {queries.dtype})")
if isinstance(queries, np.ndarray):
np.copyto(dst=self.queries.array[:queries.shape[0]], src=queries, casting='no')
elif isinstance(queries, torch.Tensor):
with torch.cuda.StreamContext(self.torch_stream):
self.queries.tensor[:queries.shape[0]] = queries
time_begin = time.perf_counter()
if self.metric == 'cosine':
with torch.cuda.StreamContext(self.torch_stream), torch.inference_mode():
torch.nn.functional.normalize(
self.queries.tensor[:queries.shape[0]], dim=-1,
out=self.queries.tensor[:queries.shape[0]]
)
metric=self.metric if self.metric != 'cosine' else 'inner_product'
assert(cudaKNN(
C.cast(self.vectors.ptr, C.c_void_p),
C.cast(self.queries.ptr, C.c_void_p),
self.dsize,
self.shape[0],
queries.shape[0],
self.shape[1],
k,
DistanceMetrics[metric],
C.cast(self.vector_norms.ptr, C.POINTER(C.c_float)) if self.metric == 'l2' else None,
C.cast(self.distances.ptr, C.POINTER(C.c_float)),
C.cast(self.indexes.ptr, C.POINTER(C.c_longlong)),
C.cast(int(self.stream), C.c_void_p) if self.stream else None
))
if sync:
self.sync()
self.stats.query_shape = queries.shape
self.stats.index_shape = self.shape
self.stats.search_time = time.perf_counter() - time_begin
if return_tensors == 'np':
return (
np.copy(self.indexes.array[:queries.shape[0], :k]).squeeze(),
np.copy(self.distances.array[:queries.shape[0], :k]).squeeze()
)
else:
self.stats.search_time = time.perf_counter() - time_begin
return None
def validate(self, k=4):
"""
Sanity check search
"""
correct=True
metric=self.metric if self.metric != 'cosine' else 'inner_product'
for n in tqdm.tqdm(range(self.shape[0]), file=sys.stdout):
with tqdm_redirect_stdout():
assert(cudaKNN(
C.cast(self.vectors.ptr, C.c_void_p),
C.cast(self.vectors.ptr+n*self.shape[1]*self.dsize, C.c_void_p),
self.dsize,
self.shape[0],
1,
self.shape[1],
k,
DistanceMetrics[metric],
C.cast(self.vector_norms.ptr, C.POINTER(C.c_float)) if self.metric == 'l2' else None,
C.cast(self.distances.ptr, C.POINTER(C.c_float)),
C.cast(self.indexes.ptr, C.POINTER(C.c_longlong)),
C.cast(int(self.stream), C.c_void_p) if self.stream else None
))
self.sync()
if self.indexes.array[0][0] != n:
print(f"incorrect or duplicate index [{n}] indexes={self.indexes.array[0,:k]} distances={self.distances.array[0,:k]}")
#assert(self.indexes[0][0]==n)
correct=False
return correct
'''
# https://ai.stackexchange.com/questions/36191/how-to-calculate-a-meaningful-distance-between-multidimensional-tensors
# https://ai.stackexchange.com/questions/37233/similarities-between-2d-vectors-to-flatten-or-to-not
def cumsum_3d(a):
a = torch.cumsum(a, -1)
a = torch.cumsum(a, -2)
a = torch.cumsum(a, -3)
return a
def norm_3d(a):
return a / torch.sum(a, dim=(-1,-2,-3), keepdim=True)
def emd_3d(a, b):
a = norm_3d(a)
b = norm_3d(b)
return torch.mean(torch.square(cumsum_3d(a) - cumsum_3d(b)), dim=(-1,-2,-3))
def cumsum_2d(a):
a = torch.cumsum(a, -1)
a = torch.cumsum(a, -2)
return a
def norm_2d(a):
return a / torch.sum(a, dim=(-1,-2), keepdim=True)
def emd_2d(a, b):
a = norm_2d(a)
b = norm_2d(b)
return torch.mean(torch.square(cumsum_2d(a) - cumsum_2d(b)), dim=(-1,-2))
cos = torch.nn.CosineSimilarity(dim=1, eps=1e-6)
et = torch.cat(embeddings)
print('et', et.shape, et.dtype, et.device)
for n, embedding in enumerate(embeddings):
print(n, embedding.shape, embedding.dtype, type(embedding))
distances = cos(embedding, et)
"""
distances = torch.zeros((len(embeddings)), dtype=torch.float32, device='cuda:0')
print(f"distances: {distances.shape} {distances.dtype}")
for m, embedding2 in enumerate(embeddings):
distances[m] = emd_2d(embedding, embedding2)
print(distances)
"""
indexes = torch.argsort(distances, descending=True).squeeze()
print(f"-- search results for {n} {metadata[n]}")
for k in range(args.k):
print(f" * {indexes[k]} {metadata[indexes[k]]} dist={distances[indexes[k]]}")
'''
"""
# pylibraft - https://github.com/rapidsai/raft/blob/4f0a2d2d6e30eea0c036ca3b531e03e44e760fbe/python/pylibraft/pylibraft/distance/pairwise_distance.pyx#L93
time_dist_begin = time.perf_counter()
pairwise_distance(
vector,
cudaArrayInterface(self.data, self.shape, self.dtype),
cudaArrayInterface(self.search_data, (vector.shape[0], self.shape[0]), self.dtype),
metric=metric
)
cudaDeviceSynchronize()
time_dist_end = time.perf_counter()
if k == 1:
topk = np.argmin(self.search_array, axis=1)
time_sort_end = time.perf_counter()
print(f'dist: {(time_dist_end - time_dist_begin) * 1000:.3f} ms')
print(f'sort: {(time_sort_end - time_dist_end) * 1000:.3f} ms')
"""