From 53d69b92d75b07577a1ccfbed0e46c0c17c4c99d Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 30 Sep 2023 16:54:10 -0700 Subject: [PATCH 1/8] Add: exact search in JS --- c/lib.cpp | 2 +- c/usearch.h | 2 +- javascript/lib.cpp | 67 ++++++++++++++++- javascript/usearch.d.ts | 159 ++++++++++++++++++++++++++++++---------- 4 files changed, 189 insertions(+), 41 deletions(-) diff --git a/c/lib.cpp b/c/lib.cpp index 1f2e4d97..f141a2c8 100644 --- a/c/lib.cpp +++ b/c/lib.cpp @@ -255,7 +255,7 @@ USEARCH_EXPORT usearch_distance_t usearch_distance( // return metric((byte_t const*)vector_first, (byte_t const*)vector_second); } -USEARCH_EXPORT void usearch_brute_force( // +USEARCH_EXPORT void usearch_exact_search( // void const* dataset, size_t dataset_count, size_t dataset_stride, // void const* queries, size_t queries_count, size_t queries_stride, // usearch_scalar_kind_t scalar_kind, size_t dimensions, // diff --git a/c/usearch.h b/c/usearch.h index b3cb7f7e..653a6bae 100644 --- a/c/usearch.h +++ b/c/usearch.h @@ -274,7 +274,7 @@ USEARCH_EXPORT usearch_distance_t usearch_distance( // * @param[out] error Pointer to a string where the error message will be stored, if an error occurs. * @return Number of found matches. 
*/ -USEARCH_EXPORT void usearch_brute_force( // +USEARCH_EXPORT void usearch_exact_search( // void const* dataset, size_t dataset_size, size_t dataset_stride, // void const* queries, size_t queries_size, size_t queries_stride, // usearch_scalar_kind_t scalar_kind, size_t dimensions, // diff --git a/javascript/lib.cpp b/javascript/lib.cpp index 95778007..5a8353b7 100644 --- a/javascript/lib.cpp +++ b/javascript/lib.cpp @@ -373,6 +373,71 @@ Napi::Value Index::Contains(Napi::CallbackInfo const& ctx) { } } -Napi::Object InitAll(Napi::Env env, Napi::Object exports) { return Index::Init(env, exports); } +Napi::Value ExactSearch(Napi::CallbackInfo const& info) { + Napi::Env env = info.Env(); + + // Validate the number of arguments and their types. + if (info.Length() < 5) { + Napi::TypeError::New(env, "Wrong number of arguments").ThrowAsJavaScriptException(); + return env.Null(); + } + + // Dataset + if (!info[0].IsTypedArray()) { + Napi::TypeError::New(env, "Dataset should be a TypedArray").ThrowAsJavaScriptException(); + return env.Null(); + } + Napi::TypedArray dataset = info[0].As(); + + // Queries + if (!info[1].IsTypedArray()) { + Napi::TypeError::New(env, "Queries should be a TypedArray").ThrowAsJavaScriptException(); + return env.Null(); + } + Napi::TypedArray queries = info[1].As(); + + // Dimensions + if (!info[2].IsBigInt()) { + Napi::TypeError::New(env, "Dimensions should be a BigInt").ThrowAsJavaScriptException(); + return env.Null(); + } + std::uint64_t dimensions = info[2].As().Uint64Value(); + + // Count + if (!info[3].IsBigInt()) { + Napi::TypeError::New(env, "Count should be a BigInt").ThrowAsJavaScriptException(); + return env.Null(); + } + std::uint64_t count = info[3].As().Uint64Value(); + + // Metric + if (!info[4].IsString()) { + Napi::TypeError::New(env, "Metric should be a string").ThrowAsJavaScriptException(); + return env.Null(); + } + std::string metric_str = info[4].As().Utf8Value(); + metric_punned_t metric(dimensions, metric_kind_t::cos_k); 
// Adjust as needed based on the metric_str + + // Perform exact search + exact_search_t search; + auto results = search( // + (byte_t const*)dataset.Data(), dataset.ElementLength() / dimensions, dimensions * sizeof(float), // + (byte_t const*)queries.Data(), queries.ElementLength() / dimensions, dimensions * sizeof(float), // + count, metric); + + // Construct the result object + Napi::Object result = Napi::Object::New(env); + + result.Set("keys", /* TODO: populate keys */); + result.Set("distances", /* TODO: populate distances */); + result.Set("count", Napi::BigInt::New(env, results.size())); + + return result; +} + +Napi::Object InitAll(Napi::Env env, Napi::Object exports) { + exports.Set("exactSearch", Napi::Function::New(env, ExactSearch)); + return Index::Init(env, exports); +} NODE_API_MODULE(usearch, InitAll) diff --git a/javascript/usearch.d.ts b/javascript/usearch.d.ts index a4bf7fcb..0049922e 100644 --- a/javascript/usearch.d.ts +++ b/javascript/usearch.d.ts @@ -1,12 +1,56 @@ -/** Search result object. */ +export enum MetricKind { + Unknown = 'unknown', + Cos = 'cos', + IP = 'ip', + L2sq = 'l2sq', + Haversine = 'haversine', + Pearson = 'pearson', + Jaccard = 'jaccard', + Hamming = 'hamming', + Tanimoto = 'tanimoto', + Sorensen = 'sorensen' +} + +export enum ScalarKind { + Unknown = 'unknown', + F32 = 'f32', + F64 = 'f64', + F16 = 'f16', + I8 = 'i8', + B1 = 'b1' +} + +export type IntOrAlike = number | bigint; +export type Keys = BigUint64Array; +export type Distances = Float32Array; + +export type KeyOrKeys = bigint | bigint[] | BigUint64Array; +export type IndicatorOrIndicators = boolean | boolean[]; +export type CountOrCounts = bigint | BigUint64Array; +export type VectorOrVectors = Float32Array | Float64Array | Int8Array; + +/** Represents a set of search results */ export interface Matches { - /** The keys of the nearest neighbors found, size n*k. */ - keys: BigUint64Array, - /** The distances of the nearest neighbors found, size n*k. 
*/ - distances: Float32Array, - /** The distances of the nearest neighbors found, size n*k. */ - count: bigint + /** Keys of the nearest neighbors found (size: n*k). */ + keys: Keys; + /** Distances of the nearest neighbors found (size: n*k). */ + distances: Distances; +} + +/** Represents a set of batched search results */ +export class BatchMatches { + /** Keys of the nearest neighbors found (size: n*k). */ + keys: Keys; + /** Distances of the nearest neighbors found (size: n*k). */ + distances: Distances; + /** Counts of the nearest neighbors found (size: n*k). */ + counts: BigUint64Array; + /** Limit for search results per query. */ + k: bigint; + + /** Retrieve Matches object at the specified index in the batch. */ + get(i: IntOrAlike): Matches; } /** K-Approximate Nearest Neighbors search index. */ @@ -15,22 +59,24 @@ export class Index { /** * Constructs a new index. * - * @param {bigint} dimensions - * @param {string} metric - * @param {string} quantization - * @param {bigint} capacity - * @param {bigint} connectivity - * @param {bigint} expansion_add - * @param {bigint} expansion_search + * @param {IntOrAlike} dimensions + * @param {MetricKind} metric + * @param {ScalarKind} quantization + * @param {IntOrAlike} capacity + * @param {IntOrAlike} connectivity + * @param {IntOrAlike} expansion_add + * @param {IntOrAlike} expansion_search + * @param {boolean} multi */ constructor( - dimensions: bigint, - metric: string, - quantization: string, - capacity: bigint, - connectivity: bigint, - expansion_add: bigint, - expansion_search: bigint + dimensions: IntOrAlike, + metric: MetricKind, + quantization: ScalarKind, + capacity: IntOrAlike, + connectivity: IntOrAlike, + expansion_add: IntOrAlike, + expansion_search: IntOrAlike, + multi: boolean, ); /** @@ -41,7 +87,7 @@ export class Index { /** * Returns the bigint of vectors currently indexed. - * @return {bigint} The bigint of vectors currently indexed. + * @return {bigint} The number of vectors currently indexed. 
*/ size(): bigint; @@ -73,38 +119,75 @@ export class Index { * View index from a file, without loading into RAM. * @param {string} path File path to read. */ - load(path: string): void; + view(path: string): void; /** * Add n vectors of dimension d to the index. * - * @param {bigint | bigint[]} keys Input identifiers for every vector. - * @param {Float32Array | Float32Array[]} mat Input matrix, matrix of size n * d. + * @param {KeyOrKeys} keys Input identifiers for every vector. + * @param {VectorOrVectors} vectors Input matrix, matrix of size n * d. */ - add(keys: bigint | bigint[], mat: Float32Array | Float32Array[]): void; + add(keys: KeyOrKeys, vectors: VectorOrVectors): void; /** * Query n vectors of dimension d to the index. Return at most k vectors for each. * If there are not enough results for a query, the result array is padded with -1s. * - * @param {Float32Array} mat Input vectors to search, matrix of size n * d. - * @param {bigint} k The bigint of nearest neighbors to search for. - * @return {Matches} Output of the search result. + * @param {VectorOrVectors} vectors Input vectors to search, matrix of size n * d. + * @param {IntOrAlike} k The number of nearest neighbors to search for. + * @return {Matches | BatchMatches} Search results for one or more queries. */ - search(mat: Float32Array, k: bigint): Matches; + search(vectors: VectorOrVectors, k: IntOrAlike): Matches | BatchMatches; - /** - * Check if an entry is contained in the index. - * - * @param {bigint} key Identifier to look up. + /** + * Check if one or more entries are contained in the index. + * @param {KeyOrKeys} keys - Identifier(s) to look up. + * @return {IndicatorOrIndicators} - Returns true if the key is contained in the index, false otherwise when a single key is provided. + * Returns an array of booleans corresponding to the presence of each key in the index when an array of keys is provided. 
*/ - contains(key: bigint): boolean; + contains(keys: KeyOrKeys): IndicatorOrIndicators; + + /** + * Check if one or more entries are contained in the index. + * @param {KeyOrKeys} keys - Identifier(s) to look up. + * @return {CountOrCounts} - Number of vectors found per query. + */ + contains(keys: KeyOrKeys): CountOrCounts; + /** * Remove a vector from the index. * - * @param {bigint} key Input identifier for every vector to be removed. + * @param {KeyOrKeys} keys Identifier(s) for every vector to be removed. + * @return {CountOrCounts} - Number of vectors deleted per query. */ - remove(key: bigint): boolean; + remove(keys: KeyOrKeys): CountOrCounts; + +} -} \ No newline at end of file +/** + * Performs an exact search on the given dataset to find the best matching vectors for each query. + * + * @param {VectorOrVectors} dataset - The dataset containing vectors to be searched. It should be a flat array representing a matrix of size `n * dimensions`, where `n` is the number of vectors, and `dimensions` is the number of elements in each vector. + * @param {VectorOrVectors} queries - The queries containing vectors to search for in the dataset. It should be a flat array representing a matrix of size `m * dimensions`, where `m` is the number of query vectors, and `dimensions` is the number of elements in each vector. + * @param {IntOrAlike} dimensions - The dimensionality of the vectors in both the dataset and the queries. It defines the number of elements in each vector. + * @param {IntOrAlike} count - The number of nearest neighbors to return for each query. If the dataset contains fewer vectors than the specified count, the result will contain only the available vectors. + * @param {MetricKind} metric - The distance metric to be used for the search. It should be one of the supported metric strings, for example, "euclidean" for Euclidean distance, "cosine" for Cosine distance, etc. 
+ * @return {Matches} - Returns a `Matches` object containing the results of the search. The `keys` field contains the indices of the matching vectors in the dataset, the `distances` field contains the distances between the query and the matching vectors, and the `count` field contains the actual number of matches found for each query. + * + * @example + * const dataset = new VectorOrVectors([1.0, 2.0, 3.0, 4.0]); // Two vectors: [1.0, 2.0] and [3.0, 4.0] + * const queries = new VectorOrVectors([1.5, 2.5]); // One vector: [1.5, 2.5] + * const dimensions = BigInt(2); + * const count = BigInt(1); + * const metric = "euclidean"; + * + * const result = exactSearch(dataset, queries, dimensions, count, metric); + * // result might be: + * // { + * // keys: BigUint64Array [ 1n ], + * // distances: VectorOrVectors [ some_value ], + * // count: 1n + * // } + */ +export function exactSearch(dataset: VectorOrVectors, queries: VectorOrVectors, dimensions: IntOrAlike, count: IntOrAlike, metric: MetricKind): Matches | BatchMatches; From d1f5aaf1caec38b8e324e9ffb35b500889e8421f Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 30 Sep 2023 16:54:36 -0700 Subject: [PATCH 2/8] Make: Use SimSIMD in JS builds --- binding.gyp | 1 + 1 file changed, 1 insertion(+) diff --git a/binding.gyp b/binding.gyp index 4c7fc952..733a496d 100644 --- a/binding.gyp +++ b/binding.gyp @@ -10,6 +10,7 @@ "simsimd/include", ], "dependencies": [" Date: Sat, 30 Sep 2023 16:55:21 -0700 Subject: [PATCH 3/8] Make: Attach TS docs to JS package --- .vscode/settings.json | 3 +++ .vscode/tasks.json | 8 ++++---- package.json | 15 ++++++++++++--- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index a9054712..c8f37b24 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -113,10 +113,12 @@ "BLAS", "Cdecl", "cflags", + "cibuildwheel", "citerator", "downcasted", "Downcasting", "dtype", + 
"emcmake", "equi", "equidimensional", "FAISS", @@ -153,6 +155,7 @@ "usearch", "usecases", "Vardanian", + "vectorize", "Xunit" ], "autoDocstring.docstringFormat": "sphinx", diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 896774c7..3265c920 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -3,7 +3,7 @@ "tasks": [ { "label": "Linux Build C++ Debug", - "command": "cmake -DCMAKE_CXX_COMPILER=gcc-12 -DCMAKE_CXX_COMPILER=g++-12 -DCMAKE_BUILD_TYPE=Debug -B ./build_debug && make -C ./build_debug", + "command": "cmake -DCMAKE_BUILD_TYPE=Debug -B ./build_debug && make -C ./build_debug", "args": [], "type": "shell", "problemMatcher": [ @@ -12,7 +12,7 @@ }, { "label": "Linux Build C++ Release", - "command": "cmake -DCMAKE_CXX_COMPILER=gcc-12 -DCMAKE_CXX_COMPILER=g++-12 -DCMAKE_BUILD_TYPE=RelWithDebInfo -B ./build_release && make -C ./build_release", + "command": "cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo -B ./build_release && make -C ./build_release", "args": [], "type": "shell", "problemMatcher": [ @@ -21,13 +21,13 @@ }, { "label": "MacOS Build C++ Debug", - "command": "cmake -DCMAKE_C_COMPILER=/opt/homebrew/opt/llvm/bin/clang -DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm/bin/clang++ -DCMAKE_BUILD_TYPE=Debug -B ./build_debug && make -C ./build_debug", + "command": "cmake -DCMAKE_BUILD_TYPE=Debug -B ./build_debug && make -C ./build_debug", "args": [], "type": "shell", }, { "label": "MacOS Build C++ Release", - "command": "cmake -DCMAKE_C_COMPILER=/opt/homebrew/opt/llvm/bin/clang -DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -B ./build_release && make -C ./build_release", + "command": "cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo -B ./build_release && make -C ./build_release", "args": [], "type": "shell" } diff --git a/package.json b/package.json index b630b53a..1878de97 100644 --- a/package.json +++ b/package.json @@ -2,13 +2,22 @@ "name": "usearch", "version": "2.6.0", "description": "Smaller & Faster Single-File Vector 
Search Engine from Unum", - "author": "Ash Vardanian", + "author": "Ash Vardanian (https://ashvardanian.com/)", "license": "Apache 2.0", - "main": "javascript/usearch.js", + "homepage": "https://unum-cloud.github.io/usearch/", "repository": { "type": "git", "url": "https://github.com/unum-cloud/usearch.git" }, + "bugs": { + "url": "https://github.com/unum-cloud/usearch/issues", + "email": "info@unum.cloud" + }, + "main": "javascript/usearch.js", + "types": "javascript/usearch.d.ts", + "files": [ + "javascript/usearch.d.ts" + ], "gypfile": true, "engines": { "node": "~10 >=10.20 || >=12.17" @@ -28,4 +37,4 @@ "semantic-release": "^21.1.2", "typescript": "^5.1.6" } -} +} \ No newline at end of file From 72eff5e07d7ebed3331a109e82855729f6902008 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 30 Sep 2023 17:00:59 -0700 Subject: [PATCH 4/8] Make: Broader testing matrix for OSes/compilers --- .github/workflows/prerelease.yml | 121 +++++++++++++++---------------- .gitignore | 1 + 2 files changed, 61 insertions(+), 61 deletions(-) diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index 675ebe47..3ae4bf99 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -16,59 +16,80 @@ permissions: contents: read jobs: - - test_c: - name: Test C + + test_cpp_c_ubuntu: + name: Test C++ Ubuntu (${{ matrix.compiler }}) runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + compiler: [g++-9, g++-10, g++-11, clang-10, clang-11, clang-12] + steps: - uses: actions/checkout@v3 with: ref: main-dev - run: git submodule update --init --recursive - - name: Prepare Environment + + - name: Install Dependencies run: | sudo apt update && - sudo apt install -y cmake g++-12 build-essential libjemalloc-dev + sudo apt install -y cmake build-essential libjemalloc-dev ${{ matrix.compiler }} + - name: Build run: | - cmake -B ./build_release \ - -DCMAKE_CXX_COMPILER="g++-12" \ - 
-DCMAKE_BUILD_TYPE=Release \ - -DUSEARCH_BUILD_CTEST=1 \ - -DUSEARCH_USE_OPENMP=1 \ - -DUSEARCH_USE_SIMSIMD=1 \ - -DUSEARCH_USE_JEMALLOC=1 \ - -DUSEARCH_BUILD_BENCHMARK=0 && - make -C ./build_release -j - - name: Run tests - run: ./build_release/test_c + export CC=${{ matrix.compiler }} + export CXX=${{ matrix.compiler }}++ + cmake -B build -DCMAKE_BUILD_TYPE=Debug -DUSEARCH_BUILD_TEST=1 -DUSEARCH_BUILD_CTEST=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_SIMSIMD=1 -DUSEARCH_USE_JEMALLOC=1 + cmake --build build --config Debug + - name: Run C++ tests + run: ./build/test_cpp + - name: Run C tests + run: ./build/test_c - test_cpp: - name: Test C++ - runs-on: ubuntu-latest + test_cpp_c_macos: + name: Test C++ MacOS + runs-on: macos-latest steps: - uses: actions/checkout@v3 with: ref: main-dev - run: git submodule update --init --recursive - - name: Prepare Environment + + - name: Install Dependencies run: | - sudo apt update && - sudo apt install -y cmake g++-12 build-essential libjemalloc-dev + brew update + brew install cmake jemalloc + - name: Build run: | - cmake -B ./build_release \ - -DCMAKE_CXX_COMPILER="g++-12" \ - -DCMAKE_BUILD_TYPE=Release \ - -DUSEARCH_BUILD_TEST=1 \ - -DUSEARCH_USE_OPENMP=1 \ - -DUSEARCH_USE_SIMSIMD=1 \ - -DUSEARCH_USE_JEMALLOC=1 \ - -DUSEARCH_BUILD_BENCHMARK=0 && - make -C ./build_release -j - - name: Run tests - run: ./build_release/test - + cmake -B build -DCMAKE_BUILD_TYPE=Debug -DUSEARCH_BUILD_TEST=1 -DUSEARCH_BUILD_CTEST=1 + cmake --build build --config Debug + - name: Run C++ tests + run: ./build/test + - name: Run C tests + run: ./build/test_c + + test_cpp_c_windows: + name: Test C++ Windows + runs-on: windows-latest + steps: + - uses: actions/checkout@v3 + with: + ref: main-dev + - run: git submodule update --init --recursive + + - name: Install Dependencies + run: choco install cmake jemalloc + + - name: Build + run: | + cmake -B build -DCMAKE_BUILD_TYPE=Debug -DUSEARCH_BUILD_TEST=1 -DUSEARCH_BUILD_CTEST=1 + cmake --build build --config 
Debug + - name: Run C++ tests + run: ./build/test + - name: Run C tests + run: ./build/test_c test_python_311: name: Test Python @@ -91,15 +112,7 @@ jobs: python -m pip install --upgrade pip pip install pytest numpy - name: Build locally on Ubuntu - run: | - export CC=/usr/bin/gcc-12 - export CXX=/usr/bin/g++-12 - export LD_LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/12/:$LD_LIBRARY_PATH - python -m pip install . - if: ${{ matrix.os == 'ubuntu-22.04' }} - - name: Build locally run: python -m pip install . - if: ${{ matrix.os != 'ubuntu-22.04' }} - name: Test with PyTest run: pytest python/scripts/ -s -x @@ -128,13 +141,7 @@ jobs: pip install pytest numpy - name: Build locally - run: | - export CC=/usr/bin/gcc-12 - export CXX=/usr/bin/g++-12 - export LD_LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/12/:$LD_LIBRARY_PATH - python -m pip install . - if: ${{ matrix.os == 'ubuntu-22.04' }} - + run: python -m pip install . - name: Test with PyTest run: pytest python/scripts/ -s -x @@ -148,10 +155,7 @@ jobs: - uses: actions/setup-node@v3 with: node-version: 18 - - run: | - export CC=/usr/bin/gcc-12 - export CXX=/usr/bin/g++-12 - npm install + - run: npm install - run: npm ci - run: npm test @@ -179,10 +183,7 @@ jobs: - name: Setup Gradle uses: gradle/gradle-build-action@v2.4.2 - name: Execute Gradle build - run: - export CC=/usr/bin/gcc-12 - export CXX=/usr/bin/g++-12 - gradle clean build + run: gradle clean build test_swift: name: Test ObjC & Swift @@ -253,14 +254,12 @@ jobs: - name: Prepare environment run: | sudo apt update && - sudo apt install -y cmake g++-12 build-essential libjemalloc-dev + sudo apt install -y build-essential - name: Build library run: | cd golang/ - export CC=/usr/bin/gcc-12 - export CXX=/usr/bin/g++-12 - make -C ../c libusearch_c.so + make USEARCH_USE_OPENMP=0 -C ../c libusearch_c.so sudo mv ../c/libusearch_c.so /usr/local/lib/libusearch.a sudo mv ../c/usearch.h /usr/local/include/usearch.h diff --git a/.gitignore b/.gitignore index b1c32252..46e268fa 
100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ datasets bin *.usearch +.cache # C++ builds build From 1c2f3b2ce752e4ba5963cce5eb71b2e324522bef Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sun, 1 Oct 2023 09:16:55 -0700 Subject: [PATCH 5/8] Make: Apple's Clang has no leak sanitizer --- CMakeLists.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3f8cf4e1..f27d7b72 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -198,7 +198,6 @@ function (setup_target TARGET_NAME) -g> $<$:-g -fsanitize=address - -fsanitize=leak -fsanitize=alignment -fsanitize=undefined > @@ -214,13 +213,18 @@ function (setup_target TARGET_NAME) PRIVATE $<$:-g -fsanitize=address - -fsanitize=leak -fsanitize=alignment -fsanitize=undefined > -fPIC ) + # Check if the compiler is AppleClang, and if not, add the leak sanitizer + if (NOT CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "AppleClang") + target_compile_options(${TARGET_NAME} PRIVATE $<$:-fsanitize=leak>) + target_link_options(${TARGET_NAME} PRIVATE $<$:-fsanitize=leak>) + endif () + if (USEARCH_USE_OPENMP) target_link_libraries(${TARGET_NAME} PRIVATE OpenMP::OpenMP_CXX) endif () From f2695822ece851e874df9761d7d987a5a1953e13 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sun, 1 Oct 2023 09:21:16 -0700 Subject: [PATCH 6/8] Make: Reduce pre-release dependencies --- .github/workflows/prerelease.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index b35ce186..3aa9d911 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -38,8 +38,7 @@ jobs: - name: Build run: | - export CC=${{ matrix.compiler }} - export CXX=${{ matrix.compiler }}++ + export CXX=${{ matrix.compiler }} cmake -B build -DCMAKE_BUILD_TYPE=Debug -DUSEARCH_BUILD_TEST=1 
-DUSEARCH_BUILD_CTEST=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_SIMSIMD=1 -DUSEARCH_USE_JEMALLOC=1 cmake --build build --config Debug - name: Run C++ tests @@ -59,7 +58,7 @@ jobs: - name: Install Dependencies run: | brew update - brew install cmake jemalloc + brew install cmake - name: Build run: | @@ -80,7 +79,7 @@ jobs: - run: git submodule update --init --recursive - name: Install Dependencies - run: choco install cmake jemalloc + run: choco install cmake - name: Build run: | From 121896648a30db4b834b36f5f26623fdae25b92a Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sun, 1 Oct 2023 21:20:37 -0700 Subject: [PATCH 7/8] Add: Two-level JS binding --- docs/compilation.md | 2 +- docs/conf.py | 2 +- include/usearch/index_plugins.hpp | 8 + javascript/docs.js | 111 ------- javascript/lib.cpp | 509 ++++++++++++------------------ javascript/usearch.d.ts | 193 ----------- javascript/usearch.js | 442 +++++++++++++++++++++++++- javascript/usearch.test.js | 54 ++-- package.json | 4 - tsconfig.json | 8 - 10 files changed, 672 insertions(+), 661 deletions(-) delete mode 100644 javascript/docs.js delete mode 100644 javascript/usearch.d.ts delete mode 100644 tsconfig.json diff --git a/docs/compilation.md b/docs/compilation.md index ac07b983..ee0b4d1e 100644 --- a/docs/compilation.md +++ b/docs/compilation.md @@ -94,7 +94,7 @@ WebAssembly: ```sh emcmake cmake -B ./build -DCMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -s TOTAL_MEMORY=64MB" && emmake make -C ./build -node ./build/test.js +node ./build/usearch.test.js ``` If you don't yet have `emcmake` installed: diff --git a/docs/conf.py b/docs/conf.py index dc57c331..ed00ee82 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -50,4 +50,4 @@ breathe_projects = {"USearch": "../build/xml"} breathe_default_project = "USearch" -js_source_path = "../javascript/docs.js" +js_source_path = "../javascript/usearch.js" diff --git a/include/usearch/index_plugins.hpp b/include/usearch/index_plugins.hpp index 
12911dac..bba10f71 100644 --- a/include/usearch/index_plugins.hpp +++ b/include/usearch/index_plugins.hpp @@ -346,6 +346,10 @@ inline expected_gt scalar_kind_from_name(char const* name, std::s return parsed; } +inline expected_gt scalar_kind_from_name(char const* name) { + return scalar_kind_from_name(name, std::strlen(name)); +} + inline expected_gt metric_from_name(char const* name, std::size_t len) { expected_gt parsed; if (str_equals(name, len, "l2sq") || str_equals(name, len, "euclidean_sq")) { @@ -370,6 +374,10 @@ inline expected_gt metric_from_name(char const* name, std::size_t return parsed; } +inline expected_gt metric_from_name(char const* name) { + return metric_from_name(name, std::strlen(name)); +} + inline float f16_to_f32(std::uint16_t u16) noexcept { #if USEARCH_USE_NATIVE_F16 f16_native_t f16; diff --git a/javascript/docs.js b/javascript/docs.js deleted file mode 100644 index 2f533cea..00000000 --- a/javascript/docs.js +++ /dev/null @@ -1,111 +0,0 @@ -/** Search result object. */ -class Matches { - /** - * @param {BigUint64Array} keys - The keys of the nearest neighbors found. - * @param {Float32Array} distances - The distances of the nearest neighbors found. - * @param {bigint} count - The count of nearest neighbors found. - */ - constructor(keys, distances, count) { - this.keys = keys; - this.distances = distances; - this.count = count; - } -} - -/** K-Approximate Nearest Neighbors search index. */ -class Index { - /** - * Constructs a new index. - * - * @param {bigint} dimensions - * @param {string} metric - * @param {string} quantization - * @param {bigint} capacity - * @param {bigint} connectivity - * @param {bigint} expansion_add - * @param {bigint} expansion_search - */ - constructor( - dimensions, - metric, - quantization, - capacity, - connectivity, - expansion_add, - expansion_search - ) {} - - /** - * Returns the dimensionality of vectors. - * @return {bigint} The dimensionality of vectors. 
- */ - dimensions() {} - - /** - * Returns the bigint of vectors currently indexed. - * @return {bigint} The bigint of vectors currently indexed. - */ - size() {} - - /** - * Returns index capacity. - * @return {bigint} The capacity of index. - */ - capacity() {} - - /** - * Returns connectivity. - * @return {bigint} The connectivity of index. - */ - connectivity() {} - - /** - * Write index to a file. - * @param {string} path File path to write. - */ - save(path) {} - - /** - * Load index from a file. - * @param {string} path File path to read. - */ - load(path) {} - - /** - * View index from a file, without loading into RAM. - * @param {string} path File path to read. - */ - view(path) {} - - /** - * Add n vectors of dimension d to the index. - * - * @param {bigint | bigint[]} keys Input identifiers for every vector. - * @param {Float32Array | Float32Array[]} mat Input matrix, matrix of size n * d. - */ - add(keys, mat) {} - - /** - * Query n vectors of dimension d to the index. Return at most k vectors for each. - * If there are not enough results for a query, the result array is padded with -1s. - * - * @param {Float32Array} mat Input vectors to search, matrix of size n * d. - * @param {bigint} k The bigint of nearest neighbors to search for. - * @return {Matches} Output of the search result. - */ - search(mat, k) {} - - /** - * Check if an entry is contained in the index. - * - * @param {bigint} key Identifier to look up. - */ - contains(key) {} - - /** - * Remove a vector from the index. - * - * @param {bigint} key Input identifier for every vector to be removed. 
- */ - remove(key) {} -} diff --git a/javascript/lib.cpp b/javascript/lib.cpp index 5a8353b7..ee933e0b 100644 --- a/javascript/lib.cpp +++ b/javascript/lib.cpp @@ -20,10 +20,10 @@ using namespace unum::usearch; using namespace unum; -class Index : public Napi::ObjectWrap { +class CompiledIndex : public Napi::ObjectWrap { public: static Napi::Object Init(Napi::Env env, Napi::Object exports); - Index(Napi::CallbackInfo const& ctx); + CompiledIndex(Napi::CallbackInfo const& ctx); private: Napi::Value GetDimensions(Napi::CallbackInfo const& ctx); @@ -39,405 +39,294 @@ class Index : public Napi::ObjectWrap { Napi::Value Search(Napi::CallbackInfo const& ctx); Napi::Value Remove(Napi::CallbackInfo const& ctx); Napi::Value Contains(Napi::CallbackInfo const& ctx); + Napi::Value Count(Napi::CallbackInfo const& ctx); std::unique_ptr native_; }; -Napi::Object Index::Init(Napi::Env env, Napi::Object exports) { +Napi::Object CompiledIndex::Init(Napi::Env env, Napi::Object exports) { Napi::Function func = DefineClass( // - env, "Index", + env, "CompiledIndex", { - InstanceMethod("dimensions", &Index::GetDimensions), - InstanceMethod("size", &Index::GetSize), - InstanceMethod("capacity", &Index::GetCapacity), - InstanceMethod("connectivity", &Index::GetConnectivity), - InstanceMethod("add", &Index::Add), - InstanceMethod("search", &Index::Search), - InstanceMethod("remove", &Index::Remove), - InstanceMethod("contains", &Index::Contains), - InstanceMethod("save", &Index::Save), - InstanceMethod("load", &Index::Load), - InstanceMethod("view", &Index::View), + InstanceMethod("dimensions", &CompiledIndex::GetDimensions), + InstanceMethod("size", &CompiledIndex::GetSize), + InstanceMethod("capacity", &CompiledIndex::GetCapacity), + InstanceMethod("connectivity", &CompiledIndex::GetConnectivity), + InstanceMethod("add", &CompiledIndex::Add), + InstanceMethod("search", &CompiledIndex::Search), + InstanceMethod("remove", &CompiledIndex::Remove), + InstanceMethod("contains", 
&CompiledIndex::Contains), + InstanceMethod("count", &CompiledIndex::Count), + InstanceMethod("save", &CompiledIndex::Save), + InstanceMethod("load", &CompiledIndex::Load), + InstanceMethod("view", &CompiledIndex::View), }); Napi::FunctionReference* constructor = new Napi::FunctionReference(); *constructor = Napi::Persistent(func); env.SetInstanceData(constructor); - exports.Set("Index", func); + exports.Set("CompiledIndex", func); return exports; } -Index::Index(Napi::CallbackInfo const& ctx) : Napi::ObjectWrap(ctx) { - Napi::Env env = ctx.Env(); +std::size_t napi_argument_to_size(Napi::Value v) { + return static_cast(v.As().DoubleValue()); +} - int length = ctx.Length(); - if (length == 0 || length >= 2 || !ctx[0].IsObject()) { - Napi::TypeError::New(env, "Pass args as named objects: dimensions: uint, capacity: uint, metric: str") - .ThrowAsJavaScriptException(); - return; - } +CompiledIndex::CompiledIndex(Napi::CallbackInfo const& ctx) : Napi::ObjectWrap(ctx) { - bool lossless = true; - Napi::Object params = ctx[0].As(); - std::size_t dimensions = - params.Has("dimensions") ? 
params.Get("dimensions").As().Uint64Value(&lossless) : 0; - - index_limits_t limits; - std::size_t connectivity = default_connectivity(); - std::size_t expansion_add = default_expansion_add(); - std::size_t expansion_search = default_expansion_search(); - - if (params.Has("capacity")) - limits.members = params.Get("capacity").As().Uint64Value(&lossless); - if (params.Has("connectivity")) - connectivity = params.Get("connectivity").As().Uint64Value(&lossless); - if (params.Has("expansion_add")) - expansion_add = params.Get("expansion_add").As().Uint64Value(&lossless); - if (params.Has("expansion_search")) - expansion_search = params.Get("expansion_search").As().Uint64Value(&lossless); - if (!lossless) { - Napi::TypeError::New(env, "Arguments must be unsigned integers").ThrowAsJavaScriptException(); - return; - } - - scalar_kind_t quantization = scalar_kind_t::f32_k; - if (params.Has("quantization")) { - std::string quantization_str = params.Get("quantization").As().Utf8Value(); - expected_gt expected = scalar_kind_from_name(quantization_str.c_str(), quantization_str.size()); - if (!expected) { - Napi::TypeError::New(env, expected.error.release()).ThrowAsJavaScriptException(); - return; - } - quantization = *expected; - } - - // By default we use the Inner Product similarity - metric_kind_t metric_kind = metric_kind_t::ip_k; - if (params.Has("metric")) { - std::string metric_str = params.Get("metric").As().Utf8Value(); - expected_gt expected = metric_from_name(metric_str.c_str(), metric_str.size()); - if (!expected) { - Napi::TypeError::New(env, expected.error.release()).ThrowAsJavaScriptException(); - return; - } - metric_kind = *expected; - } + // Directly assign the parameters without checks + std::size_t dimensions = napi_argument_to_size(ctx[0]); + metric_kind_t metric_kind = metric_from_name(ctx[1].As().Utf8Value().c_str()); + scalar_kind_t quantization = scalar_kind_from_name(ctx[2].As().Utf8Value().c_str()); + std::size_t connectivity = 
napi_argument_to_size(ctx[3]); + std::size_t expansion_add = napi_argument_to_size(ctx[4]); + std::size_t expansion_search = napi_argument_to_size(ctx[5]); + bool multi = ctx[6].As().Value(); metric_punned_t metric(dimensions, metric_kind, quantization); index_dense_config_t config(connectivity, expansion_add, expansion_search); + config.multi = multi; + native_.reset(new index_dense_t(index_dense_t::make(metric, config))); - native_->reserve(limits); + if (!native_) + Napi::Error::New(ctx.Env(), "Out of memory!").ThrowAsJavaScriptException(); } -Napi::Value Index::GetDimensions(Napi::CallbackInfo const& ctx) { - return Napi::BigInt::New(ctx.Env(), static_cast(native_->dimensions())); +Napi::Value CompiledIndex::GetDimensions(Napi::CallbackInfo const& ctx) { + return Napi::Number::New(ctx.Env(), static_cast(native_->dimensions())); } -Napi::Value Index::GetSize(Napi::CallbackInfo const& ctx) { - return Napi::BigInt::New(ctx.Env(), static_cast(native_->size())); +Napi::Value CompiledIndex::GetConnectivity(Napi::CallbackInfo const& ctx) { + return Napi::Number::New(ctx.Env(), static_cast(native_->connectivity())); } -Napi::Value Index::GetConnectivity(Napi::CallbackInfo const& ctx) { - return Napi::BigInt::New(ctx.Env(), static_cast(native_->connectivity())); +Napi::Value CompiledIndex::GetSize(Napi::CallbackInfo const& ctx) { + return Napi::Number::New(ctx.Env(), static_cast(native_->size())); } -Napi::Value Index::GetCapacity(Napi::CallbackInfo const& ctx) { - return Napi::BigInt::New(ctx.Env(), static_cast(native_->capacity())); +Napi::Value CompiledIndex::GetCapacity(Napi::CallbackInfo const& ctx) { + return Napi::Number::New(ctx.Env(), static_cast(native_->capacity())); } -void Index::Save(Napi::CallbackInfo const& ctx) { - Napi::Env env = ctx.Env(); - - int length = ctx.Length(); - if (length == 0 || !ctx[0].IsString()) { - Napi::TypeError::New(env, "Function expects a string path argument").ThrowAsJavaScriptException(); - return; - } - +void 
CompiledIndex::Save(Napi::CallbackInfo const& ctx) { try { std::string path = ctx[0].As(); auto result = native_->save(path.c_str()); if (!result) - return Napi::TypeError::New(env, result.error.release()).ThrowAsJavaScriptException(); + Napi::TypeError::New(ctx.Env(), result.error.release()).ThrowAsJavaScriptException(); } catch (...) { - Napi::TypeError::New(env, "Serialization failed").ThrowAsJavaScriptException(); + Napi::TypeError::New(ctx.Env(), "Serialization failed").ThrowAsJavaScriptException(); } } -void Index::Load(Napi::CallbackInfo const& ctx) { - Napi::Env env = ctx.Env(); - - int length = ctx.Length(); - if (length == 0 || !ctx[0].IsString()) { - Napi::TypeError::New(env, "Function expects a string path argument").ThrowAsJavaScriptException(); - return; - } - +void CompiledIndex::Load(Napi::CallbackInfo const& ctx) { try { std::string path = ctx[0].As(); auto result = native_->load(path.c_str()); if (!result) - return Napi::TypeError::New(env, result.error.release()).ThrowAsJavaScriptException(); + Napi::TypeError::New(ctx.Env(), result.error.release()).ThrowAsJavaScriptException(); } catch (...) { - Napi::TypeError::New(env, "Loading failed").ThrowAsJavaScriptException(); + Napi::TypeError::New(ctx.Env(), "Loading failed").ThrowAsJavaScriptException(); } } -void Index::View(Napi::CallbackInfo const& ctx) { - Napi::Env env = ctx.Env(); - - int length = ctx.Length(); - if (length == 0 || !ctx[0].IsString()) { - Napi::TypeError::New(env, "Function expects a string path argument").ThrowAsJavaScriptException(); - return; - } +void CompiledIndex::View(Napi::CallbackInfo const& ctx) { try { std::string path = ctx[0].As(); auto result = native_->view(path.c_str()); if (!result) - return Napi::TypeError::New(env, result.error.release()).ThrowAsJavaScriptException(); + Napi::TypeError::New(ctx.Env(), result.error.release()).ThrowAsJavaScriptException(); } catch (...) 
{ - Napi::TypeError::New(env, "Memory-mapping failed").ThrowAsJavaScriptException(); + Napi::TypeError::New(ctx.Env(), "Memory-mapping failed").ThrowAsJavaScriptException(); } } -void Index::Add(Napi::CallbackInfo const& ctx) { - Napi::Env env = ctx.Env(); - - if (ctx.Length() < 2) - return Napi::TypeError::New(env, "Expects at least two arguments").ThrowAsJavaScriptException(); +void CompiledIndex::Add(Napi::CallbackInfo const& ctx) { + // Extract keys and vectors from arguments + Napi::BigUint64Array keys = ctx[0].As(); + std::size_t tasks = keys.ElementLength(); - using key_t = typename index_dense_t::key_t; - std::size_t index_dimensions = native_->dimensions(); + // Ensure there is enough capacity + if (native_->size() + tasks >= native_->capacity()) + native_->reserve(ceil2(native_->size() + tasks)); - auto add = [&](Napi::BigInt key_js, Napi::Float32Array vector_js) { - bool lossless = true; - key_t key = static_cast(key_js.Uint64Value(&lossless)); - if (!lossless) - return Napi::TypeError::New(env, "Keys must be unsigned integers").ThrowAsJavaScriptException(); - - float const* vector = vector_js.Data(); - std::size_t dimensions = static_cast(vector_js.ElementLength()); - - if (dimensions != index_dimensions) - return Napi::TypeError::New(env, "Wrong number of dimensions").ThrowAsJavaScriptException(); - - try { - auto result = native_->add(key, vector); - if (!result) - return Napi::TypeError::New(env, result.error.release()).ThrowAsJavaScriptException(); - - } catch (std::bad_alloc const&) { - return Napi::TypeError::New(env, "Out of memory").ThrowAsJavaScriptException(); - } catch (...) 
{ - return Napi::TypeError::New(env, "Insertion failed").ThrowAsJavaScriptException(); - } + // Create an instance of the executor with the default number of threads + auto run_parallel = [&](auto vectors) { + executor_stl_t executor; + executor.fixed(tasks, [&](std::size_t /*thread_idx*/, std::size_t task_idx) { + native_->add(static_cast(keys[task_idx]), vectors + task_idx * native_->dimensions()); + }); }; - if (ctx[0].IsArray() && ctx[1].IsArray()) { - Napi::Array keys_js = ctx[0].As(); - Napi::Array vectors_js = ctx[1].As(); - auto length = keys_js.Length(); - - if (length != vectors_js.Length()) - return Napi::TypeError::New(env, "The number of keys must match the number of vectors") - .ThrowAsJavaScriptException(); - - if (native_->size() + length >= native_->capacity()) - if (!native_->reserve(ceil2(native_->size() + length))) - return Napi::TypeError::New(env, "Out of memory!").ThrowAsJavaScriptException(); - - for (std::size_t i = 0; i < length; i++) { - Napi::Value key_js = keys_js[i]; - Napi::Value vector_js = vectors_js[i]; - add(key_js.As(), vector_js.As()); - } - - } else if (ctx[0].IsBigInt() && ctx[1].IsTypedArray()) { - if (native_->size() + 1 >= native_->capacity()) - native_->reserve(ceil2(native_->size() + 1)); - add(ctx[0].As(), ctx[1].As()); - } else - return Napi::TypeError::New(env, "Invalid argument type, expects integral key(s) and float vector(s)") + Napi::TypedArray vectors = ctx[1].As(); + if (vectors.TypedArrayType() == napi_float32_array) { + run_parallel(vectors.As().Data()); + } else if (vectors.TypedArrayType() == napi_float64_array) { + run_parallel(vectors.As().Data()); + } else if (vectors.TypedArrayType() == napi_int8_array) { + run_parallel(vectors.As().Data()); + } else { + Napi::TypeError::New(ctx.Env(), + "Unsupported TypedArray. 
Supported types are Float32Array, Float64Array, and Int8Array.") .ThrowAsJavaScriptException(); + } } -Napi::Value Index::Search(Napi::CallbackInfo const& ctx) { +Napi::Value CompiledIndex::Search(Napi::CallbackInfo const& ctx) { Napi::Env env = ctx.Env(); - if (ctx.Length() < 2 || !ctx[0].IsTypedArray() || !ctx[1].IsBigInt()) { - Napi::TypeError::New(env, "Expects a and the number of wanted results").ThrowAsJavaScriptException(); - return {}; - } - - Napi::Float32Array vector_js = ctx[0].As(); - Napi::BigInt wanted_js = ctx[1].As(); - - float const* vector = vector_js.Data(); - std::size_t dimensions = static_cast(vector_js.ElementLength()); - if (dimensions != native_->dimensions()) { - Napi::TypeError::New(env, "Wrong number of dimensions").ThrowAsJavaScriptException(); - return {}; - } + Napi::TypedArray queries = ctx[0].As(); + std::size_t tasks = queries.ElementLength() / native_->dimensions(); + std::size_t wanted = napi_argument_to_size(ctx[1]); - bool lossless = true; - std::uint64_t wanted = wanted_js.Uint64Value(&lossless); - if (!lossless) { - Napi::TypeError::New(env, "Wanted number of matches must be an unsigned integer").ThrowAsJavaScriptException(); - return {}; - } + auto run_parallel = [&](auto vectors) { + Napi::Array result_js = Napi::Array::New(env, 3); + Napi::BigUint64Array matches_js = Napi::BigUint64Array::New(env, tasks * wanted); + Napi::Float32Array distances_js = Napi::Float32Array::New(env, tasks * wanted); + Napi::BigUint64Array counts_js = Napi::BigUint64Array::New(env, tasks); - using key_t = typename index_dense_t::key_t; - Napi::TypedArrayOf matches_js = Napi::TypedArrayOf::New(env, wanted); - static_assert(std::is_same::value, "Matches.key interface expects BigUint64Array"); - Napi::Float32Array distances_js = Napi::Float32Array::New(env, wanted); - try { + auto matches_data = matches_js.Data(); + auto distances_data = distances_js.Data(); + auto counts_data = counts_js.Data(); - auto result = native_->search(vector, wanted); - 
if (!result) { - Napi::TypeError::New(env, result.error.release()).ThrowAsJavaScriptException(); - return {}; + try { + executor_stl_t executor; + executor.fixed(tasks, [&](std::size_t /*thread_idx*/, std::size_t task_idx) { + auto result = native_->search(vectors + task_idx * native_->dimensions(), wanted); + if (!result) { + // Handle the error appropriately + // For example, log the error or set some flag in the result_js object + } else { + counts_data[task_idx] = result.dump_to(matches_data + task_idx * native_->dimensions(), + distances_data + task_idx * native_->dimensions()); + } + }); + } catch (std::bad_alloc const&) { + Napi::TypeError::New(env, "Out of memory").ThrowAsJavaScriptException(); + } catch (...) { + Napi::TypeError::New(env, "Search failed").ThrowAsJavaScriptException(); } - std::uint64_t count = result.dump_to(matches_js.Data(), distances_js.Data()); - Napi::Object result_js = Napi::Object::New(env); - result_js.Set("keys", matches_js); - result_js.Set("distances", distances_js); - result_js.Set("count", Napi::BigInt::New(env, count)); + result_js.Set(0u, matches_js); + result_js.Set(1u, distances_js); + result_js.Set(2u, counts_js); return result_js; - } catch (std::bad_alloc const&) { - Napi::TypeError::New(env, "Out of memory").ThrowAsJavaScriptException(); - return {}; - } catch (...) { - Napi::TypeError::New(env, "Search failed").ThrowAsJavaScriptException(); - return {}; + }; + + if (queries.TypedArrayType() == napi_float32_array) { + return run_parallel(queries.As().Data()); + } else if (queries.TypedArrayType() == napi_float64_array) { + return run_parallel(queries.As().Data()); + } else if (queries.TypedArrayType() == napi_int8_array) { + return run_parallel(queries.As().Data()); + } else { + Napi::TypeError::New(env, + "Unsupported TypedArray. 
Supported types are Float32Array, Float64Array, and Int8Array.") + .ThrowAsJavaScriptException(); + return env.Null(); } } -Napi::Value Index::Remove(Napi::CallbackInfo const& ctx) { +Napi::Value CompiledIndex::Remove(Napi::CallbackInfo const& ctx) { Napi::Env env = ctx.Env(); - if (ctx.Length() < 1 || !ctx[0].IsBigInt()) { - Napi::TypeError::New(env, "Expects an entry identifier").ThrowAsJavaScriptException(); - return {}; - } - - Napi::BigInt key_js = ctx[0].As(); - bool lossless = true; - std::uint64_t key = key_js.Uint64Value(&lossless); - if (!lossless) { - Napi::TypeError::New(env, "Identifier must be an unsigned integer").ThrowAsJavaScriptException(); - return {}; - } - - try { - auto result = native_->remove(key); - if (!result) { - Napi::TypeError::New(env, result.error.release()).ThrowAsJavaScriptException(); - return {}; - } - return Napi::Boolean::New(env, result.completed); - } catch (std::bad_alloc const&) { - Napi::TypeError::New(env, "Out of memory").ThrowAsJavaScriptException(); - return {}; - } catch (...) 
{ - Napi::TypeError::New(env, "Search failed").ThrowAsJavaScriptException(); - return {}; + Napi::BigUint64Array keys = ctx[0].As(); + std::size_t length = keys.ElementLength(); + Napi::Array result = Napi::Array::New(env, length); + for (std::size_t i = 0; i < length; ++i) { + result[i] = Napi::Number::New(env, native_->remove(static_cast(keys[i])).completed); } + return result; } -Napi::Value Index::Contains(Napi::CallbackInfo const& ctx) { +Napi::Value CompiledIndex::Contains(Napi::CallbackInfo const& ctx) { Napi::Env env = ctx.Env(); - if (ctx.Length() < 1 || !ctx[0].IsBigInt()) { - Napi::TypeError::New(env, "Expects an entry identifier").ThrowAsJavaScriptException(); - return {}; - } - - Napi::BigInt key_js = ctx[0].As(); - bool lossless = true; - std::uint64_t key = key_js.Uint64Value(&lossless); - if (!lossless) { - Napi::TypeError::New(env, "Identifier must be an unsigned integer").ThrowAsJavaScriptException(); - return {}; - } - - try { - bool result = native_->contains(key); - return Napi::Boolean::New(env, result); - } catch (std::bad_alloc const&) { - Napi::TypeError::New(env, "Out of memory").ThrowAsJavaScriptException(); - return {}; - } catch (...) { - Napi::TypeError::New(env, "Search failed").ThrowAsJavaScriptException(); - return {}; - } + Napi::BigUint64Array keys = ctx[0].As(); + std::size_t length = keys.ElementLength(); + Napi::Array result = Napi::Array::New(env, length); + for (std::size_t i = 0; i < length; ++i) + result[i] = Napi::Boolean::New(env, native_->contains(static_cast(keys[i]))); + return result; } -Napi::Value ExactSearch(Napi::CallbackInfo const& info) { - Napi::Env env = info.Env(); - - // Validate the number of arguments and their types. 
- if (info.Length() < 5) { - Napi::TypeError::New(env, "Wrong number of arguments").ThrowAsJavaScriptException(); - return env.Null(); - } - - // Dataset - if (!info[0].IsTypedArray()) { - Napi::TypeError::New(env, "Dataset should be a TypedArray").ThrowAsJavaScriptException(); - return env.Null(); - } - Napi::TypedArray dataset = info[0].As(); - - // Queries - if (!info[1].IsTypedArray()) { - Napi::TypeError::New(env, "Queries should be a TypedArray").ThrowAsJavaScriptException(); - return env.Null(); - } - Napi::TypedArray queries = info[1].As(); - - // Dimensions - if (!info[2].IsBigInt()) { - Napi::TypeError::New(env, "Dimensions should be a BigInt").ThrowAsJavaScriptException(); - return env.Null(); - } - std::uint64_t dimensions = info[2].As().Uint64Value(); +Napi::Value CompiledIndex::Count(Napi::CallbackInfo const& ctx) { + Napi::Env env = ctx.Env(); + Napi::BigUint64Array keys = ctx[0].As(); + std::size_t length = keys.ElementLength(); + Napi::Array result = Napi::Array::New(env, length); + for (std::size_t i = 0; i < length; ++i) + result[i] = Napi::Boolean::New(env, native_->count(static_cast(keys[i]))); + return result; +} - // Count - if (!info[3].IsBigInt()) { - Napi::TypeError::New(env, "Count should be a BigInt").ThrowAsJavaScriptException(); - return env.Null(); - } - std::uint64_t count = info[3].As().Uint64Value(); +Napi::Value compiledExactSearch(Napi::CallbackInfo const& ctx) { + Napi::Env env = ctx.Env(); - // Metric - if (!info[4].IsString()) { - Napi::TypeError::New(env, "Metric should be a string").ThrowAsJavaScriptException(); - return env.Null(); + // Extracting parameters directly without additional type checks. 
+ Napi::TypedArray dataset = ctx[0].As(); + Napi::ArrayBuffer datasetBuffer = dataset.ArrayBuffer(); + Napi::TypedArray queries = ctx[1].As(); + Napi::ArrayBuffer queriesBuffer = queries.ArrayBuffer(); + std::uint64_t dimensions = napi_argument_to_size(ctx[2]); + std::uint64_t wanted = napi_argument_to_size(ctx[3]); + metric_kind_t metric_kind = metric_from_name(ctx[4].As().Utf8Value().c_str()); + + scalar_kind_t quantization; + std::size_t bytes_per_scalar; + switch (queries.TypedArrayType()) { + case napi_float64_array: quantization = scalar_kind_t::f64_k, bytes_per_scalar = 8; break; + case napi_int8_array: quantization = scalar_kind_t::i8_k, bytes_per_scalar = 1; break; + default: quantization = scalar_kind_t::f32_k, bytes_per_scalar = 4; break; } - std::string metric_str = info[4].As().Utf8Value(); - metric_punned_t metric(dimensions, metric_kind_t::cos_k); // Adjust as needed based on the metric_str - // Perform exact search + metric_punned_t metric(dimensions, metric_kind, quantization); exact_search_t search; - auto results = search( // - (byte_t const*)dataset.Data(), dataset.ElementLength() / dimensions, dimensions * sizeof(float), // - (byte_t const*)queries.Data(), queries.ElementLength() / dimensions, dimensions * sizeof(float), // - count, metric); - // Construct the result object - Napi::Object result = Napi::Object::New(env); + // Performing the exact search. 
+ std::size_t dataset_size = dataset.ElementLength() / dimensions; + std::size_t queries_size = queries.ElementLength() / dimensions; + auto results = search( // + reinterpret_cast(datasetBuffer.Data()), // + dataset_size, // + dimensions * bytes_per_scalar, // + reinterpret_cast(queriesBuffer.Data()), // + queries_size, // + dimensions * bytes_per_scalar, // + wanted, metric); + + if (!results) + Napi::TypeError::New(env, "Out of memory").ThrowAsJavaScriptException(); - result.Set("keys", /* TODO: populate keys */); - result.Set("distances", /* TODO: populate distances */); - result.Set("count", Napi::BigInt::New(env, results.size())); + // Constructing the result object + Napi::Array result_js = Napi::Array::New(env, 3); + Napi::BigUint64Array matches_js = Napi::BigUint64Array::New(env, queries_size * wanted); + Napi::Float32Array distances_js = Napi::Float32Array::New(env, queries_size * wanted); + Napi::BigUint64Array counts_js = Napi::BigUint64Array::New(env, queries_size); + + auto matches_data = matches_js.Data(); + auto distances_data = distances_js.Data(); + auto counts_data = counts_js.Data(); + + // Export into JS buffers + for (std::size_t task_idx = 0; task_idx != queries_size; ++task_idx) { + auto result = results.at(task_idx); + counts_data[task_idx] = wanted; + for (std::size_t result_idx = 0; result_idx != wanted; ++result_idx) { + matches_data[task_idx * wanted + result_idx] = result[result_idx].offset; + distances_data[task_idx * wanted + result_idx] = result[result_idx].distance; + } + } - return result; + result_js.Set(0u, matches_js); + result_js.Set(1u, distances_js); + result_js.Set(2u, counts_js); + return result_js; } Napi::Object InitAll(Napi::Env env, Napi::Object exports) { - exports.Set("exactSearch", Napi::Function::New(env, ExactSearch)); - return Index::Init(env, exports); + exports.Set("compiledExactSearch", Napi::Function::New(env, compiledExactSearch)); + return CompiledIndex::Init(env, exports); } NODE_API_MODULE(usearch, 
InitAll) diff --git a/javascript/usearch.d.ts b/javascript/usearch.d.ts deleted file mode 100644 index 0049922e..00000000 --- a/javascript/usearch.d.ts +++ /dev/null @@ -1,193 +0,0 @@ - -export enum MetricKind { - Unknown = 'unknown', - Cos = 'cos', - IP = 'ip', - L2sq = 'l2sq', - Haversine = 'haversine', - Pearson = 'pearson', - Jaccard = 'jaccard', - Hamming = 'hamming', - Tanimoto = 'tanimoto', - Sorensen = 'sorensen' -} - -export enum ScalarKind { - Unknown = 'unknown', - F32 = 'f32', - F64 = 'f64', - F16 = 'f16', - I8 = 'i8', - B1 = 'b1' -} - -export type IntOrAlike = number | bigint; -export type Keys = BigUint64Array; -export type Distances = Float32Array; - -export type KeyOrKeys = bigint | bigint[] | BigUint64Array; -export type IndicatorOrIndicators = boolean | boolean[]; -export type CountOrCounts = bigint | BigUint64Array; -export type VectorOrVectors = Float32Array | Float64Array | Int8Array; - -/** Represents a set of search results */ -export interface Matches { - /** Keys of the nearest neighbors found (size: n*k). */ - keys: Keys; - /** Distances of the nearest neighbors found (size: n*k). */ - distances: Distances; -} - -/** Represents a set of batched search results */ -export class BatchMatches { - /** Keys of the nearest neighbors found (size: n*k). */ - keys: Keys; - /** Distances of the nearest neighbors found (size: n*k). */ - distances: Distances; - /** Counts of the nearest neighbors found (size: n*k). */ - counts: BigUint64Array; - /** Limit for search results per query. */ - k: bigint; - - /** Retrieve Matches object at the specified index in the batch. */ - get(i: IntOrAlike): Matches; -} - -/** K-Approximate Nearest Neighbors search index. */ -export class Index { - - /** - * Constructs a new index. 
- * - * @param {IntOrAlike} dimensions - * @param {MetricKind} metric - * @param {ScalarKind} quantization - * @param {IntOrAlike} capacity - * @param {IntOrAlike} connectivity - * @param {IntOrAlike} expansion_add - * @param {IntOrAlike} expansion_search - * @param {boolean} multi - */ - constructor( - dimensions: IntOrAlike, - metric: MetricKind, - quantization: ScalarKind, - capacity: IntOrAlike, - connectivity: IntOrAlike, - expansion_add: IntOrAlike, - expansion_search: IntOrAlike, - multi: boolean, - ); - - /** - * Returns the dimensionality of vectors. - * @return {bigint} The dimensionality of vectors. - */ - dimensions(): bigint; - - /** - * Returns the bigint of vectors currently indexed. - * @return {bigint} The number of vectors currently indexed. - */ - size(): bigint; - - /** - * Returns index capacity. - * @return {bigints} The capacity of index. - */ - capacity(): bigint; - - /** - * Returns connectivity. - * @return {bigint} The connectivity of index. - */ - connectivity(): bigint; - - /** - * Write index to a file. - * @param {string} path File path to write. - */ - save(path: string): void; - - /** - * Load index from a file. - * @param {string} path File path to read. - */ - load(path: string): void; - - /** - * View index from a file, without loading into RAM. - * @param {string} path File path to read. - */ - view(path: string): void; - - /** - * Add n vectors of dimension d to the index. - * - * @param {KeyOrKeys} keys Input identifiers for every vector. - * @param {VectorOrVectors} vectors Input matrix, matrix of size n * d. - */ - add(keys: KeyOrKeys, vectors: VectorOrVectors): void; - - /** - * Query n vectors of dimension d to the index. Return at most k vectors for each. - * If there are not enough results for a query, the result array is padded with -1s. - * - * @param {VectorOrVectors} vectors Input vectors to search, matrix of size n * d. - * @param {IntOrAlike} k The number of nearest neighbors to search for. 
- * @return {Matches | BatchMatches} Search results for one or more queries. - */ - search(vectors: VectorOrVectors, k: IntOrAlike): Matches | BatchMatches; - - /** - * Check if one or more entries are contained in the index. - * @param {KeyOrKeys} keys - Identifier(s) to look up. - * @return {IndicatorOrIndicators} - Returns true if the key is contained in the index, false otherwise when a single key is provided. - * Returns an array of booleans corresponding to the presence of each key in the index when an array of keys is provided. - */ - contains(keys: KeyOrKeys): IndicatorOrIndicators; - - /** - * Check if one or more entries are contained in the index. - * @param {KeyOrKeys} keys - Identifier(s) to look up. - * @return {CountOrCounts} - Number of vectors found per query. - */ - contains(keys: KeyOrKeys): CountOrCounts; - - - /** - * Remove a vector from the index. - * - * @param {KeyOrKeys} keys Identifier(s) for every vector to be removed. - * @return {CountOrCounts} - Number of vectors deleted per query. - */ - remove(keys: KeyOrKeys): CountOrCounts; - -} - -/** - * Performs an exact search on the given dataset to find the best matching vectors for each query. - * - * @param {VectorOrVectors} dataset - The dataset containing vectors to be searched. It should be a flat array representing a matrix of size `n * dimensions`, where `n` is the number of vectors, and `dimensions` is the number of elements in each vector. - * @param {VectorOrVectors} queries - The queries containing vectors to search for in the dataset. It should be a flat array representing a matrix of size `m * dimensions`, where `m` is the number of query vectors, and `dimensions` is the number of elements in each vector. - * @param {IntOrAlike} dimensions - The dimensionality of the vectors in both the dataset and the queries. It defines the number of elements in each vector. - * @param {IntOrAlike} count - The number of nearest neighbors to return for each query. 
If the dataset contains fewer vectors than the specified count, the result will contain only the available vectors. - * @param {MetricKind} metric - The distance metric to be used for the search. It should be one of the supported metric strings, for example, "euclidean" for Euclidean distance, "cosine" for Cosine distance, etc. - * @return {Matches} - Returns a `Matches` object containing the results of the search. The `keys` field contains the indices of the matching vectors in the dataset, the `distances` field contains the distances between the query and the matching vectors, and the `count` field contains the actual number of matches found for each query. - * - * @example - * const dataset = new VectorOrVectors([1.0, 2.0, 3.0, 4.0]); // Two vectors: [1.0, 2.0] and [3.0, 4.0] - * const queries = new VectorOrVectors([1.5, 2.5]); // One vector: [1.5, 2.5] - * const dimensions = BigInt(2); - * const count = BigInt(1); - * const metric = "euclidean"; - * - * const result = exactSearch(dataset, queries, dimensions, count, metric); - * // result might be: - * // { - * // keys: BigUint64Array [ 1n ], - * // distances: VectorOrVectors [ some_value ], - * // count: 1n - * // } - */ -export function exactSearch(dataset: VectorOrVectors, queries: VectorOrVectors, dimensions: IntOrAlike, count: IntOrAlike, metric: MetricKind): Matches | BatchMatches; diff --git a/javascript/usearch.js b/javascript/usearch.js index 3d4a85ff..066a2356 100644 --- a/javascript/usearch.js +++ b/javascript/usearch.js @@ -1,2 +1,440 @@ -const usearch = require('bindings')('usearch'); -module.exports = usearch; \ No newline at end of file +const compiled = require('bindings')('usearch'); + +/** + * Enumeration representing the various metric kinds used to measure the distance between vectors in the index. 
+ * @enum {string} + * @readonly + */ +const MetricKind = { + Unknown: 'unknown', + Cos: 'cos', + IP: 'ip', + L2sq: 'l2sq', + Haversine: 'haversine', + Pearson: 'pearson', + Jaccard: 'jaccard', + Hamming: 'hamming', + Tanimoto: 'tanimoto', + Sorensen: 'sorensen' +}; + +/** + * Enumeration representing the various scalar kinds used to define the type of scalar values in vectors. + * @enum {string} + * @readonly + */ +const ScalarKind = { + Unknown: 'unknown', + F32: 'f32', + F64: 'f64', + F16: 'f16', + I8: 'i8', + B1: 'b1' +}; + +/** + * Represents a set of search results. + */ +class Matches { + /** + * Constructs a Matches object. + * + * @param {BigUint64Array} keys - The keys of the nearest neighbors found. + * @param {Float32Array} distances - The distances of the nearest neighbors found. + */ + constructor(keys, distances) { + this.keys = keys; + this.distances = distances; + } +} + +/** + * Represents a set of batched search results. + */ +class BatchMatches { + /** + * Constructs a BatchMatches object. + * + * @param {BigUint64Array} keys - The keys of the nearest neighbors found in the batch. + * @param {Float32Array} distances - The distances of the nearest neighbors found in the batch. + * @param {BigUint64Array} counts - The number of neighbors found for each query in the batch. + * @param {bigint} k - The limit for search results per query in the batch. + */ + constructor(keys, distances, counts, k) { + this.keys = keys; + this.distances = distances; + this.counts = counts; + this.k = k; + } + + /** + * Retrieves a Matches object at the specified index in the batch. + * + * @param {number} i - The index at which to retrieve the Matches object. + * @returns {Matches} - A Matches object representing the search results at the specified index in the batch. 
+ */ + get(i) { + const index = Number(i) * Number(this.k); + const count = Number(this.counts[i]); + const keysSlice = this.keys.slice(index, index + count); + const distancesSlice = this.distances.slice(index, index + count); + return new Matches(keysSlice, distancesSlice); + } +} + +function isOneKey(keys) { + return typeof keys === 'number' || typeof keys === 'bigint'; +} + +function normalizeKeys(keys) { + if (isOneKey(keys)) { + keys = BigUint64Array.of(BigInt(keys)); + } else if (Array.isArray(keys)) { + keys = keys.map(key => { + if (typeof key !== 'bigint' && typeof key !== 'number') + throw new Error("All keys must be integers or bigints."); + return BigInt(key); + }); + keys = BigUint64Array.from(keys); + } else if (!(keys instanceof BigUint64Array)) { + throw new Error("Keys must be a number, bigint, an array of numbers or bigints, or a BigUint64Array."); + } + return keys; +} + +function isVector(vectors) { + return vectors instanceof Float32Array || vectors instanceof Float64Array || vectors instanceof Int8Array; +} + +function normalizeVectors(vectors, dimensions, targetType = Float32Array) { + let flattenedVectors; + if (isVector(vectors)) { + flattenedVectors = (vectors.constructor === targetType) ? vectors : new targetType(vectors); + } else if (Array.isArray(vectors)) { + let totalLength = 0; + for (const vec of vectors) totalLength += vec.length; + + flattenedVectors = new targetType(totalLength); + let offset = 0; + for (const vec of vectors) { + flattenedVectors.set(vec, offset); + offset += vec.length; + } + } else { + throw new Error("Vectors must be a TypedArray or an array of arrays."); + } + + if (flattenedVectors.length % dimensions !== 0) + throw new Error("The size of the flattened vectors must be a multiple of the dimension of the vectors."); + + return flattenedVectors; +} + + +class Index { + + /** + * Constructs a new index. 
+ * + * @param {number} dimensionsOrConfigs + * @param {MetricKind} [metric=MetricKind.Cos] - Optional, default is 'cos'. + * @param {ScalarKind} [quantization=ScalarKind.F32] - Optional, default is 'f32'. + * @param {number} [connectivity=0] - Optional, default is 0. + * @param {number} [expansion_add=0] - Optional, default is 0. + * @param {number} [expansion_search=0] - Optional, default is 0. + * @param {boolean} [multi=false] - Optional, default is false. + * @throws Will throw an error if any of the parameters are of incorrect type or invalid value. + */ + constructor(dimensionsOrConfigs, metric = MetricKind.Cos, quantization = ScalarKind.F32, connectivity = 0, expansion_add = 0, expansion_search = 0, multi = false) { + let dimensions; + if (typeof dimensionsOrConfigs === 'object' && dimensionsOrConfigs !== null) { + // Parameters are provided as an object + ({ dimensions, metric = MetricKind.Cos, quantization = ScalarKind.F32, connectivity = 0, expansion_add = 0, expansion_search = 0, multi = false } = dimensionsOrConfigs); + } else if (typeof dimensionsOrConfigs === 'number' || typeof dimensionsOrConfigs === 'bigint') { + // Parameters are provided as individual arguments + dimensions = dimensionsOrConfigs; + } else { + throw new Error("Invalid arguments. Expected either individual arguments or a single object argument."); + } + + if (!Number.isInteger(dimensions) || !Number.isInteger(connectivity) || !Number.isInteger(expansion_add) || !Number.isInteger(expansion_search) || dimensions <= 0 || connectivity < 0 || expansion_add < 0 || expansion_search < 0) { + throw new Error("`dimensions`, `connectivity`, `expansion_add`, and `expansion_search` must be non-negative integers, with `dimensions` being positive."); + } + + if (typeof multi !== 'boolean') { + throw new Error("`multi` must be a boolean value."); + } + + if (!Object.values(MetricKind).includes(metric)) { + throw new Error(`Invalid metric: ${metric}. 
It must be one of: ${Object.values(MetricKind).join(', ')}`); + } + + if (!Object.values(ScalarKind).includes(quantization)) { + throw new Error(`Invalid quantization: ${quantization}. It must be one of: ${Object.values(ScalarKind).join(', ')}`); + } + + this._compiledIndex = new compiled.CompiledIndex(dimensions, metric, quantization, connectivity, expansion_add, expansion_search, multi); + } + + /** + * Add vectors to the index. + * + * This method accepts vectors and their corresponding keys for indexing. + * Each key should correspond to a vector. If a single key is provided, + * it is broadcasted to match the number of provided vectors. + * + * Vectors should be provided as a flat typed array representing a matrix + * where each row is a vector to be indexed. The matrix should have a size + * of n * d, where n is the number of vectors, and d is the dimensionality + * of the vectors. + * + * Keys should be provided as a BigInt or an array-like object of BigInts + * representing the unique identifier for each vector. + * + * @param {bigint|bigint[]|BigUint64Array} keys - Input identifiers for every vector. + * If a single key is provided, it is associated with all provided vectors. + * @param {Float32Array|Float64Array|Int8Array} vectors - Input matrix representing vectors, + * matrix of size n * d, where n is the number of vectors, and d is their dimensionality. + * @throws Will throw an error if the length of keys doesn't match the number of vectors + * or if it's not a single key. 
+ */ + add(keys, vectors) { + let normalizedKeys = normalizeKeys(keys); + let normalizedVectors = normalizeVectors(vectors, this._compiledIndex.dimensions()); + let countVectors = normalizedVectors.length / this._compiledIndex.dimensions(); + + // If a single key is provided but there are multiple vectors, + // broadcast the single key value to match the number of vectors + if (normalizedKeys.length === 1 && countVectors > 1) { + normalizedKeys = BigUint64Array.from({ length: countVectors }, () => normalizedKeys[0]); + } else if (normalizedKeys.length !== countVectors) { + throw new Error(`The length of keys (${normalizedKeys.length}) must match the number of vectors (${countVectors}) or be a single key.`); + } + + // Call the compiled method + this._compiledIndex.add(normalizedKeys, normalizedVectors); + } + + /** + * Perform a k-nearest neighbor search on the index. + * + * This method accepts a matrix of query vectors and returns the closest vectors + * from the index for each query. The method returns an object containing the keys, + * distances, and counts of the matches found. + * + * Vectors should be provided as a flat typed array representing a matrix where + * each row is a vector. The matrix should be of size n * d, where n is the + * number of query vectors, and d is their dimensionality. + * + * The parameter `k` specifies the number of nearest neighbors to return for each + * query vector. If there are not enough results for a query, the result array is + * padded with -1s. + * + * @param {Float32Array|Float64Array|Int8Array|Array<Array<number>>} vectors - Input matrix representing query vectors, can be a TypedArray or an array of arrays. + * @param {number} k - The number of nearest neighbors to search for each query vector. + * @return {Matches|BatchMatches} - Search results for one or more queries, containing keys, distances, and counts of the matches found.
+ * @throws Will throw an error if `k` is not a positive integer or if the size of the vectors is not a multiple of dimensions. + * @throws Will throw an error if `vectors` is not a valid input type (TypedArray or an array of arrays) or if its flattened size is not a multiple of dimensions. + */ + search(vectors, k) { + if (!Number.isInteger(k) || k <= 0) { + throw new Error("`k` must be a positive integer representing the number of nearest neighbors to search for."); + } + + const normalizedVectors = normalizeVectors(vectors, this._compiledIndex.dimensions()); + + // Call the compiled method and create Matches or BatchMatches object with the result + const result = this._compiledIndex.search(normalizedVectors, k); + const countInQueries = normalizedVectors.length / Number(this._compiledIndex.dimensions()); + if (countInQueries === 1) { + return new Matches(result[0], result[1]); + } else { + return new BatchMatches(result[0], result[1], result[2], k); + } + } + + /** + * Verifies the presence of one or more keys in the index. + * + * This method accepts one or multiple keys as input and returns a boolean or + * an array of booleans indicating whether each key is present in the index. + * + * @param {bigint|bigint[]|BigUint64Array} keys - The identifier(s) of the vector(s) to be checked for presence in the index. + * @return {boolean|boolean[]} - Returns true if a single key is contained in the index, false otherwise. Returns an array of booleans corresponding to the presence of each key in the index when multiple keys are provided. + * @throws Will throw an error if keys are not integers. + */ + contains(keys) { + let normalizedKeys = normalizeKeys(keys); + let normalizedResults = this._compiledIndex.contains(normalizedKeys); + if (isOneKey(keys)) + return normalizedResults[0]; + else + return normalizedResults; + } + + /** + * Counts the number of times each key shows up in the index.
+ * + * @param {bigint|bigint[]|BigUint64Array} keys - The identifier(s) of the vector(s) to be enumerated. + * @return {number|number[]} - Returns the number of vectors found when a single key is provided. Returns an array of big integers corresponding to the number of vectors found for each key when multiple keys are provided. + * @throws Will throw an error if keys are not integers. + */ + count(keys) { + let normalizedKeys = normalizeKeys(keys); + let normalizedResults = this._compiledIndex.count(normalizedKeys); + if (isOneKey(keys)) + return normalizedResults[0]; + else + return normalizedResults; + } + + /** + * Removes one or multiple vectors from the index. + * + * This method accepts one or multiple keys as input and removes the corresponding vectors from the index. + * It returns the number of vectors actually removed for each key provided. + * + * @param {bigint|bigint[]|BigUint64Array} keys - The identifier(s) of the vector(s) to be removed. + * @return {number|number[]} - Returns the number of vectors deleted when a single key is provided. Returns an array of big integers corresponding to the number of vectors deleted for each key when multiple keys are provided. + * @throws Will throw an error if keys are not integers. + */ + remove(keys) { + let normalizedKeys = normalizeKeys(keys); + let normalizedResults = this._compiledIndex.remove(normalizedKeys); + if (isOneKey(keys)) + return normalizedResults[0]; + else + return normalizedResults; + } + + /** + * Returns the dimensionality of vectors. + * @return {number} The dimensionality of vectors. + */ + dimensions() { return this._compiledIndex.dimensions() } + + /** + * Returns connectivity. + * @return {number} The connectivity of index. + */ + connectivity() { return this._compiledIndex.connectivity() } + + /** + * Returns the number of vectors currently indexed. + * @return {number} The number of vectors currently indexed.
+ */ + size() { return this._compiledIndex.size() } + + /** + * Returns index capacity. + * @return {number} The capacity of index. + */ + capacity() { return this._compiledIndex.capacity() } + + /** + * Write index to a file. + * @param {string} path File path to write. + * @throws Will throw an error if `path` is not a string. + */ + save(path) { + if (typeof path !== 'string') throw new Error("`path` must be a string representing the file path to write."); + this._compiledIndex.save(path); + } + + /** + * Load index from a file. + * @param {string} path File path to read. + * @throws Will throw an error if `path` is not a string. + */ + load(path) { + if (typeof path !== 'string') throw new Error("`path` must be a string representing the file path to read."); + this._compiledIndex.load(path); + } + + /** + * View index from a file, without loading into RAM. + * @param {string} path File path to read. + * @throws Will throw an error if `path` is not a string. + */ + view(path) { + if (typeof path !== 'string') throw new Error("`path` must be a string representing the file path to read."); + this._compiledIndex.view(path); + } +} + +/** + * Performs an exact search on the given dataset to find the best matching vectors for each query. + * + * @param {Float32Array|Float64Array|Int8Array|Array<Array<number>>} dataset - The dataset containing vectors to be searched. It can be a TypedArray or an array of arrays. + * @param {Float32Array|Float64Array|Int8Array|Array<Array<number>>} queries - The queries containing vectors to search for in the dataset. It can be a TypedArray or an array of arrays. + * @param {number} dimensions - The dimensionality of the vectors in both the dataset and the queries. It defines the number of elements in each vector. + * @param {number} count - The number of nearest neighbors to return for each query. It must not exceed the number of vectors in the dataset; otherwise an error is thrown.
+ * @param {MetricKind} metric - The distance metric to be used for the search. + * @return {Matches|BatchMatches} - Returns a `Matches` or `BatchMatches` object containing the results of the search. + * @throws Will throw an error if `dimensions` or `count` is not a positive integer. + * @throws Will throw an error if `metric` is not a valid MetricKind. + * @throws Will throw an error if `dataset` and `queries` are not valid input types (TypedArray or an array of arrays). + * @throws Will throw an error if the sizes of the flattened `dataset` and `queries` are not multiples of `dimensions`. + * @throws Will throw an error if `count` is greater than the number of vectors in the `dataset`. + * + * @example + * const dataset = [[1.0, 2.0], [3.0, 4.0]]; // Two vectors: [1.0, 2.0] and [3.0, 4.0] + * const queries = [[1.5, 2.5]]; // One vector: [1.5, 2.5] + * const dimensions = 2; // The number of elements in each vector. + * const count = 1; // The number of nearest neighbors to return for each query. + * const metric = MetricKind.IP; // Using the Inner Product distance metric. + * + * const result = exactSearch(dataset, queries, dimensions, count, metric); + * // result might be: + * // { + * // keys: BigUint64Array [ 1n ], + * // distances: Float32Array [ some_value ], + * // } + */ +function exactSearch(dataset, queries, dimensions, count, metric) { + + // Validate and normalize the dimensions and count (NaN and fractional values are rejected too) + dimensions = Number(dimensions); + count = Number(count); + if (!Number.isInteger(count) || !Number.isInteger(dimensions) || count <= 0 || dimensions <= 0) { + throw new Error("Dimensions and count must be positive integers."); + } + + // Validate metric + if (!Object.values(MetricKind).includes(metric)) { + throw new Error(`Invalid metric: ${metric}.
It must be one of: ${Object.values(MetricKind).join(', ')}`); + } + + // Flatten and normalize dataset and queries if they are arrays of arrays + let targetType; + if (dataset instanceof Float64Array) targetType = Float64Array; + else if (dataset instanceof Int8Array) targetType = Int8Array; + else targetType = Float32Array; // default to Float32Array if dataset is not Float64Array or Int8Array + + dataset = normalizeVectors(dataset, dimensions, targetType); + queries = normalizeVectors(queries, dimensions, targetType); + const countInDataset = dataset.length / dimensions; + const countInQueries = queries.length / dimensions; + if (count > countInDataset) { + throw new Error("Count must be equal or smaller than the number of vectors in the dataset."); + } + + // Call the compiled function with the normalized input + const result = compiled.exactSearch(dataset, queries, dimensions, count, metric); + + // Create and return a Matches or BatchMatches object with the result + if (countInQueries == 1) { + return new Matches(result[0], result[1]); + } else { + return new BatchMatches(result[0], result[1], result[2], count); + } +} + +module.exports = { + Index, + MetricKind, + ScalarKind, + Matches, + BatchMatches, + exactSearch, +}; diff --git a/javascript/usearch.test.js b/javascript/usearch.test.js index aae25263..a75eadc9 100644 --- a/javascript/usearch.test.js +++ b/javascript/usearch.test.js @@ -1,45 +1,37 @@ const test = require('node:test'); const assert = require('node:assert'); -const usearch = require('bindings')('usearch'); +const usearch = require('./usearch.js'); test('Single-entry operations', () => { - const index = new usearch.Index({ - metric: 'l2sq', - dimensions: 2n, - connectivity: 16n, - }); - - assert.equal(index.connectivity(), 16n, 'connectivity should be 16'); - assert.equal(index.dimensions(), 2n, 'dimensions should be 2'); - assert.equal(index.size(), 0n, 'initial size should be 0'); - - index.add(15n, new Float32Array([ 10, 20 ])); - 
index.add(16n, new Float32Array([ 10, 25 ])); - - assert.equal(index.size(), 2n, 'size after adding elements should be 2'); - assert.equal(index.contains(15n), true, 'entry must be present after insertion'); - - const results = index.search(new Float32Array([ 13, 14 ]), 2n); - - assert.deepEqual(results.keys, new BigUint64Array([ 15n, 16n ]), 'keys should be 15 and 16'); - assert.deepEqual(results.distances, new Float32Array([ 45, 130 ]), 'distances should be 45 and 130'); + const index = new usearch.Index(2, 'l2sq'); + + assert.equal(index.connectivity(), 16, 'connectivity should be 16'); + assert.equal(index.dimensions(), 2, 'dimensions should be 2'); + assert.equal(index.size(), 0, 'initial size should be 0'); + + index.add(15n, new Float32Array([10, 20])); + index.add(16n, new Float32Array([10, 25])); + + assert.equal(index.size(), 2, 'size after adding elements should be 2'); + assert.equal(index.contains(15), true, 'entry must be present after insertion'); + + const results = index.search(new Float32Array([13, 14]), 2); + + assert.deepEqual(results.keys, new BigUint64Array([15n, 16n]), 'keys should be 15 and 16'); + assert.deepEqual(results.distances, new Float32Array([45, 130]), 'distances should be 45 and 130'); }); test('Batch operations', () => { - const indexBatch = new usearch.Index({ - metric: 'l2sq', - dimensions: 2n, - connectivity: 16n, - }); + const indexBatch = new usearch.Index(2, 'l2sq'); const keys = [15n, 16n]; - const vectors = [new Float32Array([ 10, 20 ]), new Float32Array([ 10, 25 ])]; + const vectors = [new Float32Array([10, 20]), new Float32Array([10, 25])]; indexBatch.add(keys, vectors); assert.equal(indexBatch.size(), 2, 'size after adding batch should be 2'); - - const results = indexBatch.search(new Float32Array([ 13, 14 ]), 2n); - assert.deepEqual(results.keys, new BigUint64Array([ 15n, 16n ]), 'keys should be 15 and 16'); - assert.deepEqual(results.distances, new Float32Array([ 45, 130 ]), 'distances should be 45 and 130'); + const 
results = indexBatch.search(new Float32Array([13, 14]), 2); + + assert.deepEqual(results.keys, new BigUint64Array([15n, 16n]), 'keys should be 15 and 16'); + assert.deepEqual(results.distances, new Float32Array([45, 130]), 'distances should be 45 and 130'); }); diff --git a/package.json b/package.json index a0ea889e..3e4e0321 100644 --- a/package.json +++ b/package.json @@ -14,10 +14,6 @@ "email": "info@unum.cloud" }, "main": "javascript/usearch.js", - "types": "javascript/usearch.d.ts", - "files": [ - "javascript/usearch.d.ts" - ], "gypfile": true, "engines": { "node": "~10 >=10.20 || >=12.17" diff --git a/tsconfig.json b/tsconfig.json deleted file mode 100644 index dc7df0d4..00000000 --- a/tsconfig.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "typedocOptions": { - "entryPoints": [ - "javascript/usearch.d.ts" - ], - "out": "docs" - } -} \ No newline at end of file From 229b80572e3b8e07a5b28a504430f1cb386a1a73 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sun, 1 Oct 2023 21:24:51 -0700 Subject: [PATCH 8/8] Make: Refresh compilers for pre-release tests --- .github/workflows/prerelease.yml | 57 ++++++++++++++++++++++---------- .vscode/settings.json | 1 + CMakeLists.txt | 19 +++++++---- binding.gyp | 4 +-- c/CMakeLists.txt | 4 +-- c/README.md | 6 ++-- cpp/CMakeLists.txt | 5 ++- docs/compilation.md | 14 ++++---- 8 files changed, 68 insertions(+), 42 deletions(-) diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index 3aa9d911..4f43b800 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -17,14 +17,10 @@ permissions: jobs: - test_cpp_c_ubuntu: - name: Test C++ Ubuntu (${{ matrix.compiler }}) + test_cpp_c_ubuntu_gcc: + name: Test C++ Ubuntu (GCC) runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - compiler: [g++-9, g++-10, g++-11, clang-10, clang-11, clang-12] - + steps: - uses: actions/checkout@v3 with: @@ -32,20 +28,47 @@ jobs: - run: git 
submodule update --init --recursive - name: Install Dependencies + run: sudo apt update && sudo apt install -y cmake build-essential libjemalloc-dev libomp-dev g++-12 + + - name: Build run: | - sudo apt update && - sudo apt install -y cmake build-essential libjemalloc-dev ${{ matrix.compiler }} + export CC=gcc-12 + export CXX=g++-12 + cmake -B build -DCMAKE_BUILD_TYPE=Debug -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_SIMSIMD=1 -DUSEARCH_USE_JEMALLOC=1 + cmake --build build --config Debug + + - name: Run C++ tests + run: ./build/test_cpp + - name: Run C tests + run: ./build/test_c + + + test_cpp_c_ubuntu_clang: + name: Test C++ Ubuntu (Clang) + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + with: + ref: main-dev + - run: git submodule update --init --recursive + + - name: Install Dependencies + run: sudo apt update && sudo apt install -y cmake build-essential libjemalloc-dev clang-15 - name: Build run: | - export CXX=${{ matrix.compiler }} - cmake -B build -DCMAKE_BUILD_TYPE=Debug -DUSEARCH_BUILD_TEST=1 -DUSEARCH_BUILD_CTEST=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_SIMSIMD=1 -DUSEARCH_USE_JEMALLOC=1 + export CC=clang-15 + export CXX=clang++-15 + cmake -B build -DCMAKE_BUILD_TYPE=Debug -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_USE_SIMSIMD=1 -DUSEARCH_USE_JEMALLOC=1 cmake --build build --config Debug + - name: Run C++ tests run: ./build/test_cpp - name: Run C tests run: ./build/test_c + test_cpp_c_macos: name: Test C++ MacOS runs-on: macos-latest @@ -59,13 +82,13 @@ jobs: run: | brew update brew install cmake - + - name: Build run: | - cmake -B build -DCMAKE_BUILD_TYPE=Debug -DUSEARCH_BUILD_TEST=1 -DUSEARCH_BUILD_CTEST=1 + cmake -B build -DCMAKE_BUILD_TYPE=Debug -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 cmake --build build --config Debug - name: Run C++ tests - run: ./build/test + run: ./build/test_cpp - name: Run C tests run: ./build/test_c @@ -83,12 +106,12 @@ jobs: - name: 
Build run: | - cmake -B build -DCMAKE_BUILD_TYPE=Debug -DUSEARCH_BUILD_TEST=1 -DUSEARCH_BUILD_CTEST=1 + cmake -B build -DCMAKE_BUILD_TYPE=Debug -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 cmake --build build --config Debug - name: Run C++ tests - run: ./build/test + run: .\build\test_cpp - name: Run C tests - run: ./build/test_c + run: .\build\test_c test_python_311: name: Test Python diff --git a/.vscode/settings.json b/.vscode/settings.json index c8f37b24..d3821cb8 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -118,6 +118,7 @@ "downcasted", "Downcasting", "dtype", + "DUSEARCH", "emcmake", "equi", "equidimensional", diff --git a/CMakeLists.txt b/CMakeLists.txt index f27d7b72..f0182de7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,10 +21,10 @@ option(USEARCH_USE_SIMSIMD "Use SimSIMD hardware-accelerated metrics" OFF) option(USEARCH_USE_JEMALLOC "Use JeMalloc for faster memory allocations" OFF) option(USEARCH_USE_NATIVE_F16 "Use native half-precision types" OFF) -option(USEARCH_BUILD_TEST "Compile a native unit test in C++" ${USEARCH_IS_MAIN_PROJECT}) -option(USEARCH_BUILD_BENCHMARK "Compile a native benchmark in C++" ${USEARCH_IS_MAIN_PROJECT}) -option(USEARCH_BUILD_CLIB "Compile a native library for the C 99 interface" OFF) -option(USEARCH_BUILD_CTEST "Compile a test for the C 99 interface" OFF) +option(USEARCH_BUILD_TEST_CPP "Compile a native unit test in C++" ${USEARCH_IS_MAIN_PROJECT}) +option(USEARCH_BUILD_BENCH_CPP "Compile a native benchmark in C++" ${USEARCH_IS_MAIN_PROJECT}) +option(USEARCH_BUILD_LIB_C "Compile a native library for the C 99 interface" OFF) +option(USEARCH_BUILD_TEST_C "Compile a test for the C 99 interface" OFF) option(USEARCH_BUILD_WOLFRAM "Compile Wolfram Language bindings" OFF) # Includes @@ -220,7 +220,10 @@ function (setup_target TARGET_NAME) ) # Check if the compiler is AppleClang, and if not, add the leak sanitizer - if (NOT CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "AppleClang") + if 
(CMAKE_HOST_SYSTEM_NAME MATCHES "Darwin") + # It's likely AppleClang Adjust options as needed for AppleClang + else () + # It's likely LLVM Clang target_compile_options(${TARGET_NAME} PRIVATE $<$:-fsanitize=leak>) target_link_options(${TARGET_NAME} PRIVATE $<$:-fsanitize=leak>) endif () @@ -255,6 +258,8 @@ function (setup_target TARGET_NAME) endif () target_include_directories(${TARGET_NAME} PRIVATE ${USEARCH_HEADER_INCLUDES}) + set_target_properties(${TARGET_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}) + set_target_properties(${TARGET_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}) set_target_properties(${TARGET_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) set_target_properties(${TARGET_NAME} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) set_target_properties(${TARGET_NAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) @@ -272,7 +277,7 @@ function (setup_target TARGET_NAME) endfunction () -if (${USEARCH_BUILD_TEST} OR ${USEARCH_BUILD_BENCHMARK}) +if (${USEARCH_BUILD_TEST_CPP} OR ${USEARCH_BUILD_BENCH_CPP}) add_subdirectory(cpp) endif () @@ -284,6 +289,6 @@ if (${USEARCH_BUILD_WASM}) add_subdirectory(wasm) endif () -if (${USEARCH_BUILD_CLIB} OR ${USEARCH_BUILD_CTEST}) +if (${USEARCH_BUILD_LIB_C} OR ${USEARCH_BUILD_TEST_C}) add_subdirectory(c) endif () diff --git a/binding.gyp b/binding.gyp index 733a496d..5a6665c7 100644 --- a/binding.gyp +++ b/binding.gyp @@ -20,7 +20,7 @@ "-fexceptions", "-Wno-unknown-pragmas", "-Wno-maybe-uninitialized", - "-std=c++11", + "-std=c++17", ], "xcode_settings": { "GCC_ENABLE_CPP_EXCEPTIONS": "YES", @@ -30,7 +30,7 @@ "msvs_settings": { "VCCLCompilerTool": { "ExceptionHandling": 1, - "AdditionalOptions": ["-std:c++11"], + "AdditionalOptions": ["-std:c++17"], } }, } diff --git a/c/CMakeLists.txt b/c/CMakeLists.txt index 79816a4a..709ddc51 100644 --- a/c/CMakeLists.txt +++ b/c/CMakeLists.txt @@ -1,4 +1,4 @@ -if (USEARCH_BUILD_CLIB) +if 
(USEARCH_BUILD_TEST_C) add_executable(test_c test.c lib.cpp) setup_target(test_c) include(CTest) @@ -8,7 +8,7 @@ endif () # This article discusses a better way to allow building either static or shared libraries: # https://alexreinking.com/blog/building-a-dual-shared-and-static-library-with-cmake.html -if (USEARCH_BUILD_CTEST) +if (USEARCH_BUILD_LIB_C) add_library(usearch_c SHARED lib.cpp) add_library(usearch_static_c STATIC lib.cpp) diff --git a/c/README.md b/c/README.md index 7e082b2d..34ef80ca 100644 --- a/c/README.md +++ b/c/README.md @@ -3,14 +3,12 @@ ## Installation The simplest form to integrate is to copy the contents of `usearch/c/` into your project. -To build the library `libusearch_static_c` and `libusearch_c`, pass enable the `USEARCH_BUILD_CLIB` CMake option: +To build the libraries `libusearch_static_c` and `libusearch_c`, enable the `USEARCH_BUILD_LIB_C` CMake option: ```bash -cmake -DUSEARCH_BUILD_CLIB=1 -DUSEARCH_BUILD_CTEST=1 -DUSEARCH_BUILD_STATIC=0 -DUSEARCH_BUILD_TEST=0 -DUSEARCH_BUILD_BENCHMARK=0 .. +cmake -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_BUILD_TEST_CPP=0 -DUSEARCH_BUILD_BENCH_CPP=0 .. ``` -You can choose between a static and dynamic library by setting `USEARCH_BUILD_STATIC`.
- ## Quickstart ```c diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 0ed75807..27d78723 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -1,5 +1,4 @@ - -if (USEARCH_BUILD_TEST) +if (USEARCH_BUILD_TEST_CPP) add_executable(test_cpp test.cpp) setup_target(test_cpp) include(CTest) @@ -7,7 +6,7 @@ if (USEARCH_BUILD_TEST) add_test(NAME test_cpp COMMAND test_cpp) endif () -if (USEARCH_BUILD_BENCHMARK) +if (USEARCH_BUILD_BENCH_CPP) include(clipp) add_executable(bench_cpp bench.cpp) setup_target(bench_cpp) diff --git a/docs/compilation.md b/docs/compilation.md index ee0b4d1e..2b830f2f 100644 --- a/docs/compilation.md +++ b/docs/compilation.md @@ -19,10 +19,10 @@ cmake -B ./build_release \ -DUSEARCH_USE_OPENMP=1 \ -DUSEARCH_USE_SIMSIMD=1 \ -DUSEARCH_USE_JEMALLOC=1 \ - -DUSEARCH_BUILD_TEST=1 \ - -DUSEARCH_BUILD_BENCHMARK=1 \ - -DUSEARCH_BUILD_CTEST=1 \ - -DUSEARCH_BUILD_CLIB=1 \ + -DUSEARCH_BUILD_TEST_CPP=1 \ + -DUSEARCH_BUILD_BENCH_CPP=1 \ + -DUSEARCH_BUILD_LIB_C=1 \ + -DUSEARCH_BUILD_TEST_C=1 \ && \ make -C ./build_release -j ``` @@ -36,8 +36,8 @@ cmake -B ./build_release \ -DCMAKE_CXX_COMPILER="/opt/homebrew/opt/llvm/bin/clang++" \ -DUSEARCH_USE_OPENMP=1 \ -DUSEARCH_USE_SIMSIMD=1 \ - -DUSEARCH_BUILD_BENCHMARK=1 \ - -DUSEARCH_BUILD_TEST=1 \ + -DUSEARCH_BUILD_BENCH_CPP=1 \ + -DUSEARCH_BUILD_TEST_CPP=1 \ && \ make -C ./build_release -j ``` @@ -168,7 +168,7 @@ make USEARCH_USE_OPENMP=1 USEARCH_USE_SIMSIMD=1 -C ./c libusearch_c.so Using CMake: ```sh -cmake -B ./build_release -DUSEARCH_BUILD_CLIB=1 && make -C ./build_release -j +cmake -B ./build_release -DUSEARCH_BUILD_LIB_C=1 && make -C ./build_release -j ``` Testing on MacOS and Linux: