diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index 1677b260..4f43b800 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -16,60 +16,102 @@ permissions: contents: read jobs: - - test_c: - name: Test C + + test_cpp_c_ubuntu_gcc: + name: Test C++ Ubuntu (GCC) runs-on: ubuntu-latest + steps: - uses: actions/checkout@v3 with: ref: main-dev - run: git submodule update --init --recursive - - name: Prepare Environment - run: | - sudo apt update && - sudo apt install -y cmake build-essential libjemalloc-dev + + - name: Install Dependencies + run: sudo apt update && sudo apt install -y cmake build-essential libjemalloc-dev libomp-dev g++-12 + - name: Build run: | - cmake -B ./build_release \ - -DCMAKE_BUILD_TYPE=Release \ - -DUSEARCH_USE_OPENMP=1 \ - -DUSEARCH_USE_SIMSIMD=1 \ - -DUSEARCH_USE_JEMALLOC=1 \ - -DUSEARCH_USE_NATIVE_F16=0 \ - -DUSEARCH_BUILD_CLIB=1 \ - -DUSEARCH_BUILD_CTEST=1 \ - -DUSEARCH_BUILD_BENCHMARK=0 && - make -C ./build_release -j - - name: Run tests - run: ./build_release/test_c + export CC=gcc-12 + export CXX=g++-12 + cmake -B build -DCMAKE_BUILD_TYPE=Debug -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_SIMSIMD=1 -DUSEARCH_USE_JEMALLOC=1 + cmake --build build --config Debug + + - name: Run C++ tests + run: ./build/test_cpp + - name: Run C tests + run: ./build/test_c + - test_cpp: - name: Test C++ + test_cpp_c_ubuntu_clang: + name: Test C++ Ubuntu (Clang) runs-on: ubuntu-latest + steps: - uses: actions/checkout@v3 with: ref: main-dev - run: git submodule update --init --recursive - - name: Prepare Environment + + - name: Install Dependencies + run: sudo apt update && sudo apt install -y cmake build-essential libjemalloc-dev clang-15 + + - name: Build run: | - sudo apt update && - sudo apt install -y cmake build-essential libjemalloc-dev + export CC=clang-15 + export CXX=clang++-15 + cmake -B build -DCMAKE_BUILD_TYPE=Debug 
-DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_USE_SIMSIMD=1 -DUSEARCH_USE_JEMALLOC=1 + cmake --build build --config Debug + + - name: Run C++ tests + run: ./build/test_cpp + - name: Run C tests + run: ./build/test_c + + + test_cpp_c_macos: + name: Test C++ MacOS + runs-on: macos-latest + steps: + - uses: actions/checkout@v3 + with: + ref: main-dev + - run: git submodule update --init --recursive + + - name: Install Dependencies + run: | + brew update + brew install cmake + - name: Build run: | - cmake -B ./build_release \ - -DCMAKE_BUILD_TYPE=Release \ - -DUSEARCH_USE_OPENMP=1 \ - -DUSEARCH_USE_SIMSIMD=1 \ - -DUSEARCH_USE_JEMALLOC=1 \ - -DUSEARCH_USE_NATIVE_F16=0 \ - -DUSEARCH_BUILD_TEST=1 \ - -DUSEARCH_BUILD_BENCHMARK=0 && - make -C ./build_release -j - - name: Run tests - run: ./build_release/test_cpp - + cmake -B build -DCMAKE_BUILD_TYPE=Debug -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 + cmake --build build --config Debug + - name: Run C++ tests + run: ./build/test_cpp + - name: Run C tests + run: ./build/test_c + + test_cpp_c_windows: + name: Test C++ Windows + runs-on: windows-latest + steps: + - uses: actions/checkout@v3 + with: + ref: main-dev + - run: git submodule update --init --recursive + + - name: Install Dependencies + run: choco install cmake + + - name: Build + run: | + cmake -B build -DCMAKE_BUILD_TYPE=Debug -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 + cmake --build build --config Debug + - name: Run C++ tests + run: .\build\test_cpp + - name: Run C tests + run: .\build\test_c test_python_311: name: Test Python @@ -92,15 +134,7 @@ jobs: python -m pip install --upgrade pip pip install pytest numpy - name: Build locally on Ubuntu - run: | - export CC=/usr/bin/gcc-12 - export CXX=/usr/bin/g++-12 - export LD_LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/12/:$LD_LIBRARY_PATH - python -m pip install . - if: ${{ matrix.os == 'ubuntu-22.04' }} - - name: Build locally run: python -m pip install . 
- if: ${{ matrix.os != 'ubuntu-22.04' }} - name: Test with PyTest run: pytest python/scripts/ -s -x @@ -129,13 +163,7 @@ jobs: pip install pytest numpy - name: Build locally - run: | - export CC=/usr/bin/gcc-12 - export CXX=/usr/bin/g++-12 - export LD_LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/12/:$LD_LIBRARY_PATH - python -m pip install . - if: ${{ matrix.os == 'ubuntu-22.04' }} - + run: python -m pip install . - name: Test with PyTest run: pytest python/scripts/ -s -x @@ -149,10 +177,7 @@ jobs: - uses: actions/setup-node@v3 with: node-version: 18 - - run: | - export CC=/usr/bin/gcc-12 - export CXX=/usr/bin/g++-12 - npm install + - run: npm install - run: npm ci - run: npm test @@ -180,10 +205,7 @@ jobs: - name: Setup Gradle uses: gradle/gradle-build-action@v2.4.2 - name: Execute Gradle build - run: - export CC=/usr/bin/gcc-12 - export CXX=/usr/bin/g++-12 - gradle clean build + run: gradle clean build test_swift: name: Test ObjC & Swift @@ -254,14 +276,14 @@ jobs: - name: Prepare environment run: | sudo apt update && - sudo apt install -y cmake build-essential libjemalloc-dev + sudo apt install -y build-essential - name: Build library run: | cd golang/ make USEARCH_USE_OPENMP=0 USEARCH_USE_NATIVE_F16=0 USEARCH_USE_SIMSIMD=1 -C ../c libusearch_c.so - sudo mv ../c/libusearch_c.so /usr/local/lib/libusearch_c.a - sudo mv ../c/usearch.h /usr/local/include/usearch.h + sudo cp ../c/libusearch_c.so /usr/local/lib/libusearch_c.a + sudo cp ../c/usearch.h /usr/local/include/usearch.h - name: Run test run: | diff --git a/.gitignore b/.gitignore index 9d15404c..8610d077 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ datasets bin *.usearch +.cache # C++ builds build diff --git a/.vscode/settings.json b/.vscode/settings.json index a9054712..d3821cb8 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -113,10 +113,13 @@ "BLAS", "Cdecl", "cflags", + "cibuildwheel", "citerator", "downcasted", "Downcasting", "dtype", + "DUSEARCH", + "emcmake", "equi", 
"equidimensional", "FAISS", @@ -153,6 +156,7 @@ "usearch", "usecases", "Vardanian", + "vectorize", "Xunit" ], "autoDocstring.docstringFormat": "sphinx", diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 896774c7..3265c920 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -3,7 +3,7 @@ "tasks": [ { "label": "Linux Build C++ Debug", - "command": "cmake -DCMAKE_CXX_COMPILER=gcc-12 -DCMAKE_CXX_COMPILER=g++-12 -DCMAKE_BUILD_TYPE=Debug -B ./build_debug && make -C ./build_debug", + "command": "cmake -DCMAKE_BUILD_TYPE=Debug -B ./build_debug && make -C ./build_debug", "args": [], "type": "shell", "problemMatcher": [ @@ -12,7 +12,7 @@ }, { "label": "Linux Build C++ Release", - "command": "cmake -DCMAKE_CXX_COMPILER=gcc-12 -DCMAKE_CXX_COMPILER=g++-12 -DCMAKE_BUILD_TYPE=RelWithDebInfo -B ./build_release && make -C ./build_release", + "command": "cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo -B ./build_release && make -C ./build_release", "args": [], "type": "shell", "problemMatcher": [ @@ -21,13 +21,13 @@ }, { "label": "MacOS Build C++ Debug", - "command": "cmake -DCMAKE_C_COMPILER=/opt/homebrew/opt/llvm/bin/clang -DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm/bin/clang++ -DCMAKE_BUILD_TYPE=Debug -B ./build_debug && make -C ./build_debug", + "command": "cmake -DCMAKE_BUILD_TYPE=Debug -B ./build_debug && make -C ./build_debug", "args": [], "type": "shell", }, { "label": "MacOS Build C++ Release", - "command": "cmake -DCMAKE_C_COMPILER=/opt/homebrew/opt/llvm/bin/clang -DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -B ./build_release && make -C ./build_release", + "command": "cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo -B ./build_release && make -C ./build_release", "args": [], "type": "shell" } diff --git a/CMakeLists.txt b/CMakeLists.txt index 3f8cf4e1..f0182de7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,10 +21,10 @@ option(USEARCH_USE_SIMSIMD "Use SimSIMD hardware-accelerated metrics" OFF) 
option(USEARCH_USE_JEMALLOC "Use JeMalloc for faster memory allocations" OFF) option(USEARCH_USE_NATIVE_F16 "Use native half-precision types" OFF) -option(USEARCH_BUILD_TEST "Compile a native unit test in C++" ${USEARCH_IS_MAIN_PROJECT}) -option(USEARCH_BUILD_BENCHMARK "Compile a native benchmark in C++" ${USEARCH_IS_MAIN_PROJECT}) -option(USEARCH_BUILD_CLIB "Compile a native library for the C 99 interface" OFF) -option(USEARCH_BUILD_CTEST "Compile a test for the C 99 interface" OFF) +option(USEARCH_BUILD_TEST_CPP "Compile a native unit test in C++" ${USEARCH_IS_MAIN_PROJECT}) +option(USEARCH_BUILD_BENCH_CPP "Compile a native benchmark in C++" ${USEARCH_IS_MAIN_PROJECT}) +option(USEARCH_BUILD_LIB_C "Compile a native library for the C 99 interface" OFF) +option(USEARCH_BUILD_TEST_C "Compile a test for the C 99 interface" OFF) option(USEARCH_BUILD_WOLFRAM "Compile Wolfram Language bindings" OFF) # Includes @@ -198,7 +198,6 @@ function (setup_target TARGET_NAME) -g> $<$:-g -fsanitize=address - -fsanitize=leak -fsanitize=alignment -fsanitize=undefined > @@ -214,13 +213,21 @@ function (setup_target TARGET_NAME) PRIVATE $<$:-g -fsanitize=address - -fsanitize=leak -fsanitize=alignment -fsanitize=undefined > -fPIC ) + # Check if the compiler is AppleClang, and if not, add the leak sanitizer + if (CMAKE_HOST_SYSTEM_NAME MATCHES "Darwin") + # It's likely AppleClang Adjust options as needed for AppleClang + else () + # It's likely LLVM Clang + target_compile_options(${TARGET_NAME} PRIVATE $<$:-fsanitize=leak>) + target_link_options(${TARGET_NAME} PRIVATE $<$:-fsanitize=leak>) + endif () + if (USEARCH_USE_OPENMP) target_link_libraries(${TARGET_NAME} PRIVATE OpenMP::OpenMP_CXX) endif () @@ -251,6 +258,8 @@ function (setup_target TARGET_NAME) endif () target_include_directories(${TARGET_NAME} PRIVATE ${USEARCH_HEADER_INCLUDES}) + set_target_properties(${TARGET_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}) + set_target_properties(${TARGET_NAME} PROPERTIES 
RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}) set_target_properties(${TARGET_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) set_target_properties(${TARGET_NAME} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) set_target_properties(${TARGET_NAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) @@ -268,7 +277,7 @@ function (setup_target TARGET_NAME) endfunction () -if (${USEARCH_BUILD_TEST} OR ${USEARCH_BUILD_BENCHMARK}) +if (${USEARCH_BUILD_TEST_CPP} OR ${USEARCH_BUILD_BENCH_CPP}) add_subdirectory(cpp) endif () @@ -280,6 +289,6 @@ if (${USEARCH_BUILD_WASM}) add_subdirectory(wasm) endif () -if (${USEARCH_BUILD_CLIB} OR ${USEARCH_BUILD_CTEST}) +if (${USEARCH_BUILD_LIB_C} OR ${USEARCH_BUILD_TEST_C}) add_subdirectory(c) endif () diff --git a/binding.gyp b/binding.gyp index 4c7fc952..5a6665c7 100644 --- a/binding.gyp +++ b/binding.gyp @@ -10,6 +10,7 @@ "simsimd/include", ], "dependencies": [" scalar_kind_from_name(char const* name, std::s return parsed; } +inline expected_gt scalar_kind_from_name(char const* name) { + return scalar_kind_from_name(name, std::strlen(name)); +} + inline expected_gt metric_from_name(char const* name, std::size_t len) { expected_gt parsed; if (str_equals(name, len, "l2sq") || str_equals(name, len, "euclidean_sq")) { @@ -370,6 +374,10 @@ inline expected_gt metric_from_name(char const* name, std::size_t return parsed; } +inline expected_gt metric_from_name(char const* name) { + return metric_from_name(name, std::strlen(name)); +} + inline float f16_to_f32(std::uint16_t u16) noexcept { #if USEARCH_USE_NATIVE_F16 f16_native_t f16; diff --git a/javascript/docs.js b/javascript/docs.js deleted file mode 100644 index 2f533cea..00000000 --- a/javascript/docs.js +++ /dev/null @@ -1,111 +0,0 @@ -/** Search result object. */ -class Matches { - /** - * @param {BigUint64Array} keys - The keys of the nearest neighbors found. - * @param {Float32Array} distances - The distances of the nearest neighbors found. 
- * @param {bigint} count - The count of nearest neighbors found. - */ - constructor(keys, distances, count) { - this.keys = keys; - this.distances = distances; - this.count = count; - } -} - -/** K-Approximate Nearest Neighbors search index. */ -class Index { - /** - * Constructs a new index. - * - * @param {bigint} dimensions - * @param {string} metric - * @param {string} quantization - * @param {bigint} capacity - * @param {bigint} connectivity - * @param {bigint} expansion_add - * @param {bigint} expansion_search - */ - constructor( - dimensions, - metric, - quantization, - capacity, - connectivity, - expansion_add, - expansion_search - ) {} - - /** - * Returns the dimensionality of vectors. - * @return {bigint} The dimensionality of vectors. - */ - dimensions() {} - - /** - * Returns the bigint of vectors currently indexed. - * @return {bigint} The bigint of vectors currently indexed. - */ - size() {} - - /** - * Returns index capacity. - * @return {bigint} The capacity of index. - */ - capacity() {} - - /** - * Returns connectivity. - * @return {bigint} The connectivity of index. - */ - connectivity() {} - - /** - * Write index to a file. - * @param {string} path File path to write. - */ - save(path) {} - - /** - * Load index from a file. - * @param {string} path File path to read. - */ - load(path) {} - - /** - * View index from a file, without loading into RAM. - * @param {string} path File path to read. - */ - view(path) {} - - /** - * Add n vectors of dimension d to the index. - * - * @param {bigint | bigint[]} keys Input identifiers for every vector. - * @param {Float32Array | Float32Array[]} mat Input matrix, matrix of size n * d. - */ - add(keys, mat) {} - - /** - * Query n vectors of dimension d to the index. Return at most k vectors for each. - * If there are not enough results for a query, the result array is padded with -1s. - * - * @param {Float32Array} mat Input vectors to search, matrix of size n * d. 
- * @param {bigint} k The bigint of nearest neighbors to search for. - * @return {Matches} Output of the search result. - */ - search(mat, k) {} - - /** - * Check if an entry is contained in the index. - * - * @param {bigint} key Identifier to look up. - */ - contains(key) {} - - /** - * Remove a vector from the index. - * - * @param {bigint} key Input identifier for every vector to be removed. - */ - remove(key) {} -} diff --git a/javascript/lib.cpp b/javascript/lib.cpp index 95778007..ee933e0b 100644 --- a/javascript/lib.cpp +++ b/javascript/lib.cpp @@ -20,10 +20,10 @@ using namespace unum::usearch; using namespace unum; -class Index : public Napi::ObjectWrap { +class CompiledIndex : public Napi::ObjectWrap { public: static Napi::Object Init(Napi::Env env, Napi::Object exports); - Index(Napi::CallbackInfo const& ctx); + CompiledIndex(Napi::CallbackInfo const& ctx); private: Napi::Value GetDimensions(Napi::CallbackInfo const& ctx); @@ -39,340 +39,294 @@ class Index : public Napi::ObjectWrap { Napi::Value Search(Napi::CallbackInfo const& ctx); Napi::Value Remove(Napi::CallbackInfo const& ctx); Napi::Value Contains(Napi::CallbackInfo const& ctx); + Napi::Value Count(Napi::CallbackInfo const& ctx); std::unique_ptr native_; }; -Napi::Object Index::Init(Napi::Env env, Napi::Object exports) { +Napi::Object CompiledIndex::Init(Napi::Env env, Napi::Object exports) { Napi::Function func = DefineClass( // - env, "Index", + env, "CompiledIndex", { - InstanceMethod("dimensions", &Index::GetDimensions), - InstanceMethod("size", &Index::GetSize), - InstanceMethod("capacity", &Index::GetCapacity), - InstanceMethod("connectivity", &Index::GetConnectivity), - InstanceMethod("add", &Index::Add), - InstanceMethod("search", &Index::Search), - InstanceMethod("remove", &Index::Remove), - InstanceMethod("contains", &Index::Contains), - InstanceMethod("save", &Index::Save), - InstanceMethod("load", &Index::Load), - InstanceMethod("view", &Index::View), + InstanceMethod("dimensions", 
&CompiledIndex::GetDimensions), + InstanceMethod("size", &CompiledIndex::GetSize), + InstanceMethod("capacity", &CompiledIndex::GetCapacity), + InstanceMethod("connectivity", &CompiledIndex::GetConnectivity), + InstanceMethod("add", &CompiledIndex::Add), + InstanceMethod("search", &CompiledIndex::Search), + InstanceMethod("remove", &CompiledIndex::Remove), + InstanceMethod("contains", &CompiledIndex::Contains), + InstanceMethod("count", &CompiledIndex::Count), + InstanceMethod("save", &CompiledIndex::Save), + InstanceMethod("load", &CompiledIndex::Load), + InstanceMethod("view", &CompiledIndex::View), }); Napi::FunctionReference* constructor = new Napi::FunctionReference(); *constructor = Napi::Persistent(func); env.SetInstanceData(constructor); - exports.Set("Index", func); + exports.Set("CompiledIndex", func); return exports; } -Index::Index(Napi::CallbackInfo const& ctx) : Napi::ObjectWrap(ctx) { - Napi::Env env = ctx.Env(); - - int length = ctx.Length(); - if (length == 0 || length >= 2 || !ctx[0].IsObject()) { - Napi::TypeError::New(env, "Pass args as named objects: dimensions: uint, capacity: uint, metric: str") - .ThrowAsJavaScriptException(); - return; - } - - bool lossless = true; - Napi::Object params = ctx[0].As(); - std::size_t dimensions = - params.Has("dimensions") ? 
params.Get("dimensions").As().Uint64Value(&lossless) : 0; - - index_limits_t limits; - std::size_t connectivity = default_connectivity(); - std::size_t expansion_add = default_expansion_add(); - std::size_t expansion_search = default_expansion_search(); - - if (params.Has("capacity")) - limits.members = params.Get("capacity").As().Uint64Value(&lossless); - if (params.Has("connectivity")) - connectivity = params.Get("connectivity").As().Uint64Value(&lossless); - if (params.Has("expansion_add")) - expansion_add = params.Get("expansion_add").As().Uint64Value(&lossless); - if (params.Has("expansion_search")) - expansion_search = params.Get("expansion_search").As().Uint64Value(&lossless); - if (!lossless) { - Napi::TypeError::New(env, "Arguments must be unsigned integers").ThrowAsJavaScriptException(); - return; - } +std::size_t napi_argument_to_size(Napi::Value v) { + return static_cast(v.As().DoubleValue()); +} - scalar_kind_t quantization = scalar_kind_t::f32_k; - if (params.Has("quantization")) { - std::string quantization_str = params.Get("quantization").As().Utf8Value(); - expected_gt expected = scalar_kind_from_name(quantization_str.c_str(), quantization_str.size()); - if (!expected) { - Napi::TypeError::New(env, expected.error.release()).ThrowAsJavaScriptException(); - return; - } - quantization = *expected; - } +CompiledIndex::CompiledIndex(Napi::CallbackInfo const& ctx) : Napi::ObjectWrap(ctx) { - // By default we use the Inner Product similarity - metric_kind_t metric_kind = metric_kind_t::ip_k; - if (params.Has("metric")) { - std::string metric_str = params.Get("metric").As().Utf8Value(); - expected_gt expected = metric_from_name(metric_str.c_str(), metric_str.size()); - if (!expected) { - Napi::TypeError::New(env, expected.error.release()).ThrowAsJavaScriptException(); - return; - } - metric_kind = *expected; - } + // Directly assign the parameters without checks + std::size_t dimensions = napi_argument_to_size(ctx[0]); + metric_kind_t metric_kind = 
metric_from_name(ctx[1].As().Utf8Value().c_str()); + scalar_kind_t quantization = scalar_kind_from_name(ctx[2].As().Utf8Value().c_str()); + std::size_t connectivity = napi_argument_to_size(ctx[3]); + std::size_t expansion_add = napi_argument_to_size(ctx[4]); + std::size_t expansion_search = napi_argument_to_size(ctx[5]); + bool multi = ctx[6].As().Value(); metric_punned_t metric(dimensions, metric_kind, quantization); index_dense_config_t config(connectivity, expansion_add, expansion_search); + config.multi = multi; + native_.reset(new index_dense_t(index_dense_t::make(metric, config))); - native_->reserve(limits); + if (!native_) + Napi::Error::New(ctx.Env(), "Out of memory!").ThrowAsJavaScriptException(); } -Napi::Value Index::GetDimensions(Napi::CallbackInfo const& ctx) { - return Napi::BigInt::New(ctx.Env(), static_cast(native_->dimensions())); +Napi::Value CompiledIndex::GetDimensions(Napi::CallbackInfo const& ctx) { + return Napi::Number::New(ctx.Env(), static_cast(native_->dimensions())); } -Napi::Value Index::GetSize(Napi::CallbackInfo const& ctx) { - return Napi::BigInt::New(ctx.Env(), static_cast(native_->size())); +Napi::Value CompiledIndex::GetConnectivity(Napi::CallbackInfo const& ctx) { + return Napi::Number::New(ctx.Env(), static_cast(native_->connectivity())); } -Napi::Value Index::GetConnectivity(Napi::CallbackInfo const& ctx) { - return Napi::BigInt::New(ctx.Env(), static_cast(native_->connectivity())); +Napi::Value CompiledIndex::GetSize(Napi::CallbackInfo const& ctx) { + return Napi::Number::New(ctx.Env(), static_cast(native_->size())); } -Napi::Value Index::GetCapacity(Napi::CallbackInfo const& ctx) { - return Napi::BigInt::New(ctx.Env(), static_cast(native_->capacity())); +Napi::Value CompiledIndex::GetCapacity(Napi::CallbackInfo const& ctx) { + return Napi::Number::New(ctx.Env(), static_cast(native_->capacity())); } -void Index::Save(Napi::CallbackInfo const& ctx) { - Napi::Env env = ctx.Env(); - - int length = ctx.Length(); - if (length == 0 
|| !ctx[0].IsString()) { - Napi::TypeError::New(env, "Function expects a string path argument").ThrowAsJavaScriptException(); - return; - } - +void CompiledIndex::Save(Napi::CallbackInfo const& ctx) { try { std::string path = ctx[0].As(); auto result = native_->save(path.c_str()); if (!result) - return Napi::TypeError::New(env, result.error.release()).ThrowAsJavaScriptException(); + Napi::TypeError::New(ctx.Env(), result.error.release()).ThrowAsJavaScriptException(); } catch (...) { - Napi::TypeError::New(env, "Serialization failed").ThrowAsJavaScriptException(); + Napi::TypeError::New(ctx.Env(), "Serialization failed").ThrowAsJavaScriptException(); } } -void Index::Load(Napi::CallbackInfo const& ctx) { - Napi::Env env = ctx.Env(); - - int length = ctx.Length(); - if (length == 0 || !ctx[0].IsString()) { - Napi::TypeError::New(env, "Function expects a string path argument").ThrowAsJavaScriptException(); - return; - } - +void CompiledIndex::Load(Napi::CallbackInfo const& ctx) { try { std::string path = ctx[0].As(); auto result = native_->load(path.c_str()); if (!result) - return Napi::TypeError::New(env, result.error.release()).ThrowAsJavaScriptException(); + Napi::TypeError::New(ctx.Env(), result.error.release()).ThrowAsJavaScriptException(); } catch (...) 
{ - Napi::TypeError::New(env, "Loading failed").ThrowAsJavaScriptException(); + Napi::TypeError::New(ctx.Env(), "Loading failed").ThrowAsJavaScriptException(); } } -void Index::View(Napi::CallbackInfo const& ctx) { - Napi::Env env = ctx.Env(); - - int length = ctx.Length(); - if (length == 0 || !ctx[0].IsString()) { - Napi::TypeError::New(env, "Function expects a string path argument").ThrowAsJavaScriptException(); - return; - } +void CompiledIndex::View(Napi::CallbackInfo const& ctx) { try { std::string path = ctx[0].As(); auto result = native_->view(path.c_str()); if (!result) - return Napi::TypeError::New(env, result.error.release()).ThrowAsJavaScriptException(); + Napi::TypeError::New(ctx.Env(), result.error.release()).ThrowAsJavaScriptException(); } catch (...) { - Napi::TypeError::New(env, "Memory-mapping failed").ThrowAsJavaScriptException(); + Napi::TypeError::New(ctx.Env(), "Memory-mapping failed").ThrowAsJavaScriptException(); } } -void Index::Add(Napi::CallbackInfo const& ctx) { - Napi::Env env = ctx.Env(); - - if (ctx.Length() < 2) - return Napi::TypeError::New(env, "Expects at least two arguments").ThrowAsJavaScriptException(); - - using key_t = typename index_dense_t::key_t; - std::size_t index_dimensions = native_->dimensions(); - - auto add = [&](Napi::BigInt key_js, Napi::Float32Array vector_js) { - bool lossless = true; - key_t key = static_cast(key_js.Uint64Value(&lossless)); - if (!lossless) - return Napi::TypeError::New(env, "Keys must be unsigned integers").ThrowAsJavaScriptException(); +void CompiledIndex::Add(Napi::CallbackInfo const& ctx) { + // Extract keys and vectors from arguments + Napi::BigUint64Array keys = ctx[0].As(); + std::size_t tasks = keys.ElementLength(); - float const* vector = vector_js.Data(); - std::size_t dimensions = static_cast(vector_js.ElementLength()); + // Ensure there is enough capacity + if (native_->size() + tasks >= native_->capacity()) + native_->reserve(ceil2(native_->size() + tasks)); - if (dimensions != 
index_dimensions) - return Napi::TypeError::New(env, "Wrong number of dimensions").ThrowAsJavaScriptException(); - - try { - auto result = native_->add(key, vector); - if (!result) - return Napi::TypeError::New(env, result.error.release()).ThrowAsJavaScriptException(); - - } catch (std::bad_alloc const&) { - return Napi::TypeError::New(env, "Out of memory").ThrowAsJavaScriptException(); - } catch (...) { - return Napi::TypeError::New(env, "Insertion failed").ThrowAsJavaScriptException(); - } + // Create an instance of the executor with the default number of threads + auto run_parallel = [&](auto vectors) { + executor_stl_t executor; + executor.fixed(tasks, [&](std::size_t /*thread_idx*/, std::size_t task_idx) { + native_->add(static_cast(keys[task_idx]), vectors + task_idx * native_->dimensions()); + }); }; - if (ctx[0].IsArray() && ctx[1].IsArray()) { - Napi::Array keys_js = ctx[0].As(); - Napi::Array vectors_js = ctx[1].As(); - auto length = keys_js.Length(); - - if (length != vectors_js.Length()) - return Napi::TypeError::New(env, "The number of keys must match the number of vectors") - .ThrowAsJavaScriptException(); - - if (native_->size() + length >= native_->capacity()) - if (!native_->reserve(ceil2(native_->size() + length))) - return Napi::TypeError::New(env, "Out of memory!").ThrowAsJavaScriptException(); - - for (std::size_t i = 0; i < length; i++) { - Napi::Value key_js = keys_js[i]; - Napi::Value vector_js = vectors_js[i]; - add(key_js.As(), vector_js.As()); - } - - } else if (ctx[0].IsBigInt() && ctx[1].IsTypedArray()) { - if (native_->size() + 1 >= native_->capacity()) - native_->reserve(ceil2(native_->size() + 1)); - add(ctx[0].As(), ctx[1].As()); - } else - return Napi::TypeError::New(env, "Invalid argument type, expects integral key(s) and float vector(s)") + Napi::TypedArray vectors = ctx[1].As(); + if (vectors.TypedArrayType() == napi_float32_array) { + run_parallel(vectors.As().Data()); + } else if (vectors.TypedArrayType() == 
napi_float64_array) { + run_parallel(vectors.As().Data()); + } else if (vectors.TypedArrayType() == napi_int8_array) { + run_parallel(vectors.As().Data()); + } else { + Napi::TypeError::New(ctx.Env(), + "Unsupported TypedArray. Supported types are Float32Array, Float64Array, and Int8Array.") .ThrowAsJavaScriptException(); + } } -Napi::Value Index::Search(Napi::CallbackInfo const& ctx) { +Napi::Value CompiledIndex::Search(Napi::CallbackInfo const& ctx) { Napi::Env env = ctx.Env(); - if (ctx.Length() < 2 || !ctx[0].IsTypedArray() || !ctx[1].IsBigInt()) { - Napi::TypeError::New(env, "Expects a and the number of wanted results").ThrowAsJavaScriptException(); - return {}; - } + Napi::TypedArray queries = ctx[0].As(); + std::size_t tasks = queries.ElementLength() / native_->dimensions(); + std::size_t wanted = napi_argument_to_size(ctx[1]); - Napi::Float32Array vector_js = ctx[0].As(); - Napi::BigInt wanted_js = ctx[1].As(); - - float const* vector = vector_js.Data(); - std::size_t dimensions = static_cast(vector_js.ElementLength()); - if (dimensions != native_->dimensions()) { - Napi::TypeError::New(env, "Wrong number of dimensions").ThrowAsJavaScriptException(); - return {}; - } - - bool lossless = true; - std::uint64_t wanted = wanted_js.Uint64Value(&lossless); - if (!lossless) { - Napi::TypeError::New(env, "Wanted number of matches must be an unsigned integer").ThrowAsJavaScriptException(); - return {}; - } + auto run_parallel = [&](auto vectors) { + Napi::Array result_js = Napi::Array::New(env, 3); + Napi::BigUint64Array matches_js = Napi::BigUint64Array::New(env, tasks * wanted); + Napi::Float32Array distances_js = Napi::Float32Array::New(env, tasks * wanted); + Napi::BigUint64Array counts_js = Napi::BigUint64Array::New(env, tasks); - using key_t = typename index_dense_t::key_t; - Napi::TypedArrayOf matches_js = Napi::TypedArrayOf::New(env, wanted); - static_assert(std::is_same::value, "Matches.key interface expects BigUint64Array"); - Napi::Float32Array 
distances_js = Napi::Float32Array::New(env, wanted); - try { + auto matches_data = matches_js.Data(); + auto distances_data = distances_js.Data(); + auto counts_data = counts_js.Data(); - auto result = native_->search(vector, wanted); - if (!result) { - Napi::TypeError::New(env, result.error.release()).ThrowAsJavaScriptException(); - return {}; + try { + executor_stl_t executor; + executor.fixed(tasks, [&](std::size_t /*thread_idx*/, std::size_t task_idx) { + auto result = native_->search(vectors + task_idx * native_->dimensions(), wanted); + if (!result) { + // Handle the error appropriately + // For example, log the error or set some flag in the result_js object + } else { + counts_data[task_idx] = result.dump_to(matches_data + task_idx * native_->dimensions(), + distances_data + task_idx * native_->dimensions()); + } + }); + } catch (std::bad_alloc const&) { + Napi::TypeError::New(env, "Out of memory").ThrowAsJavaScriptException(); + } catch (...) { + Napi::TypeError::New(env, "Search failed").ThrowAsJavaScriptException(); } - std::uint64_t count = result.dump_to(matches_js.Data(), distances_js.Data()); - Napi::Object result_js = Napi::Object::New(env); - result_js.Set("keys", matches_js); - result_js.Set("distances", distances_js); - result_js.Set("count", Napi::BigInt::New(env, count)); + result_js.Set(0u, matches_js); + result_js.Set(1u, distances_js); + result_js.Set(2u, counts_js); return result_js; - } catch (std::bad_alloc const&) { - Napi::TypeError::New(env, "Out of memory").ThrowAsJavaScriptException(); - return {}; - } catch (...) 
{ - Napi::TypeError::New(env, "Search failed").ThrowAsJavaScriptException(); - return {}; + }; + + if (queries.TypedArrayType() == napi_float32_array) { + return run_parallel(queries.As().Data()); + } else if (queries.TypedArrayType() == napi_float64_array) { + return run_parallel(queries.As().Data()); + } else if (queries.TypedArrayType() == napi_int8_array) { + return run_parallel(queries.As().Data()); + } else { + Napi::TypeError::New(env, + "Unsupported TypedArray. Supported types are Float32Array, Float64Array, and Int8Array.") + .ThrowAsJavaScriptException(); + return env.Null(); } } -Napi::Value Index::Remove(Napi::CallbackInfo const& ctx) { +Napi::Value CompiledIndex::Remove(Napi::CallbackInfo const& ctx) { Napi::Env env = ctx.Env(); - if (ctx.Length() < 1 || !ctx[0].IsBigInt()) { - Napi::TypeError::New(env, "Expects an entry identifier").ThrowAsJavaScriptException(); - return {}; + Napi::BigUint64Array keys = ctx[0].As(); + std::size_t length = keys.ElementLength(); + Napi::Array result = Napi::Array::New(env, length); + for (std::size_t i = 0; i < length; ++i) { + result[i] = Napi::Number::New(env, native_->remove(static_cast(keys[i])).completed); } + return result; +} - Napi::BigInt key_js = ctx[0].As(); - bool lossless = true; - std::uint64_t key = key_js.Uint64Value(&lossless); - if (!lossless) { - Napi::TypeError::New(env, "Identifier must be an unsigned integer").ThrowAsJavaScriptException(); - return {}; - } +Napi::Value CompiledIndex::Contains(Napi::CallbackInfo const& ctx) { + Napi::Env env = ctx.Env(); + Napi::BigUint64Array keys = ctx[0].As(); + std::size_t length = keys.ElementLength(); + Napi::Array result = Napi::Array::New(env, length); + for (std::size_t i = 0; i < length; ++i) + result[i] = Napi::Boolean::New(env, native_->contains(static_cast(keys[i]))); + return result; +} - try { - auto result = native_->remove(key); - if (!result) { - Napi::TypeError::New(env, result.error.release()).ThrowAsJavaScriptException(); - return {}; - } - 
return Napi::Boolean::New(env, result.completed); - } catch (std::bad_alloc const&) { - Napi::TypeError::New(env, "Out of memory").ThrowAsJavaScriptException(); - return {}; - } catch (...) { - Napi::TypeError::New(env, "Search failed").ThrowAsJavaScriptException(); - return {}; - } +Napi::Value CompiledIndex::Count(Napi::CallbackInfo const& ctx) { + Napi::Env env = ctx.Env(); + Napi::BigUint64Array keys = ctx[0].As(); + std::size_t length = keys.ElementLength(); + Napi::Array result = Napi::Array::New(env, length); + for (std::size_t i = 0; i < length; ++i) + result[i] = Napi::Boolean::New(env, native_->count(static_cast(keys[i]))); + return result; } -Napi::Value Index::Contains(Napi::CallbackInfo const& ctx) { +Napi::Value compiledExactSearch(Napi::CallbackInfo const& ctx) { Napi::Env env = ctx.Env(); - if (ctx.Length() < 1 || !ctx[0].IsBigInt()) { - Napi::TypeError::New(env, "Expects an entry identifier").ThrowAsJavaScriptException(); - return {}; - } - Napi::BigInt key_js = ctx[0].As(); - bool lossless = true; - std::uint64_t key = key_js.Uint64Value(&lossless); - if (!lossless) { - Napi::TypeError::New(env, "Identifier must be an unsigned integer").ThrowAsJavaScriptException(); - return {}; + // Extracting parameters directly without additional type checks. 
+ Napi::TypedArray dataset = ctx[0].As(); + Napi::ArrayBuffer datasetBuffer = dataset.ArrayBuffer(); + Napi::TypedArray queries = ctx[1].As(); + Napi::ArrayBuffer queriesBuffer = queries.ArrayBuffer(); + std::uint64_t dimensions = napi_argument_to_size(ctx[2]); + std::uint64_t wanted = napi_argument_to_size(ctx[3]); + metric_kind_t metric_kind = metric_from_name(ctx[4].As().Utf8Value().c_str()); + + // The scalar kind and its width are derived from the element type of `queries`; anything else is treated as f32. + scalar_kind_t quantization; + std::size_t bytes_per_scalar; + switch (queries.TypedArrayType()) { + case napi_float64_array: quantization = scalar_kind_t::f64_k, bytes_per_scalar = 8; break; + case napi_int8_array: quantization = scalar_kind_t::i8_k, bytes_per_scalar = 1; break; + default: quantization = scalar_kind_t::f32_k, bytes_per_scalar = 4; break; } - try { - bool result = native_->contains(key); - return Napi::Boolean::New(env, result); - } catch (std::bad_alloc const&) { + metric_punned_t metric(dimensions, metric_kind, quantization); + exact_search_t search; + + // Performing the exact search. + std::size_t dataset_size = dataset.ElementLength() / dimensions; + std::size_t queries_size = queries.ElementLength() / dimensions; + auto results = search( // + reinterpret_cast(datasetBuffer.Data()), // + dataset_size, // + dimensions * bytes_per_scalar, // + reinterpret_cast(queriesBuffer.Data()), // + queries_size, // + dimensions * bytes_per_scalar, // + wanted, metric); + + // ThrowAsJavaScriptException only marks the exception as pending and returns, so bail out explicitly before `results` is read. + if (!results) { Napi::TypeError::New(env, "Out of memory").ThrowAsJavaScriptException(); + return env.Null(); + } - return {}; - } catch (...) 
{ - Napi::TypeError::New(env, "Search failed").ThrowAsJavaScriptException(); - return {}; + + // Constructing the result object + Napi::Array result_js = Napi::Array::New(env, 3); + Napi::BigUint64Array matches_js = Napi::BigUint64Array::New(env, queries_size * wanted); + Napi::Float32Array distances_js = Napi::Float32Array::New(env, queries_size * wanted); + Napi::BigUint64Array counts_js = Napi::BigUint64Array::New(env, queries_size); + + auto matches_data = matches_js.Data(); + auto distances_data = distances_js.Data(); + auto counts_data = counts_js.Data(); + + // Export into JS buffers + for (std::size_t task_idx = 0; task_idx != queries_size; ++task_idx) { + auto result = results.at(task_idx); + counts_data[task_idx] = wanted; + for (std::size_t result_idx = 0; result_idx != wanted; ++result_idx) { + matches_data[task_idx * wanted + result_idx] = result[result_idx].offset; + distances_data[task_idx * wanted + result_idx] = result[result_idx].distance; + } } + + result_js.Set(0u, matches_js); + result_js.Set(1u, distances_js); + result_js.Set(2u, counts_js); + return result_js; } -Napi::Object InitAll(Napi::Env env, Napi::Object exports) { return Index::Init(env, exports); } +Napi::Object InitAll(Napi::Env env, Napi::Object exports) { + exports.Set("compiledExactSearch", Napi::Function::New(env, compiledExactSearch)); + return CompiledIndex::Init(env, exports); +} NODE_API_MODULE(usearch, InitAll) diff --git a/javascript/usearch.d.ts b/javascript/usearch.d.ts deleted file mode 100644 index a4bf7fcb..00000000 --- a/javascript/usearch.d.ts +++ /dev/null @@ -1,110 +0,0 @@ - -/** Search result object. */ -export interface Matches { - /** The keys of the nearest neighbors found, size n*k. */ - keys: BigUint64Array, - /** The distances of the nearest neighbors found, size n*k. */ - distances: Float32Array, - /** The distances of the nearest neighbors found, size n*k. */ - count: bigint -} - -/** K-Approximate Nearest Neighbors search index. 
*/ -export class Index { - - /** - * Constructs a new index. - * - * @param {bigint} dimensions - * @param {string} metric - * @param {string} quantization - * @param {bigint} capacity - * @param {bigint} connectivity - * @param {bigint} expansion_add - * @param {bigint} expansion_search - */ - constructor( - dimensions: bigint, - metric: string, - quantization: string, - capacity: bigint, - connectivity: bigint, - expansion_add: bigint, - expansion_search: bigint - ); - - /** - * Returns the dimensionality of vectors. - * @return {bigint} The dimensionality of vectors. - */ - dimensions(): bigint; - - /** - * Returns the bigint of vectors currently indexed. - * @return {bigint} The bigint of vectors currently indexed. - */ - size(): bigint; - - /** - * Returns index capacity. - * @return {bigints} The capacity of index. - */ - capacity(): bigint; - - /** - * Returns connectivity. - * @return {bigint} The connectivity of index. - */ - connectivity(): bigint; - - /** - * Write index to a file. - * @param {string} path File path to write. - */ - save(path: string): void; - - /** - * Load index from a file. - * @param {string} path File path to read. - */ - load(path: string): void; - - /** - * View index from a file, without loading into RAM. - * @param {string} path File path to read. - */ - load(path: string): void; - - /** - * Add n vectors of dimension d to the index. - * - * @param {bigint | bigint[]} keys Input identifiers for every vector. - * @param {Float32Array | Float32Array[]} mat Input matrix, matrix of size n * d. - */ - add(keys: bigint | bigint[], mat: Float32Array | Float32Array[]): void; - - /** - * Query n vectors of dimension d to the index. Return at most k vectors for each. - * If there are not enough results for a query, the result array is padded with -1s. - * - * @param {Float32Array} mat Input vectors to search, matrix of size n * d. - * @param {bigint} k The bigint of nearest neighbors to search for. 
- * @return {Matches} Output of the search result. - */ - search(mat: Float32Array, k: bigint): Matches; - - /** - * Check if an entry is contained in the index. - * - * @param {bigint} key Identifier to look up. - */ - contains(key: bigint): boolean; - - /** - * Remove a vector from the index. - * - * @param {bigint} key Input identifier for every vector to be removed. - */ - remove(key: bigint): boolean; - -} \ No newline at end of file diff --git a/javascript/usearch.js b/javascript/usearch.js index 3d4a85ff..066a2356 100644 --- a/javascript/usearch.js +++ b/javascript/usearch.js @@ -1,2 +1,440 @@ -const usearch = require('bindings')('usearch'); -module.exports = usearch; \ No newline at end of file +const compiled = require('bindings')('usearch'); + +/** + * Enumeration representing the various metric kinds used to measure the distance between vectors in the index. + * @enum {string} + * @readonly + */ +const MetricKind = { + Unknown: 'unknown', + Cos: 'cos', + IP: 'ip', + L2sq: 'l2sq', + Haversine: 'haversine', + Pearson: 'pearson', + Jaccard: 'jaccard', + Hamming: 'hamming', + Tanimoto: 'tanimoto', + Sorensen: 'sorensen' +}; + +/** + * Enumeration representing the various scalar kinds used to define the type of scalar values in vectors. + * @enum {string} + * @readonly + */ +const ScalarKind = { + Unknown: 'unknown', + F32: 'f32', + F64: 'f64', + F16: 'f16', + I8: 'i8', + B1: 'b1' +}; + +/** + * Represents a set of search results. + */ +class Matches { + /** + * Constructs a Matches object. + * + * @param {BigUint64Array} keys - The keys of the nearest neighbors found. + * @param {Float32Array} distances - The distances of the nearest neighbors found. + */ + constructor(keys, distances) { + this.keys = keys; + this.distances = distances; + } +} + +/** + * Represents a set of batched search results. + */ +class BatchMatches { + /** + * Constructs a BatchMatches object. + * + * @param {BigUint64Array} keys - The keys of the nearest neighbors found in the batch. 
+ * @param {Float32Array} distances - The distances of the nearest neighbors found in the batch. + * @param {BigUint64Array} counts - The number of neighbors found for each query in the batch. + * @param {bigint} k - The limit for search results per query in the batch. + */ + constructor(keys, distances, counts, k) { + this.keys = keys; + this.distances = distances; + this.counts = counts; + this.k = k; + } + + /** + * Retrieves a Matches object at the specified index in the batch. + * + * @param {number} i - The index at which to retrieve the Matches object. + * @returns {Matches} - A Matches object representing the search results at the specified index in the batch. + */ + get(i) { + const index = Number(i) * Number(this.k); + const count = Number(this.counts[i]); + const keysSlice = this.keys.slice(index, index + count); + const distancesSlice = this.distances.slice(index, index + count); + return new Matches(keysSlice, distancesSlice); + } +} + +function isOneKey(keys) { + return typeof keys === 'number' || typeof keys === 'bigint'; +} + +function normalizeKeys(keys) { + if (isOneKey(keys)) { + keys = BigUint64Array.of(BigInt(keys)); + } else if (Array.isArray(keys)) { + keys = keys.map(key => { + if (typeof key !== 'bigint' && typeof key !== 'number') + throw new Error("All keys must be integers or bigints."); + return BigInt(key); + }); + keys = BigUint64Array.from(keys); + } else if (!(keys instanceof BigUint64Array)) { + throw new Error("Keys must be a number, bigint, an array of numbers or bigints, or a BigUint64Array."); + } + return keys; +} + +function isVector(vectors) { + return vectors instanceof Float32Array || vectors instanceof Float64Array || vectors instanceof Int8Array; +} + +function normalizeVectors(vectors, dimensions, targetType = Float32Array) { + let flattenedVectors; + if (isVector(vectors)) { + flattenedVectors = (vectors.constructor === targetType) ? 
vectors : new targetType(vectors); + } else if (Array.isArray(vectors)) { + let totalLength = 0; + for (const vec of vectors) totalLength += vec.length; + + flattenedVectors = new targetType(totalLength); + let offset = 0; + for (const vec of vectors) { + flattenedVectors.set(vec, offset); + offset += vec.length; + } + } else { + throw new Error("Vectors must be a TypedArray or an array of arrays."); + } + + if (flattenedVectors.length % dimensions !== 0) + throw new Error("The size of the flattened vectors must be a multiple of the dimension of the vectors."); + + return flattenedVectors; +} + + +class Index { + + /** + * Constructs a new index. + * + * @param {number} dimensionsOrConfigs + * @param {MetricKind} [metric=MetricKind.Cos] - Optional, default is 'cos'. + * @param {ScalarKind} [quantization=ScalarKind.F32] - Optional, default is 'f32'. + * @param {number} [connectivity=0] - Optional, default is 0. + * @param {number} [expansion_add=0] - Optional, default is 0. + * @param {number} [expansion_search=0] - Optional, default is 0. + * @param {boolean} [multi=false] - Optional, default is false. + * @throws Will throw an error if any of the parameters are of incorrect type or invalid value. + */ + constructor(dimensionsOrConfigs, metric = MetricKind.Cos, quantization = ScalarKind.F32, connectivity = 0, expansion_add = 0, expansion_search = 0, multi = false) { + let dimensions; + if (typeof dimensionsOrConfigs === 'object' && dimensionsOrConfigs !== null) { + // Parameters are provided as an object + ({ dimensions, metric = MetricKind.Cos, quantization = ScalarKind.F32, connectivity = 0, expansion_add = 0, expansion_search = 0, multi = false } = dimensionsOrConfigs); + } else if (typeof dimensionsOrConfigs === 'number' || typeof dimensionsOrConfigs === 'bigint') { + // Parameters are provided as individual arguments + dimensions = dimensionsOrConfigs; + } else { + throw new Error("Invalid arguments. 
Expected either individual arguments or a single object argument."); + } + + if (!Number.isInteger(dimensions) || !Number.isInteger(connectivity) || !Number.isInteger(expansion_add) || !Number.isInteger(expansion_search) || dimensions <= 0 || connectivity < 0 || expansion_add < 0 || expansion_search < 0) { + throw new Error("`dimensions`, `connectivity`, `expansion_add`, and `expansion_search` must be non-negative integers, with `dimensions` being positive."); + } + + if (typeof multi !== 'boolean') { + throw new Error("`multi` must be a boolean value."); + } + + if (!Object.values(MetricKind).includes(metric)) { + throw new Error(`Invalid metric: ${metric}. It must be one of: ${Object.values(MetricKind).join(', ')}`); + } + + if (!Object.values(ScalarKind).includes(quantization)) { + throw new Error(`Invalid quantization: ${quantization}. It must be one of: ${Object.values(ScalarKind).join(', ')}`); + } + + this._compiledIndex = new compiled.CompiledIndex(dimensions, metric, quantization, connectivity, expansion_add, expansion_search, multi); + } + + /** + * Add vectors to the index. + * + * This method accepts vectors and their corresponding keys for indexing. + * Each key should correspond to a vector. If a single key is provided, + * it is broadcasted to match the number of provided vectors. + * + * Vectors should be provided as a flat typed array representing a matrix + * where each row is a vector to be indexed. The matrix should have a size + * of n * d, where n is the number of vectors, and d is the dimensionality + * of the vectors. + * + * Keys should be provided as a BigInt or an array-like object of BigInts + * representing the unique identifier for each vector. + * + * @param {bigint|bigint[]|BigUint64Array} keys - Input identifiers for every vector. + * If a single key is provided, it is associated with all provided vectors. 
+ * @param {Float32Array|Float64Array|Int8Array} vectors - Input matrix representing vectors, + * matrix of size n * d, where n is the number of vectors, and d is their dimensionality. + * @throws Will throw an error if the length of keys doesn't match the number of vectors + * or if it's not a single key. + */ + add(keys, vectors) { + let normalizedKeys = normalizeKeys(keys); + let normalizedVectors = normalizeVectors(vectors, this._compiledIndex.dimensions()); + let countVectors = normalizedVectors.length / this._compiledIndex.dimensions(); + + // If a single key is provided but there are multiple vectors, + // broadcast the single key value to match the number of vectors + if (normalizedKeys.length === 1 && countVectors > 1) { + normalizedKeys = BigUint64Array.from({ length: countVectors }, () => normalizedKeys[0]); + } else if (normalizedKeys.length !== countVectors) { + throw new Error(`The length of keys (${normalizedKeys.length}) must match the number of vectors (${countVectors}) or be a single key.`); + } + + // Call the compiled method + this._compiledIndex.add(normalizedKeys, normalizedVectors); + } + + /** + * Perform a k-nearest neighbor search on the index. + * + * This method accepts a matrix of query vectors and returns the closest vectors + * from the index for each query. The method returns an object containing the keys, + * distances, and counts of the matches found. + * + * Vectors should be provided as a flat typed array representing a matrix where + * each row is a vector. The matrix should be of size n * d, where n is the + * number of query vectors, and d is their dimensionality. + * + * The parameter `k` specifies the number of nearest neighbors to return for each + * query vector. If there are not enough results for a query, the result array is + * padded with -1s. + * + * @param {Float32Array|Float64Array|Int8Array|Array>} vectors - Input matrix representing query vectors, can be a TypedArray or an array of arrays. 
+ * @param {number} k - The number of nearest neighbors to search for each query vector. + * @return {Matches|BatchMatches} - Search results for one or more queries, containing keys, distances, and counts of the matches found. + * @throws Will throw an error if `k` is not a positive integer or if the size of the vectors is not a multiple of dimensions. + * @throws Will throw an error if `vectors` is not a valid input type (TypedArray or an array of arrays) or if its flattened size is not a multiple of dimensions. + */ + search(vectors, k) { + if (typeof k !== 'number' || k <= 0) { + throw new Error("`k` must be a positive integer representing the number of nearest neighbors to search for."); + } + + const normalizedVectors = normalizeVectors(vectors, this._compiledIndex.dimensions()); + + // Call the compiled method and create Matches or BatchMatches object with the result + const result = this._compiledIndex.search(normalizedVectors, k); + const countInQueries = normalizedVectors.length / Number(this._compiledIndex.dimensions()); + if (countInQueries === 1) { + return new Matches(result[0], result[1]); + } else { + return new BatchMatches(result[0], result[1], result[2], k); + } + } + + /** + * Verifies the presence of one or more keys in the index. + * + * This method accepts one or multiple keys as input and returns a boolean or + * an array of booleans indicating whether each key is present in the index. + * + * @param {bigint|bigint[]|BigUint64Array} keys - The identifier(s) of the vector(s) to be checked for presence in the index. + * @return {boolean|boolean[]} - Returns true if a single key is contained in the index, false otherwise. Returns an array of booleans corresponding to the presence of each key in the index when multiple keys are provided. + * @throws Will throw an error if keys are not integers. 
+ */ + contains(keys) { + let normalizedKeys = normalizeKeys(keys); + let normalizedResults = this._compiledIndex.contains(normalizedKeys); + if (isOneKey(keys)) + return normalizedResults[0]; + else + return normalizedResults; + } + + /** + * Counts the number of times keys shows up in the index. + * + * @param {bigint|bigint[]|BigUint64Array} keys - The identifier(s) of the vector(s) to be enumerated. + * @return {number|number[]} - Returns the number of vectors found when a single key is provided. Returns an array of big integers corresponding to the number of vectors found for each key when multiple keys are provided. + * @throws Will throw an error if keys are not integers. + */ + count(keys) { + let normalizedKeys = normalizeKeys(keys); + let normalizedResults = this._compiledIndex.count(normalizedKeys); + if (isOneKey(keys)) + return normalizedResults[0]; + else + return normalizedResults; + } + + /** + * Removes one or multiple vectors from the index. + * + * This method accepts one or multiple keys as input and removes the corresponding vectors from the index. + * It returns the number of vectors actually removed for each key provided. + * + * @param {bigint|bigint[]|BigUint64Array} keys - The identifier(s) of the vector(s) to be removed. + * @return {number|number[]} - Returns the number of vectors deleted when a single key is provided. Returns an array of big integers corresponding to the number of vectors deleted for each key when multiple keys are provided. + * @throws Will throw an error if keys are not integers. + */ + remove(keys) { + let normalizedKeys = normalizeKeys(keys); + normalizedResults = this._compiledIndex.remove(normalizedKeys); + if (isOneKey(keys)) + return normalizedResults[0]; + else + return normalizedResults; + } + + /** + * Returns the dimensionality of vectors. + * @return {number} The dimensionality of vectors. + */ + dimensions() { return this._compiledIndex.dimensions() } + + /** + * Returns connectivity. 
+ * @return {number} The connectivity of index. + */ + connectivity() { return this._compiledIndex.connectivity() } + + /** + * Returns the number of vectors currently indexed. + * @return {number} The number of vectors currently indexed. + */ + size() { return this._compiledIndex.size() } + + /** + * Returns index capacity. + * @return {number} The capacity of index. + */ + capacity() { return this._compiledIndex.capacity() } + + /** + * Write index to a file. + * @param {string} path File path to write. + * @throws Will throw an error if `path` is not a string. + */ + save(path) { + if (typeof path !== 'string') throw new Error("`path` must be a string representing the file path to write."); + this._compiledIndex.save(path); + } + + /** + * Load index from a file. + * @param {string} path File path to read. + * @throws Will throw an error if `path` is not a string. + */ + load(path) { + if (typeof path !== 'string') throw new Error("`path` must be a string representing the file path to read."); + this._compiledIndex.load(path); + } + + /** + * View index from a file, without loading into RAM. + * @param {string} path File path to read. + * @throws Will throw an error if `path` is not a string. + */ + view(path) { + if (typeof path !== 'string') throw new Error("`path` must be a string representing the file path to read."); + this._compiledIndex.view(path); + } +} + +/** + * Performs an exact search on the given dataset to find the best matching vectors for each query. + * + * @param {Float32Array|Float64Array|Int8Array|Array>} dataset - The dataset containing vectors to be searched. It can be a TypedArray or an array of arrays. + * @param {Float32Array|Float64Array|Int8Array|Array>} queries - The queries containing vectors to search for in the dataset. It can be a TypedArray or an array of arrays. + * @param {number} dimensions - The dimensionality of the vectors in both the dataset and the queries. It defines the number of elements in each vector. 
+ * @param {number} count - The number of nearest neighbors to return for each query. If the dataset contains fewer vectors than the specified count, the result will contain only the available vectors. + * @param {MetricKind} metric - The distance metric to be used for the search. + * @return {Matches|BatchMatches} - Returns a `Matches` or `BatchMatches` object containing the results of the search. + * @throws Will throw an error if `dimensions` and `count` are not positive integers. + * @throws Will throw an error if `metric` is not a valid MetricKind. + * @throws Will throw an error if `dataset` and `queries` are not valid input types (TypedArray or an array of arrays). + * @throws Will throw an error if the sizes of the flattened `dataset` and `queries` are not multiples of `dimensions`. + * @throws Will throw an error if `count` is greater than the number of vectors in the `dataset`. + * + * @example + * const dataset = [[1.0, 2.0], [3.0, 4.0]]; // Two vectors: [1.0, 2.0] and [3.0, 4.0] + * const queries = [[1.5, 2.5]]; // One vector: [1.5, 2.5] + * const dimensions = 2; // The number of elements in each vector. + * const count = 1; // The number of nearest neighbors to return for each query. + * const metric = MetricKind.IP; // Using the Inner Product distance metric. + * + * const result = exactSearch(dataset, queries, dimensions, count, metric); + * // result might be: + * // { + * // keys: BigUint64Array [ 1n ], + * // distances: Float32Array [ some_value ], + * // } + */ +function exactSearch(dataset, queries, dimensions, count, metric) { + + // Validate and normalize the dimensions and count + dimensions = Number(dimensions); + count = Number(count); + if (count <= 0 || dimensions <= 0) { + throw new Error("Dimensions and count must be positive integers."); + } + + // Validate metric + if (!Object.values(MetricKind).includes(metric)) { + throw new Error(`Invalid metric: ${metric}. 
It must be one of: ${Object.values(MetricKind).join(', ')}`); + } + + // Flatten and normalize dataset and queries if they are arrays of arrays + let targetType; + if (dataset instanceof Float64Array) targetType = Float64Array; + else if (dataset instanceof Int8Array) targetType = Int8Array; + else targetType = Float32Array; // default to Float32Array if dataset is not Float64Array or Int8Array + + dataset = normalizeVectors(dataset, dimensions, targetType); + queries = normalizeVectors(queries, dimensions, targetType); + const countInDataset = dataset.length / dimensions; + const countInQueries = queries.length / dimensions; + if (count > countInDataset) { + throw new Error("Count must be equal or smaller than the number of vectors in the dataset."); + } + + // Call the compiled function with the normalized input + const result = compiled.exactSearch(dataset, queries, dimensions, count, metric); + + // Create and return a Matches or BatchMatches object with the result + if (countInQueries == 1) { + return new Matches(result[0], result[1]); + } else { + return new BatchMatches(result[0], result[1], result[2], count); + } +} + +module.exports = { + Index, + MetricKind, + ScalarKind, + Matches, + BatchMatches, + exactSearch, +}; diff --git a/javascript/usearch.test.js b/javascript/usearch.test.js index aae25263..a75eadc9 100644 --- a/javascript/usearch.test.js +++ b/javascript/usearch.test.js @@ -1,45 +1,37 @@ const test = require('node:test'); const assert = require('node:assert'); -const usearch = require('bindings')('usearch'); +const usearch = require('./usearch.js'); test('Single-entry operations', () => { - const index = new usearch.Index({ - metric: 'l2sq', - dimensions: 2n, - connectivity: 16n, - }); - - assert.equal(index.connectivity(), 16n, 'connectivity should be 16'); - assert.equal(index.dimensions(), 2n, 'dimensions should be 2'); - assert.equal(index.size(), 0n, 'initial size should be 0'); - - index.add(15n, new Float32Array([ 10, 20 ])); - 
index.add(16n, new Float32Array([ 10, 25 ])); - - assert.equal(index.size(), 2n, 'size after adding elements should be 2'); - assert.equal(index.contains(15n), true, 'entry must be present after insertion'); - - const results = index.search(new Float32Array([ 13, 14 ]), 2n); - - assert.deepEqual(results.keys, new BigUint64Array([ 15n, 16n ]), 'keys should be 15 and 16'); - assert.deepEqual(results.distances, new Float32Array([ 45, 130 ]), 'distances should be 45 and 130'); + const index = new usearch.Index(2, 'l2sq'); + + assert.equal(index.connectivity(), 16, 'connectivity should be 16'); + assert.equal(index.dimensions(), 2, 'dimensions should be 2'); + assert.equal(index.size(), 0, 'initial size should be 0'); + + index.add(15n, new Float32Array([10, 20])); + index.add(16n, new Float32Array([10, 25])); + + assert.equal(index.size(), 2, 'size after adding elements should be 2'); + assert.equal(index.contains(15), true, 'entry must be present after insertion'); + + const results = index.search(new Float32Array([13, 14]), 2); + + assert.deepEqual(results.keys, new BigUint64Array([15n, 16n]), 'keys should be 15 and 16'); + assert.deepEqual(results.distances, new Float32Array([45, 130]), 'distances should be 45 and 130'); }); test('Batch operations', () => { - const indexBatch = new usearch.Index({ - metric: 'l2sq', - dimensions: 2n, - connectivity: 16n, - }); + const indexBatch = new usearch.Index(2, 'l2sq'); const keys = [15n, 16n]; - const vectors = [new Float32Array([ 10, 20 ]), new Float32Array([ 10, 25 ])]; + const vectors = [new Float32Array([10, 20]), new Float32Array([10, 25])]; indexBatch.add(keys, vectors); assert.equal(indexBatch.size(), 2, 'size after adding batch should be 2'); - - const results = indexBatch.search(new Float32Array([ 13, 14 ]), 2n); - assert.deepEqual(results.keys, new BigUint64Array([ 15n, 16n ]), 'keys should be 15 and 16'); - assert.deepEqual(results.distances, new Float32Array([ 45, 130 ]), 'distances should be 45 and 130'); + const 
results = indexBatch.search(new Float32Array([13, 14]), 2); + + assert.deepEqual(results.keys, new BigUint64Array([15n, 16n]), 'keys should be 15 and 16'); + assert.deepEqual(results.distances, new Float32Array([45, 130]), 'distances should be 45 and 130'); }); diff --git a/package.json b/package.json index b3254287..3e4e0321 100644 --- a/package.json +++ b/package.json @@ -2,13 +2,18 @@ "name": "usearch", "version": "2.6.0", "description": "Smaller & Faster Single-File Vector Search Engine from Unum", - "author": "Ash Vardanian", + "author": "Ash Vardanian (https://ashvardanian.com/)", "license": "Apache 2.0", - "main": "javascript/usearch.js", + "homepage": "https://unum-cloud.github.io/usearch/", "repository": { "type": "git", "url": "https://github.com/unum-cloud/usearch.git" }, + "bugs": { + "url": "https://github.com/unum-cloud/usearch/issues", + "email": "info@unum.cloud" + }, + "main": "javascript/usearch.js", "gypfile": true, "engines": { "node": "~10 >=10.20 || >=12.17" @@ -28,4 +33,4 @@ "semantic-release": "^21.1.2", "typescript": "^5.1.6" } -} +} \ No newline at end of file diff --git a/tsconfig.json b/tsconfig.json deleted file mode 100644 index dc7df0d4..00000000 --- a/tsconfig.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "typedocOptions": { - "entryPoints": [ - "javascript/usearch.d.ts" - ], - "out": "docs" - } -} \ No newline at end of file