Skip to content

Commit

Permalink
Rebase with main
Browse files Browse the repository at this point in the history
Signed-off-by: Ryan Bogan <[email protected]>
  • Loading branch information
ryanbogan committed Mar 14, 2024
2 parents 70a2f31 + b7bdda4 commit cf049ea
Show file tree
Hide file tree
Showing 35 changed files with 842 additions and 185 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,10 @@ jobs:
if lscpu | grep -i avx2
then
echo "avx2 available on system"
su `id -un 1000` -c "whoami && java -version && ./gradlew build -Dsimd.enabled=true"
su `id -un 1000` -c "whoami && java -version && ./gradlew build"
else
echo "avx2 not available on system"
su `id -un 1000` -c "whoami && java -version && ./gradlew build"
su `id -un 1000` -c "whoami && java -version && ./gradlew build -Dsimd.enabled=false"
fi
Expand Down Expand Up @@ -101,10 +101,10 @@ jobs:
if sysctl -n machdep.cpu.features machdep.cpu.leaf7_features | grep -i AVX2
then
echo "avx2 available on system"
./gradlew build -Dsimd.enabled=true
./gradlew build
else
echo "avx2 not available on system"
./gradlew build
./gradlew build -Dsimd.enabled=false
fi
Build-k-NN-Windows:
Expand Down Expand Up @@ -158,5 +158,5 @@ jobs:
- name: Run build
run: |
./gradlew.bat build
./gradlew.bat build -D'simd.enabled=false'
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Features
### Enhancements
* Optimize Faiss Query With Filters: Reduce iteration and memory for id filter [#1402](https://github.com/opensearch-project/k-NN/pull/1402)
* Detect AVX2 Dynamically on the System [#1502](https://github.com/opensearch-project/k-NN/pull/1502)
* Validate zero vector when using cosine metric [#1501](https://github.com/opensearch-project/k-NN/pull/1501)
* Persist model definition in model metadata [#1527](https://github.com/opensearch-project/k-NN/pull/1527)
### Bug Fixes
* Disable sdc table for HNSWPQ read-only indices [#1518](https://github.com/opensearch-project/k-NN/pull/1518)
* Switch SpaceType.INNERPRODUCT's vector similarity function to MAXIMUM_INNER_PRODUCT [#1532](https://github.com/opensearch-project/k-NN/pull/1532)
### Infrastructure
* Manually install zlib for win CI [#1513](https://github.com/opensearch-project/k-NN/pull/1513)
### Documentation
Expand Down
4 changes: 2 additions & 2 deletions DEVELOPER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -238,9 +238,9 @@ If you want to make a custom patch on JNI library
4. Make a change in `jni/CMakeLists.txt`, `.github/workflows/CI.yml` to apply the patch during build

### Enable SIMD Optimization
SIMD(Single Instruction/Multiple Data) Optimization can be enabled by setting this optional parameter `simd.enabled` to `true` which boosts the performance
SIMD (Single Instruction/Multiple Data) optimization is enabled by default on Linux and Mac. It boosts performance
by enabling `AVX2` on `x86 architecture` and `NEON` on `ARM64 architecture` while building the Faiss library. But to enable SIMD, the underlying processor
should support this (AVX2 or NEON). So, by default it is set to `false`.
should support this (AVX2 or NEON). It can be disabled by setting the parameter `simd.enabled` to `false`. As of now, it is not supported on Windows OS.

```
# While building OpenSearch k-NN
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/osb/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ ijson==2.6.1
# via opensearch-benchmark
importlib-metadata==4.11.3
# via jsonschema
jinja2==2.11.3
jinja2==3.1.3
# via opensearch-benchmark
jsonschema==3.1.1
# via opensearch-benchmark
Expand Down
6 changes: 5 additions & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ buildscript {
version_qualifier = System.getProperty("build.version_qualifier", "")
opensearch_group = "org.opensearch"
isSnapshot = "true" == System.getProperty("build.snapshot", "true")
simd_enabled = System.getProperty("simd.enabled", "false")
simd_enabled = System.getProperty("simd.enabled", "true")

version_tokens = opensearch_version.tokenize('-')
opensearch_build = version_tokens[0] + '.0'
Expand Down Expand Up @@ -287,6 +287,10 @@ dependencies {
testImplementation group: 'org.objenesis', name: 'objenesis', version: '3.2'
testImplementation group: 'net.bytebuddy', name: 'byte-buddy-agent', version: '1.14.7'
testFixturesImplementation "org.opensearch:common-utils:${version}"
implementation 'com.github.oshi:oshi-core:6.4.13'
api "net.java.dev.jna:jna:5.13.0"
api "net.java.dev.jna:jna-platform:5.13.0"
implementation 'org.slf4j:slf4j-api:1.7.36'

zipArchive group: 'org.opensearch.plugin', name:'opensearch-security', version: "${opensearch_build}"
}
Expand Down
6 changes: 4 additions & 2 deletions jni/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ if (${CONFIG_FAISS} STREQUAL ON OR ${CONFIG_ALL} STREQUAL ON OR ${CONFIG_TEST} S
set(BUILD_TESTING OFF) # Avoid building faiss tests
set(BLA_STATIC ON) # Statically link BLAS

if(NOT SIMD_ENABLED)
set(SIMD_ENABLED false) # set default value as false if the argument is not set
if(NOT DEFINED SIMD_ENABLED)
set(SIMD_ENABLED true) # set default value as true if the argument is not set
endif()

if(${CMAKE_SYSTEM_NAME} STREQUAL Windows OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64" OR NOT ${SIMD_ENABLED})
Expand All @@ -122,6 +122,7 @@ if (${CONFIG_FAISS} STREQUAL ON OR ${CONFIG_ALL} STREQUAL ON OR ${CONFIG_TEST} S
else()
set(FAISS_OPT_LEVEL avx2) # Keep optimization level as avx2 to improve performance on Linux and Mac.
set(TARGET_LINK_FAISS_LIB faiss_avx2)
string(PREPEND LIB_EXT "_avx2") # Prepend "_avx2" to lib extension to create the library as "libopensearchknn_faiss_avx2.so" on linux and "libopensearchknn_faiss_avx2.jnilib" on mac
endif()

if (${CMAKE_SYSTEM_NAME} STREQUAL Darwin)
Expand Down Expand Up @@ -160,6 +161,7 @@ if (${CONFIG_FAISS} STREQUAL ON OR ${CONFIG_ALL} STREQUAL ON OR ${CONFIG_TEST} S
if (EXISTS ${PATCH_FILE})
message(STATUS "Applying custom patches.")
execute_process(COMMAND git apply --ignore-space-change --ignore-whitespace --3way ${CMAKE_CURRENT_SOURCE_DIR}/patches/faiss/0001-Custom-patch-to-support-multi-vector.patch WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/external/faiss ERROR_VARIABLE ERROR_MSG RESULT_VARIABLE RESULT_CODE)

if(RESULT_CODE)
message(FATAL_ERROR "Failed to apply patch:\n${ERROR_MSG}")
endif()
Expand Down
24 changes: 8 additions & 16 deletions scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,14 @@ fi

# Build k-NN lib and plugin through gradle tasks
cd $work_dir
# Gradle build is used here to replace gradle assemble due to build will also call cmake and make before generating jars
./gradlew build --no-daemon --refresh-dependencies -x integTest -DskipTests=true -Dopensearch.version=$VERSION -Dbuild.snapshot=$SNAPSHOT -Dbuild.version_qualifier=$QUALIFIER
./gradlew build --no-daemon --refresh-dependencies -x integTest -x test -Dopensearch.version=$VERSION -Dbuild.snapshot=$SNAPSHOT -Dbuild.version_qualifier=$QUALIFIER
./gradlew :buildJniLib -Dsimd.enabled=false

if [ "$PLATFORM" != "windows" ] && [ "$ARCHITECTURE" = "x64" ]; then
echo "Building k-NN library after enabling AVX2"
./gradlew :buildJniLib -Dsimd.enabled=true
fi

./gradlew publishPluginZipPublicationToZipStagingRepository -Dopensearch.version=$VERSION -Dbuild.snapshot=$SNAPSHOT -Dbuild.version_qualifier=$QUALIFIER
./gradlew publishPluginZipPublicationToMavenLocal -Dbuild.snapshot=$SNAPSHOT -Dbuild.version_qualifier=$QUALIFIER -Dopensearch.version=$VERSION

Expand Down Expand Up @@ -150,20 +156,6 @@ cd $distributions
zip -ur $zipPath lib
cd $work_dir

if [ "$PLATFORM" != "windows" ]; then
echo "Building k-NN libraries after enabling SIMD"
./gradlew :buildJniLib -Dsimd.enabled=true
mkdir $distributions/lib_simd
cp -v $ompPath $distributions/lib_simd
cp -v ./jni/release/${libPrefix}* $distributions/lib_simd
ls -l $distributions/lib_simd

# Add lib_simd directory to the k-NN plugin zip
cd $distributions
zip -ur $zipPath lib_simd
cd $work_dir
fi

echo "COPY ${distributions}/*.zip"
mkdir -p $OUTPUT/plugins
cp -v ${distributions}/*.zip $OUTPUT/plugins
Expand Down
1 change: 1 addition & 0 deletions src/main/java/org/opensearch/knn/common/KNNConstants.java
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ public class KNNConstants {
// Lib names
private static final String JNI_LIBRARY_PREFIX = "opensearchknn_";
public static final String FAISS_JNI_LIBRARY_NAME = JNI_LIBRARY_PREFIX + FAISS_NAME;
public static final String FAISS_AVX2_JNI_LIBRARY_NAME = JNI_LIBRARY_PREFIX + FAISS_NAME + "_avx2";
public static final String NMSLIB_JNI_LIBRARY_NAME = JNI_LIBRARY_PREFIX + NMSLIB_NAME;

// API Constants
Expand Down
83 changes: 83 additions & 0 deletions src/main/java/org/opensearch/knn/common/KNNValidationUtil.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/

package org.opensearch.knn.common;

import java.util.Locale;
import lombok.AccessLevel;
import lombok.NoArgsConstructor;
import org.opensearch.knn.index.VectorDataType;

import static org.opensearch.knn.common.KNNConstants.VECTOR_DATA_TYPE_FIELD;

@NoArgsConstructor(access = AccessLevel.PRIVATE)
public class KNNValidationUtil {
    /**
     * Rejects a float vector component that is NaN or infinite.
     *
     * @param value a single component of a float vector
     * @throws IllegalArgumentException if {@code value} is NaN or infinite
     */
    public static void validateFloatVectorValue(float value) {
        final boolean notANumber = Float.isNaN(value);
        if (notANumber) {
            throw new IllegalArgumentException("KNN vector values cannot be NaN");
        }

        final boolean unbounded = Float.isInfinite(value);
        if (unbounded) {
            throw new IllegalArgumentException("KNN vector values cannot be infinity");
        }
    }

    /**
     * Checks that a float component can stand in for a byte: it must pass
     * {@link #validateFloatVectorValue(float)}, carry no fractional part, and lie in
     * [{@link Byte#MIN_VALUE}, {@link Byte#MAX_VALUE}].
     *
     * @param value float component that is expected to hold a byte-sized integer
     * @throws IllegalArgumentException if any of the above conditions is violated
     */
    public static void validateByteVectorValue(float value) {
        // NaN / infinity are rejected first, so the checks below only see finite numbers.
        validateFloatVectorValue(value);

        final boolean hasFraction = value % 1 != 0;
        if (hasFraction) {
            final String fractionMessage = String.format(
                Locale.ROOT,
                "[%s] field was set as [%s] in index mapping. But, KNN vector values are floats instead of byte integers",
                VECTOR_DATA_TYPE_FIELD,
                VectorDataType.BYTE.getValue()
            );
            throw new IllegalArgumentException(fractionMessage);
        }

        // The value is a whole number at this point; compare its int conversion against the byte bounds.
        final int asInt = (int) value;
        if (asInt < Byte.MIN_VALUE || asInt > Byte.MAX_VALUE) {
            final String rangeMessage = String.format(
                Locale.ROOT,
                "[%s] field was set as [%s] in index mapping. But, KNN vector values are not within in the byte range [%d, %d]",
                VECTOR_DATA_TYPE_FIELD,
                VectorDataType.BYTE.getValue(),
                Byte.MIN_VALUE,
                Byte.MAX_VALUE
            );
            throw new IllegalArgumentException(rangeMessage);
        }
    }

    /**
     * Ensures the number of elements in a vector equals the dimension declared in the mapping.
     *
     * @param dimension  dimension of vector
     * @param vectorSize size of the vector
     * @throws IllegalArgumentException if the two values differ
     */
    public static void validateVectorDimension(int dimension, int vectorSize) {
        if (dimension == vectorSize) {
            return;
        }
        throw new IllegalArgumentException(
            String.format(Locale.ROOT, "Vector dimension mismatch. Expected: %d, Given: %d", dimension, vectorSize)
        );
    }
}
45 changes: 45 additions & 0 deletions src/main/java/org/opensearch/knn/common/KNNVectorUtil.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.knn.common;

import java.util.Objects;
import lombok.AccessLevel;
import lombok.NoArgsConstructor;

@NoArgsConstructor(access = AccessLevel.PRIVATE)
public class KNNVectorUtil {
    /**
     * Tells whether a byte vector consists solely of zeros.
     *
     * @param vector the vector to inspect; must not be null
     * @return true when no element differs from zero (an empty vector therefore yields true); otherwise false
     * @throws NullPointerException if {@code vector} is null
     */
    public static boolean isZeroVector(byte[] vector) {
        Objects.requireNonNull(vector, "vector must not be null");
        final int length = vector.length;
        for (int idx = 0; idx < length; idx++) {
            if (vector[idx] != 0) {
                // A single non-zero component is enough to decide.
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether a float vector consists solely of zeros.
     *
     * @param vector the vector to inspect; must not be null
     * @return true when no element differs from zero (an empty vector therefore yields true); otherwise false
     * @throws NullPointerException if {@code vector} is null
     */
    public static boolean isZeroVector(float[] vector) {
        Objects.requireNonNull(vector, "vector must not be null");
        final int length = vector.length;
        for (int idx = 0; idx < length; idx++) {
            if (vector[idx] != 0f) {
                // A single non-zero component is enough to decide.
                return false;
            }
        }
        return true;
    }
}
15 changes: 14 additions & 1 deletion src/main/java/org/opensearch/knn/index/KNNSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ public class KNNSettings {
public static final String MODEL_INDEX_NUMBER_OF_REPLICAS = "knn.model.index.number_of_replicas";
public static final String MODEL_CACHE_SIZE_LIMIT = "knn.model.cache.size.limit";
public static final String ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD = "index.knn.advanced.filtered_exact_search_threshold";
public static final String KNN_FAISS_AVX2_DISABLED = "knn.faiss.avx2.disabled";

/**
* Default setting values
Expand Down Expand Up @@ -230,6 +231,9 @@ public class KNNSettings {
NodeScope,
Dynamic
);

public static final Setting<Boolean> KNN_FAISS_AVX2_DISABLED_SETTING = Setting.boolSetting(KNN_FAISS_AVX2_DISABLED, false, NodeScope);

/**
* Dynamic settings
*/
Expand Down Expand Up @@ -339,6 +343,10 @@ private Setting<?> getSetting(String key) {
return ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_SETTING;
}

if (KNN_FAISS_AVX2_DISABLED.equals(key)) {
return KNN_FAISS_AVX2_DISABLED_SETTING;
}

throw new IllegalArgumentException("Cannot find setting by key [" + key + "]");
}

Expand All @@ -355,7 +363,8 @@ public List<Setting<?>> getSettings() {
MODEL_INDEX_NUMBER_OF_SHARDS_SETTING,
MODEL_INDEX_NUMBER_OF_REPLICAS_SETTING,
MODEL_CACHE_SIZE_LIMIT_SETTING,
ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_SETTING
ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD_SETTING,
KNN_FAISS_AVX2_DISABLED_SETTING
);
return Stream.concat(settings.stream(), dynamicCacheSettings.values().stream()).collect(Collectors.toList());
}
Expand All @@ -376,6 +385,10 @@ public static double getCircuitBreakerUnsetPercentage() {
return KNNSettings.state().getSettingValue(KNNSettings.KNN_CIRCUIT_BREAKER_UNSET_PERCENTAGE);
}

/**
 * Looks up the current value of the {@code knn.faiss.avx2.disabled} setting
 * ({@link #KNN_FAISS_AVX2_DISABLED}) through the {@link KNNSettings} singleton state.
 * The setting is declared as a node-scoped boolean with a default of {@code false}.
 *
 * @return the value of the setting; {@code true} means AVX2 has been disabled via configuration
 */
public static boolean isFaissAVX2Disabled() {
return KNNSettings.state().getSettingValue(KNNSettings.KNN_FAISS_AVX2_DISABLED);
}

public static Integer getFilteredExactSearchThreshold(final String indexName) {
return KNNSettings.state().clusterService.state()
.getMetadata()
Expand Down
Loading

0 comments on commit cf049ea

Please sign in to comment.