Skip to content

Commit

Permalink
Add new dynamic confidence interval configuration to scalar quantized…
Browse files Browse the repository at this point in the history
… format (#13445)

When int4 scalar quantization was merged, it added a new way to dynamically calculate quantiles.

However, when that was merged, I inadvertently changed the default behavior, where a null confidenceInterval would actually calculate the dynamic quantiles instead of doing the previous auto-setting to 1 - 1/(dim + 1).

This commit formalizes the dynamic quantile calculation by setting the confidenceInterval to 0, and preserves the previous behavior for null confidenceIntervals so that users upgrading will not see different quantiles than they would expect.
  • Loading branch information
benwtrent committed Jun 1, 2024
1 parent 46d4872 commit d433394
Show file tree
Hide file tree
Showing 8 changed files with 172 additions and 29 deletions.
4 changes: 4 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ New Features

* GITHUB#13414: Counts are always available in the result when using taxonomy facets. (Stefan Vodita)

* GITHUB#13445: Add new option when calculating scalar quantiles. The new option of setting `confidenceInterval` to
`0` will now dynamically determine the quantiles through a grid search over multiple quantiles calculated
by multiple intervals. (Ben Trent)

Improvements
---------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,9 @@ public Lucene99HnswScalarQuantizedVectorsFormat(int maxConn, int beamWidth) {
* lte 4 bits will be compressed into a single byte. If false, the vectors will be stored as
* is. This provides a trade-off of memory usage and speed.
* @param confidenceInterval the confidenceInterval for scalar quantizing the vectors, when `null`
* it is calculated based on the vector field dimensions.
* it is calculated based on the vector field dimensions. When `0`, the quantiles are
* dynamically determined by sampling many confidence intervals and determining the most
* accurate pair.
* @param mergeExec the {@link ExecutorService} that will be used by ALL vector writers that are
* generated by this format to do the merge
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ public class Lucene99ScalarQuantizedVectorsFormat extends FlatVectorsFormat {
/** The maximum confidence interval */
private static final float MAXIMUM_CONFIDENCE_INTERVAL = 1f;

/** Dynamic confidence interval */
public static final float DYNAMIC_CONFIDENCE_INTERVAL = 0f;

/**
* Controls the confidence interval used to scalar quantize the vectors. The default value is
* calculated as `1-1/(vector_dimensions + 1)`
Expand All @@ -76,7 +79,8 @@ public Lucene99ScalarQuantizedVectorsFormat() {
* Constructs a format using the given graph construction parameters.
*
* @param confidenceInterval the confidenceInterval for scalar quantizing the vectors, when `null`
* it is calculated dynamically.
* it is calculated based on the vector dimension. When `0`, the quantiles are dynamically
* determined by sampling many confidence intervals and determining the most accurate pair.
* @param bits the number of bits to use for scalar quantization (must be between 1 and 8,
* inclusive)
* @param compress whether to compress the vectors, if true, the vectors that are quantized with
Expand All @@ -86,13 +90,15 @@ public Lucene99ScalarQuantizedVectorsFormat() {
public Lucene99ScalarQuantizedVectorsFormat(
Float confidenceInterval, int bits, boolean compress) {
if (confidenceInterval != null
&& confidenceInterval != DYNAMIC_CONFIDENCE_INTERVAL
&& (confidenceInterval < MINIMUM_CONFIDENCE_INTERVAL
|| confidenceInterval > MAXIMUM_CONFIDENCE_INTERVAL)) {
throw new IllegalArgumentException(
"confidenceInterval must be between "
+ MINIMUM_CONFIDENCE_INTERVAL
+ " and "
+ MAXIMUM_CONFIDENCE_INTERVAL
+ " or 0"
+ "; confidenceInterval="
+ confidenceInterval);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -346,12 +346,19 @@ private static class FieldEntry implements Accountable {
if (size > 0) {
if (versionMeta < Lucene99ScalarQuantizedVectorsFormat.VERSION_ADD_BITS) {
int floatBits = input.readInt(); // confidenceInterval, unused
if (floatBits == -1) {
if (floatBits == -1) { // indicates a null confidence interval
throw new CorruptIndexException(
"Missing confidence interval for scalar quantizer", input);
}
this.bits = (byte) 7;
this.compress = false;
float confidenceInterval = Float.intBitsToFloat(floatBits);
// indicates a dynamic interval, which shouldn't be provided in this version
if (confidenceInterval
== Lucene99ScalarQuantizedVectorsFormat.DYNAMIC_CONFIDENCE_INTERVAL) {
throw new CorruptIndexException(
"Invalid confidence interval for scalar quantizer: " + confidenceInterval, input);
}
bits = (byte) 7;
compress = false;
float minQuantile = Float.intBitsToFloat(input.readInt());
float maxQuantile = Float.intBitsToFloat(input.readInt());
scalarQuantizer = new ScalarQuantizer(minQuantile, maxQuantile, (byte) 7);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.lucene.codecs.lucene99;

import static org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat.DYNAMIC_CONFIDENCE_INTERVAL;
import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat.QUANTIZED_VECTOR_COMPONENT;
import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat.calculateDefaultConfidenceInterval;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
Expand Down Expand Up @@ -117,6 +118,9 @@ public Lucene99ScalarQuantizedVectorsWriter(
false,
rawVectorDelegate,
scorer);
if (confidenceInterval != null && confidenceInterval == 0) {
throw new IllegalArgumentException("confidenceInterval cannot be set to zero");
}
}

public Lucene99ScalarQuantizedVectorsWriter(
Expand Down Expand Up @@ -347,6 +351,7 @@ private void writeMeta(
meta.writeByte(bits);
meta.writeByte(compress ? (byte) 1 : (byte) 0);
} else {
assert confidenceInterval == null || confidenceInterval != DYNAMIC_CONFIDENCE_INTERVAL;
meta.writeInt(
Float.floatToIntBits(
confidenceInterval == null
Expand Down Expand Up @@ -667,22 +672,36 @@ public static ScalarQuantizer mergeAndRecalculateQuantiles(
doc = vectorValues.nextDoc()) {
numVectors++;
}
mergedQuantiles =
confidenceInterval == null
? ScalarQuantizer.fromVectorsAutoInterval(
KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState),
fieldInfo.getVectorSimilarityFunction(),
numVectors,
bits)
: ScalarQuantizer.fromVectors(
KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState),
confidenceInterval,
numVectors,
bits);
return buildScalarQuantizer(
KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState),
numVectors,
fieldInfo.getVectorSimilarityFunction(),
confidenceInterval,
bits);
}
return mergedQuantiles;
}

/**
 * Creates the {@link ScalarQuantizer} for the supplied vectors, choosing the construction
 * strategy from {@code confidenceInterval}:
 *
 * <ul>
 *   <li>{@code null}: the interval is derived from the vector dimension via {@code
 *       calculateDefaultConfidenceInterval} and passed to {@code ScalarQuantizer.fromVectors}.
 *   <li>equal to {@code DYNAMIC_CONFIDENCE_INTERVAL}: the work is delegated to {@code
 *       ScalarQuantizer.fromVectorsAutoInterval}.
 *   <li>any other value: passed unchanged to {@code ScalarQuantizer.fromVectors}.
 * </ul>
 *
 * @param floatVectorValues the vectors the quantiles are computed from
 * @param numVectors the number of vectors, forwarded to the quantizer factory
 * @param vectorSimilarityFunction the similarity function, only forwarded on the dynamic path
 * @param confidenceInterval the requested confidence interval, may be {@code null}
 * @param bits the number of bits, forwarded to the quantizer factory
 * @return the quantizer produced by the selected factory method
 * @throws IOException declared because the delegated factory calls may throw it
 */
static ScalarQuantizer buildScalarQuantizer(
    FloatVectorValues floatVectorValues,
    int numVectors,
    VectorSimilarityFunction vectorSimilarityFunction,
    Float confidenceInterval,
    byte bits)
    throws IOException {
  // No interval requested: fall back to the dimension-based default.
  if (confidenceInterval == null) {
    return ScalarQuantizer.fromVectors(
        floatVectorValues,
        calculateDefaultConfidenceInterval(floatVectorValues.dimension()),
        numVectors,
        bits);
  }
  // The sentinel value selects the auto-interval computation.
  if (confidenceInterval == DYNAMIC_CONFIDENCE_INTERVAL) {
    return ScalarQuantizer.fromVectorsAutoInterval(
        floatVectorValues, vectorSimilarityFunction, numVectors, bits);
  }
  // Explicit interval supplied by the caller.
  return ScalarQuantizer.fromVectors(floatVectorValues, confidenceInterval, numVectors, bits);
}

/**
* Returns true if the quantiles of the new quantization state are too far from the quantiles of
* the existing quantization state. This would imply that floating point values would slightly
Expand Down Expand Up @@ -785,14 +804,12 @@ void finish() throws IOException {
}
FloatVectorValues floatVectorValues = new FloatVectorWrapper(floatVectors, normalize);
ScalarQuantizer quantizer =
confidenceInterval == null
? ScalarQuantizer.fromVectorsAutoInterval(
floatVectorValues,
fieldInfo.getVectorSimilarityFunction(),
floatVectors.size(),
bits)
: ScalarQuantizer.fromVectors(
floatVectorValues, confidenceInterval, floatVectors.size(), bits);
buildScalarQuantizer(
floatVectorValues,
floatVectors.size(),
fieldInfo.getVectorSimilarityFunction(),
confidenceInterval,
bits);
minQuantile = quantizer.getLowerQuantile();
maxQuantile = quantizer.getUpperQuantile();
if (infoStream.isEnabled(QUANTIZED_VECTOR_COMPONENT)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,10 @@ public class TestLucene99HnswQuantizedVectorsFormat extends BaseKnnVectorsFormat
@Override
public void setUp() throws Exception {
bits = random().nextBoolean() ? 4 : 7;
confidenceInterval = random().nextBoolean() ? 0.99f : null;
confidenceInterval = random().nextBoolean() ? (0.9f + random().nextFloat() * 0.1f) : null;
if (random().nextBoolean()) {
confidenceInterval = 0f;
}
format =
new Lucene99HnswScalarQuantizedVectorsFormat(
Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN,
Expand Down Expand Up @@ -134,15 +137,17 @@ public void testQuantizedVectorsWriteAndRead() throws Exception {
vectors.add(randomVector(dim));
}
ScalarQuantizer scalarQuantizer =
confidenceInterval == null
confidenceInterval != null && confidenceInterval == 0f
? ScalarQuantizer.fromVectorsAutoInterval(
new Lucene99ScalarQuantizedVectorsWriter.FloatVectorWrapper(vectors, normalize),
similarityFunction,
numVectors,
(byte) bits)
: ScalarQuantizer.fromVectors(
new Lucene99ScalarQuantizedVectorsWriter.FloatVectorWrapper(vectors, normalize),
confidenceInterval,
confidenceInterval == null
? Lucene99ScalarQuantizedVectorsFormat.calculateDefaultConfidenceInterval(dim)
: confidenceInterval,
numVectors,
(byte) bits);
float[] expectedCorrections = new float[numVectors];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
1,
bits,
compress,
null,
0f,
null);
}
};
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.codecs.lucene99;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.quantization.ScalarQuantizer;

/**
 * Checks the lower and upper quantiles returned by {@code
 * Lucene99ScalarQuantizedVectorsWriter.buildScalarQuantizer} for every similarity function, using
 * both a fixed confidence interval of 0.9 and the value 0, at 7 and 4 bits.
 */
public class TestLucene99ScalarQuantizedVectorsWriter extends LuceneTestCase {

  /** Number of vectors fed to the quantizer in every scenario. */
  private static final int NUM_VECTORS = 30;

  /** Explicit confidence interval used by the fixed-interval scenarios. */
  private static final float FIXED_INTERVAL = 0.9f;

  /** Confidence interval value used by the dynamic scenarios. */
  private static final float DYNAMIC_INTERVAL = 0f;

  private static final byte SEVEN_BITS = (byte) 7;
  private static final byte FOUR_BITS = (byte) 4;

  /** Tolerance applied when comparing expected and actual quantiles. */
  private static final float DELTA = 0.0001f;

  public void testBuildScalarQuantizerCosine() throws IOException {
    VectorSimilarityFunction similarity = VectorSimilarityFunction.COSINE;
    assertScalarQuantizer(
        new float[] {0.3234983f, 0.6236096f}, FIXED_INTERVAL, SEVEN_BITS, similarity);
    assertScalarQuantizer(
        new float[] {0.28759837f, 0.62449116f}, DYNAMIC_INTERVAL, SEVEN_BITS, similarity);
    assertScalarQuantizer(
        new float[] {0.3234983f, 0.6236096f}, FIXED_INTERVAL, FOUR_BITS, similarity);
    assertScalarQuantizer(
        new float[] {0.37247902f, 0.58848244f}, DYNAMIC_INTERVAL, FOUR_BITS, similarity);
  }

  public void testBuildScalarQuantizerDotProduct() throws IOException {
    VectorSimilarityFunction similarity = VectorSimilarityFunction.DOT_PRODUCT;
    assertScalarQuantizer(
        new float[] {0.3234983f, 0.6236096f}, FIXED_INTERVAL, SEVEN_BITS, similarity);
    assertScalarQuantizer(
        new float[] {0.28759837f, 0.62449116f}, DYNAMIC_INTERVAL, SEVEN_BITS, similarity);
    assertScalarQuantizer(
        new float[] {0.3234983f, 0.6236096f}, FIXED_INTERVAL, FOUR_BITS, similarity);
    assertScalarQuantizer(
        new float[] {0.37247902f, 0.58848244f}, DYNAMIC_INTERVAL, FOUR_BITS, similarity);
  }

  public void testBuildScalarQuantizerMIP() throws IOException {
    VectorSimilarityFunction similarity = VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT;
    assertScalarQuantizer(new float[] {2.0f, 20.0f}, FIXED_INTERVAL, SEVEN_BITS, similarity);
    assertScalarQuantizer(
        new float[] {2.4375f, 19.0625f}, DYNAMIC_INTERVAL, SEVEN_BITS, similarity);
    assertScalarQuantizer(new float[] {2.0f, 20.0f}, FIXED_INTERVAL, FOUR_BITS, similarity);
    assertScalarQuantizer(
        new float[] {2.6875f, 19.0625f}, DYNAMIC_INTERVAL, FOUR_BITS, similarity);
  }

  public void testBuildScalarQuantizerEuclidean() throws IOException {
    VectorSimilarityFunction similarity = VectorSimilarityFunction.EUCLIDEAN;
    assertScalarQuantizer(new float[] {2.0f, 20.0f}, FIXED_INTERVAL, SEVEN_BITS, similarity);
    assertScalarQuantizer(
        new float[] {2.125f, 19.375f}, DYNAMIC_INTERVAL, SEVEN_BITS, similarity);
    assertScalarQuantizer(new float[] {2.0f, 20.0f}, FIXED_INTERVAL, FOUR_BITS, similarity);
    assertScalarQuantizer(
        new float[] {2.1875f, 19.0625f}, DYNAMIC_INTERVAL, FOUR_BITS, similarity);
  }

  /**
   * Builds 30 four-dimensional vectors of the form {@code {i, i + 1, i + 2, i + 3}}, runs them
   * through {@code buildScalarQuantizer}, and compares the resulting quantiles with the expected
   * pair.
   *
   * @param expectedQuantiles expected lower quantile at index 0 and upper quantile at index 1
   * @param confidenceInterval confidence interval handed to the quantizer builder
   * @param bits number of quantization bits handed to the quantizer builder
   * @param vectorSimilarityFunction similarity function handed to the quantizer builder
   */
  private void assertScalarQuantizer(
      float[] expectedQuantiles,
      Float confidenceInterval,
      byte bits,
      VectorSimilarityFunction vectorSimilarityFunction)
      throws IOException {
    List<float[]> samples = new ArrayList<>(NUM_VECTORS);
    int start = 0;
    while (start < NUM_VECTORS) {
      samples.add(new float[] {start, start + 1, start + 2, start + 3});
      start++;
    }

    // The wrapper's boolean flag is set only for COSINE and DOT_PRODUCT.
    boolean normalize =
        vectorSimilarityFunction == VectorSimilarityFunction.COSINE
            || vectorSimilarityFunction == VectorSimilarityFunction.DOT_PRODUCT;
    FloatVectorValues wrapped =
        new Lucene99ScalarQuantizedVectorsWriter.FloatVectorWrapper(samples, normalize);

    ScalarQuantizer quantizer =
        Lucene99ScalarQuantizedVectorsWriter.buildScalarQuantizer(
            wrapped, NUM_VECTORS, vectorSimilarityFunction, confidenceInterval, bits);

    assertEquals(expectedQuantiles[0], quantizer.getLowerQuantile(), DELTA);
    assertEquals(expectedQuantiles[1], quantizer.getUpperQuantile(), DELTA);
  }
}

0 comments on commit d433394

Please sign in to comment.