Skip to content

Commit 46d4872

Browse files
committed
SimpleText[Float|Byte]VectorValues::scorer should return null when the vector values is empty (#13444)
This commit ensures that SimpleText[Float|Byte]VectorValues::scorer returns null when the vector values is empty, as per the scorer javadoc. Other KnnVectorsReader implementations have specialised empty implementations that behave similarly, e.g. OffHeapFloatVectorValues.EmptyOffHeapVectorValues. The VectorScorer interface is new in Lucene 9.11, see #13181. An existing test randomly hits this, but a new test has been added that exercises this code path consistently. The new test is also useful for verifying other KnnVectorsReader implementations.
1 parent ac4ccbf commit 46d4872

File tree

7 files changed

+80
-0
lines changed

7 files changed

+80
-0
lines changed

lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsReader.java

+3
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,9 @@ public float[] vectorValue(int targetOrd) throws IOException {
451451

452452
@Override
453453
public VectorScorer scorer(float[] target) {
454+
if (size() == 0) {
455+
return null;
456+
}
454457
OffHeapFloatVectorValues values = this.copy();
455458
return new VectorScorer() {
456459
@Override

lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java

+3
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,9 @@ public float[] vectorValue(int targetOrd) throws IOException {
497497

498498
@Override
499499
public VectorScorer scorer(float[] target) {
500+
if (size == 0) {
501+
return null;
502+
}
500503
OffHeapFloatVectorValues values = this.copy();
501504
return new VectorScorer() {
502505
@Override

lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/TestLucene90HnswVectorsFormat.java

+5
Original file line numberDiff line numberDiff line change
@@ -73,4 +73,9 @@ public void testSortedIndexBytes() throws Exception {
7373
public void testByteVectorScorerIteration() {
7474
// unimplemented
7575
}
76+
77+
@Override
78+
public void testEmptyByteVectorData() {
79+
// unimplemented
80+
}
7681
}

lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/TestLucene91HnswVectorsFormat.java

+5
Original file line numberDiff line numberDiff line change
@@ -72,4 +72,9 @@ public void testSortedIndexBytes() throws Exception {
7272
public void testByteVectorScorerIteration() {
7373
// unimplemented
7474
}
75+
76+
@Override
77+
public void testEmptyByteVectorData() {
78+
// unimplemented
79+
}
7580
}

lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene92/TestLucene92HnswVectorsFormat.java

+5
Original file line numberDiff line numberDiff line change
@@ -62,4 +62,9 @@ public void testSortedIndexBytes() throws Exception {
6262
public void testByteVectorScorerIteration() {
6363
// unimplemented
6464
}
65+
66+
@Override
67+
public void testEmptyByteVectorData() {
68+
// unimplemented
69+
}
6570
}

lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsReader.java

+6
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,9 @@ public int advance(int target) throws IOException {
395395

396396
@Override
397397
public VectorScorer scorer(float[] target) {
398+
if (size() == 0) {
399+
return null;
400+
}
398401
SimpleTextFloatVectorValues simpleTextFloatVectorValues =
399402
new SimpleTextFloatVectorValues(this);
400403
return new VectorScorer() {
@@ -504,6 +507,9 @@ public int advance(int target) throws IOException {
504507

505508
@Override
506509
public VectorScorer scorer(byte[] target) {
510+
if (size() == 0) {
511+
return null;
512+
}
507513
SimpleTextByteVectorValues simpleTextByteVectorValues = new SimpleTextByteVectorValues(this);
508514
return new VectorScorer() {
509515
@Override

lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java

+53
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
*/
1717
package org.apache.lucene.tests.index;
1818

19+
import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT;
1920
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
2021

2122
import java.io.ByteArrayOutputStream;
@@ -847,6 +848,58 @@ public void testByteVectorScorerIteration() throws Exception {
847848
}
848849
}
849850

851+
public void testEmptyFloatVectorData() throws Exception {
852+
try (Directory dir = newDirectory();
853+
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
854+
var doc1 = new Document();
855+
doc1.add(new StringField("id", "0", Field.Store.NO));
856+
doc1.add(new KnnFloatVectorField("v", new float[] {2, 3, 5, 6}, DOT_PRODUCT));
857+
w.addDocument(doc1);
858+
859+
var doc2 = new Document();
860+
doc2.add(new StringField("id", "1", Field.Store.NO));
861+
w.addDocument(doc2);
862+
863+
w.deleteDocuments(new Term("id", Integer.toString(0)));
864+
w.commit();
865+
w.forceMerge(1);
866+
867+
try (DirectoryReader reader = DirectoryReader.open(w)) {
868+
LeafReader r = getOnlyLeafReader(reader);
869+
FloatVectorValues values = r.getFloatVectorValues("v");
870+
assertNotNull(values);
871+
assertEquals(0, values.size());
872+
assertNull(values.scorer(new float[] {2, 3, 5, 6}));
873+
}
874+
}
875+
}
876+
877+
public void testEmptyByteVectorData() throws Exception {
878+
try (Directory dir = newDirectory();
879+
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
880+
var doc1 = new Document();
881+
doc1.add(new StringField("id", "0", Field.Store.NO));
882+
doc1.add(new KnnByteVectorField("v", new byte[] {2, 3, 5, 6}, DOT_PRODUCT));
883+
w.addDocument(doc1);
884+
885+
var doc2 = new Document();
886+
doc2.add(new StringField("id", "1", Field.Store.NO));
887+
w.addDocument(doc2);
888+
889+
w.deleteDocuments(new Term("id", Integer.toString(0)));
890+
w.commit();
891+
w.forceMerge(1);
892+
893+
try (DirectoryReader reader = DirectoryReader.open(w)) {
894+
LeafReader r = getOnlyLeafReader(reader);
895+
ByteVectorValues values = r.getByteVectorValues("v");
896+
assertNotNull(values);
897+
assertEquals(0, values.size());
898+
assertNull(values.scorer(new byte[] {2, 3, 5, 6}));
899+
}
900+
}
901+
}
902+
850903
protected VectorSimilarityFunction randomSimilarity() {
851904
return VectorSimilarityFunction.values()[
852905
random().nextInt(VectorSimilarityFunction.values().length)];

0 commit comments

Comments
 (0)