From 614163a92e930baff5d3dfb78eee294a320e037d Mon Sep 17 00:00:00 2001 From: Navneet Verma Date: Mon, 7 Oct 2024 14:43:31 -0700 Subject: [PATCH] Fix lucene codec after lucene version bumped to 9.12 Signed-off-by: Navneet Verma --- CHANGELOG.md | 1 + .../codec/KNN9120Codec/KNN9120Codec.java | 61 +++++++++++++++++++ .../NativeEngines990KnnVectorsWriter.java | 2 +- .../knn/index/codec/KNNCodecVersion.java | 21 ++++++- 4 files changed, 82 insertions(+), 3 deletions(-) create mode 100644 src/main/java/org/opensearch/knn/index/codec/KNN9120Codec/KNN9120Codec.java diff --git a/CHANGELOG.md b/CHANGELOG.md index f615a78fb..7bc3019df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Documentation * Fix sed command in DEVELOPER_GUIDE.md to append a new line character '\n'. [#2181](https://github.com/opensearch-project/k-NN/pull/2181) ### Maintenance +* Fix lucene codec after lucene version bumped to 9.12. [#2195](https://github.com/opensearch-project/k-NN/pull/2195) ### Refactoring * Does not create additional KNNVectorValues in NativeEngines990KNNVectorWriter when quantization is not needed [#2133](https://github.com/opensearch-project/k-NN/pull/2133) diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN9120Codec/KNN9120Codec.java b/src/main/java/org/opensearch/knn/index/codec/KNN9120Codec/KNN9120Codec.java new file mode 100644 index 000000000..a370197ec --- /dev/null +++ b/src/main/java/org/opensearch/knn/index/codec/KNN9120Codec/KNN9120Codec.java @@ -0,0 +1,61 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.index.codec.KNN9120Codec; + +import lombok.Builder; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.CompoundFormat; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.FilterCodec; +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; +import org.opensearch.knn.index.codec.KNNCodecVersion; +import org.opensearch.knn.index.codec.KNNFormatFacade; + +/** + * KNN Codec that wraps the Lucene Codec which is part of Lucene 9.12 + */ +public class KNN9120Codec extends FilterCodec { + private static final KNNCodecVersion VERSION = KNNCodecVersion.V_9_12_0; + private final KNNFormatFacade knnFormatFacade; + private final PerFieldKnnVectorsFormat perFieldKnnVectorsFormat; + + /** + * No arg constructor that uses Lucene99 as the delegate + */ + public KNN9120Codec() { + this(VERSION.getDefaultCodecDelegate(), VERSION.getPerFieldKnnVectorsFormat()); + } + + /** + * Sole constructor. When subclassing this codec, create a no-arg ctor and pass the delegate codec + * and a unique name to this ctor. + * + * @param delegate codec that will perform all operations this codec does not override + * @param knnVectorsFormat per field format for KnnVector + */ + @Builder + protected KNN9120Codec(Codec delegate, PerFieldKnnVectorsFormat knnVectorsFormat) { + super(VERSION.getCodecName(), delegate); + knnFormatFacade = VERSION.getKnnFormatFacadeSupplier().apply(delegate); + perFieldKnnVectorsFormat = knnVectorsFormat; + } + + @Override + public DocValuesFormat docValuesFormat() { + return knnFormatFacade.docValuesFormat(); + } + + @Override + public CompoundFormat compoundFormat() { + return knnFormatFacade.compoundFormat(); + } + + @Override + public KnnVectorsFormat knnVectorsFormat() { + return perFieldKnnVectorsFormat; + } +} diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriter.java b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriter.java index 2f22565c9..e33519130 100644 --- a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriter.java +++ b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriter.java @@ -67,7 +67,7 @@ public NativeEngines990KnnVectorsWriter(SegmentWriteState segmentWriteState, Fla public KnnFieldVectorsWriter addField(final FieldInfo fieldInfo) throws IOException { final NativeEngineFieldVectorsWriter newField = NativeEngineFieldVectorsWriter.create(fieldInfo, segmentWriteState.infoStream); fields.add(newField); - return flatVectorsWriter.addField(fieldInfo, newField); + return flatVectorsWriter.addField(fieldInfo); } /** diff --git a/src/main/java/org/opensearch/knn/index/codec/KNNCodecVersion.java b/src/main/java/org/opensearch/knn/index/codec/KNNCodecVersion.java index 419505aa2..4bbeceffb 100644 --- a/src/main/java/org/opensearch/knn/index/codec/KNNCodecVersion.java +++ b/src/main/java/org/opensearch/knn/index/codec/KNNCodecVersion.java @@ -12,12 +12,14 @@ import org.apache.lucene.backward_codecs.lucene94.Lucene94Codec; import org.apache.lucene.codecs.Codec; import org.apache.lucene.backward_codecs.lucene95.Lucene95Codec; -import org.apache.lucene.codecs.lucene99.Lucene99Codec; +import org.apache.lucene.backward_codecs.lucene99.Lucene99Codec; +import org.apache.lucene.codecs.lucene912.Lucene912Codec; import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; import org.opensearch.index.mapper.MapperService; import org.opensearch.knn.index.codec.KNN80Codec.KNN80CompoundFormat; import org.opensearch.knn.index.codec.KNN80Codec.KNN80DocValuesFormat; import org.opensearch.knn.index.codec.KNN910Codec.KNN910Codec; +import org.opensearch.knn.index.codec.KNN9120Codec.KNN9120Codec; import org.opensearch.knn.index.codec.KNN920Codec.KNN920Codec; import org.opensearch.knn.index.codec.KNN920Codec.KNN920PerFieldKnnVectorsFormat; import org.opensearch.knn.index.codec.KNN940Codec.KNN940Codec; @@ -110,9 +112,24 @@ public enum KNNCodecVersion { .knnVectorsFormat(new KNN990PerFieldKnnVectorsFormat(Optional.ofNullable(mapperService))) .build(), KNN990Codec::new + ), + + V_9_12_0( + "KNN990Codec", + new Lucene912Codec(), + new KNN990PerFieldKnnVectorsFormat(Optional.empty()), + (delegate) -> new KNNFormatFacade( + new KNN80DocValuesFormat(delegate.docValuesFormat()), + new KNN80CompoundFormat(delegate.compoundFormat()) + ), + (userCodec, mapperService) -> KNN9120Codec.builder() + .delegate(userCodec) + .knnVectorsFormat(new KNN990PerFieldKnnVectorsFormat(Optional.ofNullable(mapperService))) + .build(), + KNN9120Codec::new ); - private static final KNNCodecVersion CURRENT = V_9_9_0; + private static final KNNCodecVersion CURRENT = V_9_12_0; private final String codecName; private final Codec defaultCodecDelegate;