Skip to content

Commit

Permalink
Add nested field synthetic and tests for update, search, reindex, nested
Browse files Browse the repository at this point in the history
  • Loading branch information
luyuncheng committed May 14, 2024
1 parent 05439dd commit 469fbf3
Show file tree
Hide file tree
Showing 2 changed files with 495 additions and 84 deletions.
106 changes: 104 additions & 2 deletions src/main/java/org/opensearch/knn/index/fetch/KNNFetchSubPhase.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,36 @@
import lombok.Getter;
import lombok.extern.log4j.Log4j2;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.BitSet;
import org.opensearch.common.io.stream.BytesStreamOutput;
import org.opensearch.common.lucene.search.Queries;
import org.opensearch.common.xcontent.XContentType;
import org.opensearch.core.common.bytes.BytesReference;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.mapper.DocValueFetcher;
import org.opensearch.index.mapper.DocumentMapper;
import org.opensearch.index.mapper.MappedFieldType;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.index.mapper.ObjectMapper;
import org.opensearch.index.mapper.ValueFetcher;
import org.opensearch.knn.index.KNNSettings;
import org.opensearch.knn.index.mapper.KNNVectorFieldMapper;
import org.opensearch.search.SearchHit;
import org.opensearch.search.fetch.FetchContext;
import org.opensearch.search.fetch.FetchSubPhase;
import org.opensearch.search.fetch.FetchSubPhaseProcessor;
import org.opensearch.search.internal.ContextIndexSearcher;
import org.opensearch.search.lookup.SourceLookup;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

Expand Down Expand Up @@ -94,8 +106,7 @@ public void process(HitContext hitContext) throws IOException {
}

if (hasNested) {
// TODO handle nested field
throw new UnsupportedOperationException("knn synthetic source do not support nested field");
syntheticNestedDocValues(mapperService, hitContext, maps);
}
for (DocValueField f : fields) {
if (maps.containsKey(f.field)) {
Expand All @@ -111,6 +122,97 @@ public void process(HitContext hitContext) throws IOException {
builder.value(maps);
hitContext.hit().sourceRef(BytesReference.bytes(builder));
}

protected void syntheticNestedDocValues(MapperService mapperService, HitContext hitContext, Map<String, Object> maps)
throws IOException {
DocumentMapper documentMapper = mapperService.documentMapper();
Map<String, ObjectMapper> mapperMap = documentMapper.objectMappers();
SearchHit hit = hitContext.hit();

for (ObjectMapper objectMapper : mapperMap.values()) {
if (objectMapper == null) {
continue;
}
if (!objectMapper.nested().isNested()) {
continue;
}
String path = objectMapper.fullPath();
for (DocValueField f : fields) {
if (!f.field.startsWith(path)) {
continue;
}
if (!maps.containsKey(path)) {
continue;
}

// path to nested field:
Object nestedObj = maps.get(path);
if (!(nestedObj instanceof ArrayList)) {
continue;
}
// nested array in one nested path
ArrayList nestedDocList = (ArrayList) nestedObj;

log.info(
"object mapper: nested:"
+ objectMapper.nested().isNested()
+ " Value:"
+ objectMapper.fullPath()
+ " field:"
+ f.field
);

Query parentFilter = Queries.newNonNestedFilter();
Query childFilter = objectMapper.nestedTypeFilter();
ContextIndexSearcher searcher = fetchContext.searcher();
final Weight childWeight = searcher.createWeight(searcher.rewrite(childFilter), ScoreMode.COMPLETE_NO_SCORES, 1f);

LeafReaderContext subReaderContext = hitContext.readerContext();
Scorer childScorer = childWeight.scorer(subReaderContext);
DocIdSetIterator childIter = childScorer.iterator();
BitSet parentBits = fetchContext.getQueryShardContext().bitsetFilter(parentFilter).getBitSet(subReaderContext);

int currentParent = hit.docId() - subReaderContext.docBase;
int previousParent = parentBits.prevSetBit(currentParent - 1);
int childDocId = childIter.advance(previousParent + 1);
SourceLookup nestedVecSourceLookup = new SourceLookup();

// when nested field only have vector field and exclude source, list is empty
boolean isEmpty = nestedDocList.isEmpty();

for (int offset = 0; childDocId < currentParent && childDocId != DocIdSetIterator.NO_MORE_DOCS; childDocId = childIter
.nextDoc(), offset++) {
nestedVecSourceLookup.setSegmentAndDocument(subReaderContext, childDocId);
List<Object> nestedVecDocValuesSource = f.fetcher.fetchValues(nestedVecSourceLookup);
if (nestedVecDocValuesSource == null || nestedVecDocValuesSource.isEmpty()) {
continue;
}
if (isEmpty) {
nestedDocList.add(new HashMap<String, Object>());
}
if (offset < nestedDocList.size()) {
Object o2 = nestedDocList.get(offset);
log.info("arraylist value:" + o2.getClass().getName());
if (o2 instanceof Map) {
Map<String, Object> o2map = (Map<String, Object>) o2;
String suffix = f.field.substring(path.length() + 1);
o2map.put(suffix, nestedVecDocValuesSource.get(0));
}
} else {
/**
* TODO nested field partial doc only have vector and source exclude
* this source map nestedDocList would out-of-order, can not fill the vector into right offset
* "nested_field" : [
* {"nested_vector": [2.6, 2.6]},
* {"nested_numeric": 2, "nested_vector": [3.1, 2.3]}
* ]
*/
throw new UnsupportedOperationException("Nested Field should not be empty");
}
}
}
}
}
}

private static class DocValueField {
Expand Down
Loading

0 comments on commit 469fbf3

Please sign in to comment.