Skip to content

Commit

Permalink
Split syntheticNestedFieldWithDocValues method into several sub-methods
Browse files Browse the repository at this point in the history
Signed-off-by: luyuncheng <[email protected]>
  • Loading branch information
luyuncheng committed Jul 7, 2024
1 parent 7be3a5c commit 0cc0543
Showing 1 changed file with 91 additions and 67 deletions.
158 changes: 91 additions & 67 deletions src/main/java/org/opensearch/knn/index/fetch/KNNFetchSubPhase.java
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ public void process(HitContext hitContext) throws IOException {
}

if (hasNested) {
syntheticNestedDocValues(mapperService, hitContext, maps);
syntheticNestedFieldWithDocValues(mapperService, hitContext, maps);
}
for (DocValueField f : fields) {
if (maps.containsKey(f.field)) {
Expand All @@ -123,11 +123,10 @@ public void process(HitContext hitContext) throws IOException {
hitContext.hit().sourceRef(BytesReference.bytes(builder));
}

protected void syntheticNestedDocValues(MapperService mapperService, HitContext hitContext, Map<String, Object> maps)
protected void syntheticNestedFieldWithDocValues(MapperService mapperService, HitContext hitContext, Map<String, Object> sourceMaps)
throws IOException {
DocumentMapper documentMapper = mapperService.documentMapper();
Map<String, ObjectMapper> mapperMap = documentMapper.objectMappers();
SearchHit hit = hitContext.hit();

for (ObjectMapper objectMapper : mapperMap.values()) {
if (objectMapper == null) {
Expand All @@ -138,19 +137,11 @@ protected void syntheticNestedDocValues(MapperService mapperService, HitContext
}
String path = objectMapper.fullPath();
for (DocValueField f : fields) {
if (!f.field.startsWith(path)) {
continue;
}
if (!maps.containsKey(path)) {
continue;
}

// path to nested field:
Object nestedObj = maps.get(path);
if (!(nestedObj instanceof ArrayList)) {
if (!checkNestedField(path, f, sourceMaps)) {
continue;
}
// nested array in one nested path
Object nestedObj = sourceMaps.get(path);
ArrayList nestedDocList = (ArrayList) nestedObj;

log.debug(
Expand All @@ -162,64 +153,97 @@ protected void syntheticNestedDocValues(MapperService mapperService, HitContext
+ f.field
);

Query parentFilter = Queries.newNonNestedFilter();
Query childFilter = objectMapper.nestedTypeFilter();
ContextIndexSearcher searcher = fetchContext.searcher();
final Weight childWeight = searcher.createWeight(searcher.rewrite(childFilter), ScoreMode.COMPLETE_NO_SCORES, 1f);

LeafReaderContext subReaderContext = hitContext.readerContext();
Scorer childScorer = childWeight.scorer(subReaderContext);
DocIdSetIterator childIter = childScorer.iterator();
BitSet parentBits = fetchContext.getQueryShardContext().bitsetFilter(parentFilter).getBitSet(subReaderContext);

int currentParent = hit.docId() - subReaderContext.docBase;
int previousParent = parentBits.prevSetBit(currentParent - 1);
int childDocId = childIter.advance(previousParent + 1);
SourceLookup nestedVecSourceLookup = new SourceLookup();

// when nested field only have vector field and exclude source, list is empty
boolean isEmpty = nestedDocList.isEmpty();

for (int offset = 0; childDocId < currentParent && childDocId != DocIdSetIterator.NO_MORE_DOCS; childDocId = childIter
.nextDoc(), offset++) {
nestedVecSourceLookup.setSegmentAndDocument(subReaderContext, childDocId);
List<Object> nestedVecDocValuesSource = f.fetcher.fetchValues(nestedVecSourceLookup);
if (nestedVecDocValuesSource == null || nestedVecDocValuesSource.isEmpty()) {
continue;
}
if (isEmpty) {
nestedDocList.add(new HashMap<String, Object>());
}
if (offset < nestedDocList.size()) {
Object o2 = nestedDocList.get(offset);
if (o2 instanceof Map) {
Map<String, Object> o2map = (Map<String, Object>) o2;
String suffix = f.field.substring(path.length() + 1);
o2map.put(suffix, nestedVecDocValuesSource.get(0));
}
} else {
/**
* TODO nested field partial doc only have vector and source exclude
* this source map nestedDocList would out-of-order, can not fill the vector into right offset
* "nested_field" : [
* {"nested_vector": [2.6, 2.6]},
* {"nested_numeric": 2, "nested_vector": [3.1, 2.3]}
* ]
*/

throw new UnsupportedOperationException(
String.format(
"\"Nested Path \"%s\" in Field \"%s\" with _ID \"%s\" can not be empty\"",
path,
f.field,
hit.getId()
)
);
}
innerProcessOneNestedField(objectMapper, hitContext, nestedDocList, f, path);
}
}
}

/**
 * Copies the doc-values of one nested field into the nested source entries of the current hit.
 *
 * <p>The method walks the child doc ids that lie after the previous parent bit and before the
 * hit's own segment-local doc id, fetches the value of {@code f} for each of them and stores the
 * first fetched value under the field name relative to {@code path} in the entry of
 * {@code nestedDocList} that has the same position as the child.
 *
 * @param objectMapper  mapper whose nested type filter selects the child documents
 * @param hitContext    context of the hit being processed (provides the hit and the leaf reader)
 * @param nestedDocList nested source entries of {@code path}; mutated in place
 * @param f             doc-value field whose values are fetched per child document
 * @param path          full path of the nested object; {@code f.field} is expected to be {@code path + "." + suffix}
 * @throws IOException if reading from the index fails
 * @throws UnsupportedOperationException if a child with a value has no entry at its position in {@code nestedDocList}
 */
private void innerProcessOneNestedField(
    ObjectMapper objectMapper,
    HitContext hitContext,
    ArrayList nestedDocList,
    DocValueField f,
    String path
) throws IOException {

    LeafReaderContext subReaderContext = hitContext.readerContext();
    SearchHit hit = hitContext.hit();
    int currentParent = hit.docId() - subReaderContext.docBase;
    if (currentParent <= 0) {
        // The loop below only visits child ids strictly below currentParent, so there is nothing
        // to do here; returning early also avoids calling prevSetBit(-1), which is out of range.
        return;
    }

    BitSet parentBits = getParentDocBitSet(hitContext);
    DocIdSetIterator childIter = getChildDocIdSetIterator(objectMapper, hitContext);

    int previousParent = parentBits.prevSetBit(currentParent - 1);
    int childDocId = childIter.advance(previousParent + 1);
    SourceLookup nestedVecSourceLookup = new SourceLookup();

    // when the nested field only has the vector field and it is excluded from source, the list is empty
    boolean isEmpty = nestedDocList.isEmpty();

    for (int offset = 0; childDocId < currentParent && childDocId != DocIdSetIterator.NO_MORE_DOCS; childDocId = childIter
        .nextDoc(), offset++) {
        nestedVecSourceLookup.setSegmentAndDocument(subReaderContext, childDocId);
        List<Object> nestedVecDocValuesSource = f.fetcher.fetchValues(nestedVecSourceLookup);
        if (nestedVecDocValuesSource == null || nestedVecDocValuesSource.isEmpty()) {
            continue;
        }
        if (isEmpty) {
            // the list started empty: create the entry that will receive this child's value
            nestedDocList.add(new HashMap<String, Object>());
        }
        if (offset < nestedDocList.size()) {
            Object sourceObj = nestedDocList.get(offset);
            if (sourceObj instanceof Map) {
                @SuppressWarnings("unchecked") // source entries are parsed as String-keyed maps
                Map<String, Object> sourceMap = (Map<String, Object>) sourceObj;
                // key relative to the nested path: strip "<path>."
                String suffix = f.field.substring(path.length() + 1);
                sourceMap.put(suffix, nestedVecDocValuesSource.get(0));
            }
        } else {
            /*
             * TODO: when only part of the nested docs consist solely of the (source-excluded) vector,
             * the entries of nestedDocList are out of order with the child docs and the vector
             * cannot be filled in at the right offset, e.g.
             * "nested_field" : [
             *   {"nested_vector": [2.6, 2.6]},
             *   {"nested_numeric": 2, "nested_vector": [3.1, 2.3]}
             * ]
             */
            throw new UnsupportedOperationException(
                String.format("\"Nested Path \"%s\" in Field \"%s\" with _ID \"%s\" can not be empty\"", path, f.field, hit.getId())
            );
        }
    }
}

/**
 * Looks up the bit set of the non-nested filter for the segment of the given hit.
 *
 * @param hitContext context of the hit; supplies the leaf reader context
 * @return the bit set produced by the query shard context's bitset filter for that segment
 * @throws IOException if the bit set cannot be loaded
 */
private BitSet getParentDocBitSet(HitContext hitContext) throws IOException {
    final Query nonNestedFilter = Queries.newNonNestedFilter();
    final LeafReaderContext leaf = hitContext.readerContext();
    return fetchContext.getQueryShardContext().bitsetFilter(nonNestedFilter).getBitSet(leaf);
}

/**
 * Builds an iterator over the documents of the hit's segment that match the nested type filter
 * of {@code objectMapper}.
 *
 * @param objectMapper mapper providing the nested type filter
 * @param hitContext   context of the hit; supplies the leaf reader context
 * @return an iterator over the matching doc ids, or an empty iterator when the weight yields no
 *         scorer for this segment
 * @throws IOException if the query cannot be rewritten or the scorer cannot be created
 */
private DocIdSetIterator getChildDocIdSetIterator(ObjectMapper objectMapper, HitContext hitContext) throws IOException {
    Query childFilter = objectMapper.nestedTypeFilter();
    ContextIndexSearcher searcher = fetchContext.searcher();
    LeafReaderContext subReaderContext = hitContext.readerContext();
    final Weight childWeight = searcher.createWeight(searcher.rewrite(childFilter), ScoreMode.COMPLETE_NO_SCORES, 1f);
    Scorer childScorer = childWeight.scorer(subReaderContext);
    if (childScorer == null) {
        // Weight#scorer may return null when no document of the segment matches;
        // hand back an empty iterator instead of failing with a NullPointerException.
        return DocIdSetIterator.empty();
    }
    return childScorer.iterator();
}

/**
 * Decides whether doc-value field {@code f} belongs to the nested object at {@code path} and
 * whether the source map holds a nested list for that path.
 *
 * @param path       full path of the nested object
 * @param f          doc-value field to test
 * @param sourceMaps source map of the hit, keyed by field path
 * @return {@code true} only if {@code f.field} is {@code path} followed by {@code '.'} and more
 *         characters, and {@code sourceMaps} maps {@code path} to an {@link ArrayList}
 */
private boolean checkNestedField(String path, DocValueField f, Map<String, Object> sourceMaps) {
    // Require the '.' separator: a bare prefix test would accept "ab.vec" (or "a" itself) for
    // path "a", and the relative name is later taken with substring(path.length() + 1).
    if (!f.field.startsWith(path + ".")) {
        return false;
    }

    // A missing key yields null, which is not an ArrayList, so one lookup covers both checks.
    return sourceMaps.get(path) instanceof ArrayList;
}
}

@Getter
Expand Down

0 comments on commit 0cc0543

Please sign in to comment.