Skip to content

Commit e8f9e61

Browse files
committed
Sort terms once when terms query is created for indexed and docValues keyword field.
Signed-off-by: Mikhail Khludnev <[email protected]>
1 parent 791faed commit e8f9e61

File tree

4 files changed

+60
-11
lines changed

4 files changed

+60
-11
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
3939
- Create and attach interclusterTest and yamlRestTest code coverage reports to gradle check task([#19165](https://github.com/opensearch-project/OpenSearch/pull/19165))
4040
- Optimized date histogram aggregations by preventing unnecessary object allocations in date rounding utils ([19088](https://github.com/opensearch-project/OpenSearch/pull/19088))
4141
- Optimize source conversion in gRPC search hits using zero-copy BytesRef ([#19280](https://github.com/opensearch-project/OpenSearch/pull/19280))
42+
- Faster `terms` query creation for `keyword` field with index docValues enabled and terms were pre-sorted ([#19350](https://github.com/opensearch-project/OpenSearch/pull/19350))
4243

4344
### Fixed
4445
- Fix unnecessary refreshes on update preparation failures ([#15261](https://github.com/opensearch-project/OpenSearch/issues/15261))

server/src/main/java/org/opensearch/index/mapper/BytesRefsCollectionBuilder.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@
2424

2525
/**
2626
* Purposed for passing terms into {@link TermInSetQuery}.
27-
* If the given terms are sorted already, it wrap it with a SortedSet stub.
28-
* Otherwise, it passes terms as list.
27+
* If the given terms are sorted already, it wraps terms with a SortedSet stub.
28+
* Otherwise, it passes terms as a list.
2929
*/
3030
public class BytesRefsCollectionBuilder implements Consumer<BytesRef>, Supplier<Collection<BytesRef>> {
3131

server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -500,22 +500,41 @@ public Query termsQuery(List<?> values, QueryShardContext context) {
500500
return super.termsQuery(values, context);
501501
}
502502
BytesRefsCollectionBuilder iBytesRefs = new BytesRefsCollectionBuilder(values.size());
503-
BytesRefsCollectionBuilder dVByteRefs = new BytesRefsCollectionBuilder(values.size());
503+
BytesRefsCollectionBuilder dVByteRefs = null;
504504
for (int i = 0; i < values.size(); i++) {
505-
BytesRef idxBytes = indexedValueForSearch(values.get(i));
505+
Object value = values.get(i);
506+
BytesRef idxBytes = indexedValueForSearch(value);
506507
iBytesRefs.accept(idxBytes);
507-
BytesRef dvBytes = indexedValueForSearch(rewriteForDocValue(values.get(i)));
508-
dVByteRefs.accept(dvBytes);
508+
509+
Object rewritten = rewriteForDocValue(value);
510+
if (dVByteRefs == null) { // needs to check
511+
if (rewritten != value && !rewritten.equals(value)) {
512+
// first time index and dv are divergent
513+
dVByteRefs = new BytesRefsCollectionBuilder(values.size());
514+
for (int rewind = 0; rewind <= i; rewind++) {
515+
Object rewrittenOld = rewind < i ? rewriteForDocValue(values.get(rewind)) : rewritten;
516+
BytesRef dvBytesOld = indexedValueForSearch(rewrittenOld);
517+
dVByteRefs.accept(dvBytesOld);
518+
}
519+
}
520+
} else {
521+
BytesRef dvBytes = indexedValueForSearch(rewritten);
522+
dVByteRefs.accept(dvBytes);
523+
}
524+
}
525+
if (dVByteRefs == null) { // index and docValues are the same, pack them once
526+
return TermInSetQuery.newIndexOrDocValuesQuery(MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, name(), iBytesRefs.get());
527+
} else {
528+
Query indexQuery = new TermInSetQuery(MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, name(), iBytesRefs.get());
529+
Query dvQuery = new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), dVByteRefs.get());
530+
return new IndexOrDocValuesQuery(indexQuery, dvQuery);
509531
}
510-
Query indexQuery = new TermInSetQuery(MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, name(), iBytesRefs.get());
511-
Query dvQuery = new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), dVByteRefs.get());
512-
return new IndexOrDocValuesQuery(indexQuery, dvQuery);
513532
}
514533
// if we only have doc_values enabled, we construct a new query with doc_values re-written
515534
if (hasDocValues()) {
516535
BytesRefsCollectionBuilder bytesCollector = new BytesRefsCollectionBuilder(values.size());
517-
for (int i = 0; i < values.size(); i++) {
518-
BytesRef dvBytes = indexedValueForSearch(rewriteForDocValue(values.get(i)));
536+
for (Object value : values) {
537+
BytesRef dvBytes = indexedValueForSearch(rewriteForDocValue(value));
519538
bytesCollector.accept(dvBytes);
520539
}
521540
return new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), bytesCollector.get());

server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,35 @@ public void testTermsSortedQuery() {
229229
assertEquals(expectedDocValues, onlyDocValues.termsQuery(sortedStrings, null));
230230
}
231231

232+
public void testTermsQuery_WhenDocValuesTrickyRewrite() {
233+
MappedFieldType ft = new KeywordFieldType("field") {
234+
@Override
235+
protected Object rewriteForDocValue(Object value) {
236+
String strVal = (String) value;
237+
if (strVal.compareTo("a") >= 0) { // prepend letters in DV with underscore
238+
return "_" + strVal;
239+
} else {
240+
if (randomBoolean()) {
241+
return super.rewriteForDocValue(value);
242+
} else {
243+
return value;
244+
}
245+
}
246+
}
247+
};
248+
List<String> strings = List.of("1", "2", "3", "4", "5", "a", "b", "c", "d", "e");
249+
List<String> dvStrings = List.of("1", "2", "3", "4", "5", "_a", "_b", "_c", "_d", "_e");
250+
Query expected = new IndexOrDocValuesQuery(
251+
new TermInSetQuery("field", strings.stream().map(BytesRef::new).toList()),
252+
new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, "field", dvStrings.stream().map(BytesRef::new).toList())
253+
);
254+
if (rarely()) {
255+
strings = new ArrayList<>(strings);
256+
Collections.shuffle(strings, random());
257+
}
258+
assertEquals(expected, ft.termsQuery(strings, MOCK_QSC_ENABLE_INDEX_DOC_VALUES));
259+
}
260+
232261
public void testExistsQuery() {
233262
{
234263
KeywordFieldType ft = new KeywordFieldType("field");

0 commit comments

Comments
 (0)