diff --git a/CHANGELOG.md b/CHANGELOG.md index 269988e3fd61d..cccd52e9fce45 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Create and attach interclusterTest and yamlRestTest code coverage reports to gradle check task([#19165](https://github.com/opensearch-project/OpenSearch/pull/19165)) - Optimized date histogram aggregations by preventing unnecessary object allocations in date rounding utils ([19088](https://github.com/opensearch-project/OpenSearch/pull/19088)) - Optimize source conversion in gRPC search hits using zero-copy BytesRef ([#19280](https://github.com/opensearch-project/OpenSearch/pull/19280)) +- Faster `terms` query creation for `keyword` field with index and docValues enabled ([#19350](https://github.com/opensearch-project/OpenSearch/pull/19350)) - Allow plugins to copy folders into their config dir during installation ([#19343](https://github.com/opensearch-project/OpenSearch/pull/19343)) - Add failureaccess as runtime dependency to transport-grpc module ([#19339](https://github.com/opensearch-project/OpenSearch/pull/19339)) diff --git a/server/src/main/java/org/opensearch/index/mapper/BytesRefsCollectionBuilder.java b/server/src/main/java/org/opensearch/index/mapper/BytesRefsCollectionBuilder.java index 2fbb01b2ced25..c3a91f23972d0 100644 --- a/server/src/main/java/org/opensearch/index/mapper/BytesRefsCollectionBuilder.java +++ b/server/src/main/java/org/opensearch/index/mapper/BytesRefsCollectionBuilder.java @@ -24,8 +24,8 @@ /** * Purposed for passing terms into {@link TermInSetQuery}. - * If the given terms are sorted already, it wrap it with a SortedSet stub. - * Otherwise, it passes terms as list. + * If the given terms are sorted already, it wraps terms with a SortedSet stub. + * Otherwise, it passes terms as a list. */ public class BytesRefsCollectionBuilder implements Consumer, Supplier> { diff --git a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java index 7ace516459763..3271f60a466ee 100644 --- a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java @@ -519,22 +519,41 @@ public Query termsQuery(List values, QueryShardContext context) { return super.termsQuery(values, context); } BytesRefsCollectionBuilder iBytesRefs = new BytesRefsCollectionBuilder(values.size()); - BytesRefsCollectionBuilder dVByteRefs = new BytesRefsCollectionBuilder(values.size()); + BytesRefsCollectionBuilder dVByteRefs = null; for (int i = 0; i < values.size(); i++) { - BytesRef idxBytes = indexedValueForSearch(values.get(i)); + Object value = values.get(i); + BytesRef idxBytes = indexedValueForSearch(value); iBytesRefs.accept(idxBytes); - BytesRef dvBytes = indexedValueForSearch(rewriteForDocValue(values.get(i))); - dVByteRefs.accept(dvBytes); + + Object rewritten = rewriteForDocValue(value); + if (dVByteRefs == null) { // needs to check + if (rewritten != value && !rewritten.equals(value)) { + // first time index and dv are divergent + dVByteRefs = new BytesRefsCollectionBuilder(values.size()); + for (int rewind = 0; rewind <= i; rewind++) { + Object rewrittenOld = rewind < i ? rewriteForDocValue(values.get(rewind)) : rewritten; + BytesRef dvBytesOld = indexedValueForSearch(rewrittenOld); + dVByteRefs.accept(dvBytesOld); + } + } + } else { + BytesRef dvBytes = indexedValueForSearch(rewritten); + dVByteRefs.accept(dvBytes); + } + } + if (dVByteRefs == null) { // index and docValues are the same, pack them once + return TermInSetQuery.newIndexOrDocValuesQuery(MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, name(), iBytesRefs.get()); + } else { + Query indexQuery = new TermInSetQuery(MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, name(), iBytesRefs.get()); + Query dvQuery = new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), dVByteRefs.get()); + return new IndexOrDocValuesQuery(indexQuery, dvQuery); } - Query indexQuery = new TermInSetQuery(MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, name(), iBytesRefs.get()); - Query dvQuery = new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), dVByteRefs.get()); - return new IndexOrDocValuesQuery(indexQuery, dvQuery); } // if we only have doc_values enabled, we construct a new query with doc_values re-written if (hasDocValues()) { BytesRefsCollectionBuilder bytesCollector = new BytesRefsCollectionBuilder(values.size()); - for (int i = 0; i < values.size(); i++) { - BytesRef dvBytes = indexedValueForSearch(rewriteForDocValue(values.get(i))); + for (Object value : values) { + BytesRef dvBytes = indexedValueForSearch(rewriteForDocValue(value)); bytesCollector.accept(dvBytes); } return new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), bytesCollector.get()); diff --git a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldTypeTests.java index de2b743ba42b4..90bd52d016549 100644 --- a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldTypeTests.java @@ -37,6 +37,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -441,7 +442,8 @@ public void testExistsQuery() { } public void testTermsQuery() { - + List values = Arrays.asList("foo", "bar"); + Collections.shuffle(values, random()); // 1.test isSearchable=true, hasDocValues=true, mappedFieldTypeName=null { FlatObjectFieldMapper.FlatObjectFieldType ft = (FlatObjectFieldMapper.FlatObjectFieldType) getFlatParentFieldType( @@ -461,7 +463,7 @@ public void testTermsQuery() { new TermInSetQuery(DOC_VALUES_REWRITE, "field" + VALUE_SUFFIX, docValueterms) ); - assertEquals(expected, ft.termsQuery(Arrays.asList("foo", "bar"), MOCK_QSC_ENABLE_INDEX_DOC_VALUES)); + assertEquals(expected, ft.termsQuery(values, MOCK_QSC_ENABLE_INDEX_DOC_VALUES)); } // test isSearchable=true, hasDocValues=true, mappedFieldTypeName!=null @@ -484,7 +486,7 @@ public void testTermsQuery() { new TermInSetQuery(DOC_VALUES_REWRITE, "field" + VALUE_AND_PATH_SUFFIX, docValueterms) ); - assertEquals(expected, ft.termsQuery(Arrays.asList("foo", "bar"), MOCK_QSC_ENABLE_INDEX_DOC_VALUES)); + assertEquals(expected, ft.termsQuery(values, MOCK_QSC_ENABLE_INDEX_DOC_VALUES)); } // 2.test isSearchable=true, hasDocValues=false, mappedFieldTypeName=null @@ -500,7 +502,7 @@ public void testTermsQuery() { indexTerms.add(new BytesRef("bar")); Query expected = new TermInSetQuery("field" + VALUE_SUFFIX, indexTerms); - assertEquals(expected, ft.termsQuery(Arrays.asList("foo", "bar"), MOCK_QSC_ENABLE_INDEX_DOC_VALUES)); + assertEquals(expected, ft.termsQuery(values, MOCK_QSC_ENABLE_INDEX_DOC_VALUES)); } // test isSearchable=true, hasDocValues=false, mappedFieldTypeName!=null @@ -516,7 +518,7 @@ public void testTermsQuery() { indexTerms.add(new BytesRef("field.field1=bar")); Query expected = new TermInSetQuery("field" + VALUE_AND_PATH_SUFFIX, indexTerms); - assertEquals(expected, ft.termsQuery(Arrays.asList("foo", "bar"), null)); + assertEquals(expected, ft.termsQuery(values, null)); } // 3.test isSearchable=false, hasDocValues=true, mappedFieldTypeName=null @@ -536,7 +538,7 @@ public void testTermsQuery() { docValueterms.add(new BytesRef("field.bar")); Query expected = new TermInSetQuery(DOC_VALUES_REWRITE, "field" + VALUE_SUFFIX, docValueterms); - assertEquals(expected, ft.termsQuery(Arrays.asList("foo", "bar"), MOCK_QSC_ENABLE_INDEX_DOC_VALUES)); + assertEquals(expected, ft.termsQuery(values, MOCK_QSC_ENABLE_INDEX_DOC_VALUES)); } // test isSearchable=false, hasDocValues=true, mappedFieldTypeName!=null @@ -553,7 +555,7 @@ public void testTermsQuery() { docValueTerms.add(new BytesRef("field.field.field1=bar")); Query expected = new TermInSetQuery(DOC_VALUES_REWRITE, "field" + VALUE_AND_PATH_SUFFIX, docValueTerms); - assertEquals(expected, ft.termsQuery(Arrays.asList("foo", "bar"), MOCK_QSC_ENABLE_INDEX_DOC_VALUES)); + assertEquals(expected, ft.termsQuery(values, MOCK_QSC_ENABLE_INDEX_DOC_VALUES)); } // 4.test isSearchable=false, hasDocValues=false, mappedFieldTypeName=null @@ -566,7 +568,7 @@ public void testTermsQuery() { ); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, - () -> ft.termsQuery(Arrays.asList("foo", "bar"), MOCK_QSC_ENABLE_INDEX_DOC_VALUES) + () -> ft.termsQuery(values, MOCK_QSC_ENABLE_INDEX_DOC_VALUES) ); assertEquals( "Cannot search on field [field] since it is both not indexed, and does not have doc_values " + "enabled.", @@ -584,7 +586,7 @@ public void testTermsQuery() { ); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, - () -> ft.termsQuery(Arrays.asList("foo", "bar"), MOCK_QSC_ENABLE_INDEX_DOC_VALUES) + () -> ft.termsQuery(values, MOCK_QSC_ENABLE_INDEX_DOC_VALUES) ); assertEquals( "Cannot search on field [field.field1] since it is both not indexed, and does not have doc_values " + "enabled.", diff --git a/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java index f886ba3a750b0..2123869841fb9 100644 --- a/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java @@ -229,6 +229,35 @@ public void testTermsSortedQuery() { assertEquals(expectedDocValues, onlyDocValues.termsQuery(sortedStrings, null)); } + public void testTermsQuery_WhenDocValuesTrickyRewrite() { + MappedFieldType ft = new KeywordFieldType("field") { + @Override + protected Object rewriteForDocValue(Object value) { + String strVal = (String) value; + if (strVal.compareTo("a") >= 0) { // prepend letters in DV with underscore + return "_" + strVal; + } else { + if (randomBoolean()) { + return super.rewriteForDocValue(value); + } else { + return randomBoolean() ? value : new String(strVal); + } + } + } + }; + List strings = List.of("1", "2", "3", "4", "5", "a", "b", "c", "d", "e"); + List dvStrings = List.of("1", "2", "3", "4", "5", "_a", "_b", "_c", "_d", "_e"); + Query expected = new IndexOrDocValuesQuery( + new TermInSetQuery("field", strings.stream().map(BytesRef::new).toList()), + new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, "field", dvStrings.stream().map(BytesRef::new).toList()) + ); + if (rarely()) { + strings = new ArrayList<>(strings); + Collections.shuffle(strings, random()); + } + assertEquals(expected, ft.termsQuery(strings, MOCK_QSC_ENABLE_INDEX_DOC_VALUES)); + } + public void testExistsQuery() { { KeywordFieldType ft = new KeywordFieldType("field");