Add UTs for optimization
Signed-off-by: Sandesh Kumar <[email protected]>
sandeshkr419 committed Feb 23, 2024
1 parent d44aaf1 commit 7378906
Showing 2 changed files with 103 additions and 35 deletions.
@@ -248,6 +248,7 @@ public void collect(int doc, long owningBucketOrd) throws IOException {
return;
}
int globalOrd = singleValues.ordValue();
// read the document's global ordinal and collect it into the owning bucket
collectionStrategy.collectGlobalOrd(owningBucketOrd, doc, globalOrd, sub);
}
});
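For reference, here is a minimal standalone sketch of how a single-valued ordinal is read from doc values, mirroring the singleValues.ordValue() call in the hunk above. The leafReader, field, and doc names are placeholders, and treating singleValues as the singleton view of the segment's SortedSetDocValues is an inference, not taken from this change.

import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;

final class OrdinalReadSketch {
    // Returns the single ordinal for one document, or -1 when the field is
    // multi-valued in this segment or the document has no value.
    static int singleOrdForDoc(LeafReader leafReader, String field, int doc) throws IOException {
        SortedSetDocValues values = DocValues.getSortedSet(leafReader, field);
        SortedDocValues singleValues = DocValues.unwrapSingleton(values);
        if (singleValues == null || !singleValues.advanceExact(doc)) {
            return -1;
        }
        return singleValues.ordValue();
    }
}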
@@ -52,6 +52,7 @@
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.opensearch.common.TriConsumer;
import org.opensearch.common.geo.GeoPoint;
import org.opensearch.common.network.InetAddresses;
import org.opensearch.common.settings.Settings;
@@ -143,6 +144,18 @@ public class TermsAggregatorTests extends AggregatorTestCase {
private static final String STRING_SCRIPT_NAME = "string_script";
private static final String STRING_SCRIPT_OUTPUT = "Orange";

private static final Consumer<TermsAggregator> DEFAULT_POST_COLLECTION = termsAggregator -> {
try {
termsAggregator.postCollection();
} catch (IOException e) {
throw new RuntimeException(e);
}
};

// aggregator.postCollection() is not required when the LeafBucketCollector#termDocFreqCollector optimization is used.
// Using NOOP_POST_COLLECTION_CONSUMER verifies that bucket counts in the aggregation are completed without running postCollection().
private static final Consumer<TermsAggregator> NOOP_POST_COLLECTION_CONSUMER = termsAggregator -> {};

@Override
protected MapperService mapperServiceMock() {
MapperService mapperService = mock(MapperService.class);
@@ -257,24 +270,54 @@ public void testUsesGlobalOrdinalsByDefault() throws Exception {
directory.close();
}

public void testSimple() throws Exception {
/**
* This test case uses the low-cardinality implementation of GlobalOrdinalsStringTermsAggregator.
* In this case, segment terms are not initialized, so the aggregation runs without the LeafBucketCollector#termDocFreqCollector optimization.
*/
public void testSimpleAggregation() throws Exception {
testSimple(
(document, field, value) -> document.add(new SortedSetDocValuesField(field, new BytesRef(value))),
DEFAULT_POST_COLLECTION
);
}

/**
* This test case uses the low-cardinality implementation of GlobalOrdinalsStringTermsAggregator.
* In this case, segment terms are initialized, so the aggregation uses the LeafBucketCollector#termDocFreqCollector optimization.
*/
public void testSimpleAggregationWithStoredValues() throws Exception {
// aggregator.postCollection() is not required when the LeafBucketCollector#termDocFreqCollector optimization is used.
// This also verifies that bucket counts are completed without running postCollection().
testSimple((document, field, value) -> {
document.add(new SortedSetDocValuesField(field, new BytesRef(value)));
document.add(new StringField(field, value, Field.Store.NO));
}, NOOP_POST_COLLECTION_CONSUMER);

}

/**
* Utility method for testing string terms aggregation.
* @param addFieldConsumer determines how a field value is added to the document
* @param postCollectionConsumer controls whether aggregator.postCollection() runs after the search
*/
private void testSimple(TriConsumer<Document, String, String> addFieldConsumer, Consumer<TermsAggregator> postCollectionConsumer)
throws Exception {
try (Directory directory = newDirectory()) {
try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
Document document = new Document();
document.add(new SortedSetDocValuesField("string", new BytesRef("a")));
document.add(new SortedSetDocValuesField("string", new BytesRef("b")));
addFieldConsumer.apply(document, "string", "a");
addFieldConsumer.apply(document, "string", "b");
indexWriter.addDocument(document);
document = new Document();
document.add(new SortedSetDocValuesField("string", new BytesRef("")));
document.add(new SortedSetDocValuesField("string", new BytesRef("c")));
document.add(new SortedSetDocValuesField("string", new BytesRef("a")));
addFieldConsumer.apply(document, "string", "");
addFieldConsumer.apply(document, "string", "c");
addFieldConsumer.apply(document, "string", "a");
indexWriter.addDocument(document);
document = new Document();
document.add(new SortedSetDocValuesField("string", new BytesRef("b")));
document.add(new SortedSetDocValuesField("string", new BytesRef("d")));
addFieldConsumer.apply(document, "string", "b");
addFieldConsumer.apply(document, "string", "d");
indexWriter.addDocument(document);
document = new Document();
document.add(new SortedSetDocValuesField("string", new BytesRef("")));
addFieldConsumer.apply(document, "string", "");
indexWriter.addDocument(document);
try (IndexReader indexReader = maybeWrapReaderEs(indexWriter.getReader())) {
IndexSearcher indexSearcher = newIndexSearcher(indexReader);
@@ -287,7 +330,7 @@ public void testSimple() throws Exception {
TermsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType);
aggregator.preCollection();
indexSearcher.search(new MatchAllDocsQuery(), aggregator);
aggregator.postCollection();
postCollectionConsumer.accept(aggregator);
Terms result = reduce(aggregator);
assertEquals(5, result.getBuckets().size());
assertEquals("", result.getBuckets().get(0).getKeyAsString());
@@ -307,38 +350,63 @@ public void testSimple() throws Exception {
}
}

/**
* This test case uses the default implementation of GlobalOrdinalsStringTermsAggregator.
* In this case, segment terms are not initialized, so the aggregation runs without the LeafBucketCollector#termDocFreqCollector optimization.
*/
public void testStringIncludeExclude() throws Exception {
testStringIncludeExclude(
(document, field, value) -> document.add(new SortedSetDocValuesField(field, new BytesRef(value))),
DEFAULT_POST_COLLECTION
);
}

/**
* This test case uses the default implementation of GlobalOrdinalsStringTermsAggregator.
* In this case, segment terms are initialized, so the aggregation uses the LeafBucketCollector#termDocFreqCollector optimization.
*/
public void testStringIncludeExcludeWithStoredValues() throws Exception {
// aggregator.postCollection() is not required when the LeafBucketCollector#termDocFreqCollector optimization is used.
// This also verifies that bucket counts are completed without running postCollection().
testStringIncludeExclude((document, field, value) -> {
document.add(new SortedSetDocValuesField(field, new BytesRef(value)));
document.add(new StringField(field, value, Field.Store.NO));
}, NOOP_POST_COLLECTION_CONSUMER);
}

private void testStringIncludeExclude(TriConsumer<Document, String, String> addField, Consumer<TermsAggregator> postCollectionConsumer)
throws Exception {
try (Directory directory = newDirectory()) {
try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
Document document = new Document();
document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val000")));
document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val001")));
document.add(new SortedDocValuesField("sv_field", new BytesRef("val001")));
addField.apply(document, "mv_field", "val000");
addField.apply(document, "mv_field", "val001");
addField.apply(document, "sv_field", "val001");
indexWriter.addDocument(document);
document = new Document();
document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val002")));
document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val003")));
document.add(new SortedDocValuesField("sv_field", new BytesRef("val003")));
addField.apply(document, "mv_field", "val002");
addField.apply(document, "mv_field", "val003");
addField.apply(document, "sv_field", "val003");
indexWriter.addDocument(document);
document = new Document();
document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val004")));
document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val005")));
document.add(new SortedDocValuesField("sv_field", new BytesRef("val005")));
addField.apply(document, "mv_field", "val004");
addField.apply(document, "mv_field", "val005");
addField.apply(document, "sv_field", "val005");
indexWriter.addDocument(document);
document = new Document();
document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val006")));
document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val007")));
document.add(new SortedDocValuesField("sv_field", new BytesRef("val007")));
addField.apply(document, "mv_field", "val006");
addField.apply(document, "mv_field", "val007");
addField.apply(document, "sv_field", "val007");
indexWriter.addDocument(document);
document = new Document();
document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val008")));
document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val009")));
document.add(new SortedDocValuesField("sv_field", new BytesRef("val009")));
addField.apply(document, "mv_field", "val008");
addField.apply(document, "mv_field", "val009");
addField.apply(document, "sv_field", "val009");
indexWriter.addDocument(document);
document = new Document();
document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val010")));
document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val011")));
document.add(new SortedDocValuesField("sv_field", new BytesRef("val011")));
addField.apply(document, "mv_field", "val010");
addField.apply(document, "mv_field", "val011");
addField.apply(document, "sv_field", "val011");
indexWriter.addDocument(document);
try (IndexReader indexReader = maybeWrapReaderEs(indexWriter.getReader())) {
IndexSearcher indexSearcher = newIndexSearcher(indexReader);
@@ -355,7 +423,7 @@ public void testStringIncludeExclude() throws Exception {
TermsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType);
aggregator.preCollection();
indexSearcher.search(new MatchAllDocsQuery(), aggregator);
aggregator.postCollection();
postCollectionConsumer.accept(aggregator);
Terms result = reduce(aggregator);
assertEquals(10, result.getBuckets().size());
assertEquals("val000", result.getBuckets().get(0).getKeyAsString());
@@ -390,7 +458,7 @@ public void testStringIncludeExclude() throws Exception {
aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType2);
aggregator.preCollection();
indexSearcher.search(new MatchAllDocsQuery(), aggregator);
aggregator.postCollection();
postCollectionConsumer.accept(aggregator);
result = reduce(aggregator);
assertEquals(5, result.getBuckets().size());
assertEquals("val001", result.getBuckets().get(0).getKeyAsString());
@@ -414,7 +482,7 @@ public void testStringIncludeExclude() throws Exception {
aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType);
aggregator.preCollection();
indexSearcher.search(new MatchAllDocsQuery(), aggregator);
aggregator.postCollection();
postCollectionConsumer.accept(aggregator);
result = reduce(aggregator);
assertEquals(8, result.getBuckets().size());
assertEquals("val002", result.getBuckets().get(0).getKeyAsString());
@@ -443,7 +511,7 @@ public void testStringIncludeExclude() throws Exception {
aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType);
aggregator.preCollection();
indexSearcher.search(new MatchAllDocsQuery(), aggregator);
aggregator.postCollection();
postCollectionConsumer.accept(aggregator);
result = reduce(aggregator);
assertEquals(2, result.getBuckets().size());
assertEquals("val010", result.getBuckets().get(0).getKeyAsString());
@@ -460,7 +528,7 @@ public void testStringIncludeExclude() throws Exception {
aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType);
aggregator.preCollection();
indexSearcher.search(new MatchAllDocsQuery(), aggregator);
aggregator.postCollection();
postCollectionConsumer.accept(aggregator);
result = reduce(aggregator);
assertEquals(2, result.getBuckets().size());
assertEquals("val000", result.getBuckets().get(0).getKeyAsString());
@@ -492,7 +560,7 @@ public void testStringIncludeExclude() throws Exception {
aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType);
aggregator.preCollection();
indexSearcher.search(new MatchAllDocsQuery(), aggregator);
aggregator.postCollection();
postCollectionConsumer.accept(aggregator);
result = reduce(aggregator);
assertEquals(2, result.getBuckets().size());
assertEquals("val000", result.getBuckets().get(0).getKeyAsString());
@@ -1543,5 +1611,4 @@ private <T extends InternalAggregation> T reduce(Aggregator agg) throws IOExcept
doAssertReducedMultiBucketConsumer(result, reduceBucketConsumer);
return result;
}

}
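To summarize what the new tests parametrize, below is a minimal, self-contained sketch of the two indexing variants: doc values only, which exercises the default path and still needs aggregator.postCollection(), versus doc values plus an indexed StringField, which enables the termDocFreqCollector optimization. The class and constant names here are illustrative and not part of the commit.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.util.BytesRef;
import org.opensearch.common.TriConsumer;

public class FieldVariantSketch {

    // Doc-values-only variant: exercises the default collection path,
    // where aggregator.postCollection() is still required to finish the buckets.
    static final TriConsumer<Document, String, String> DOC_VALUES_ONLY =
        (doc, field, value) -> doc.add(new SortedSetDocValuesField(field, new BytesRef(value)));

    // Doc values plus an indexed (non-stored) StringField: segment terms exist for the field,
    // so the termDocFreqCollector optimization can complete bucket counts without postCollection().
    static final TriConsumer<Document, String, String> WITH_INDEXED_TERMS = (doc, field, value) -> {
        doc.add(new SortedSetDocValuesField(field, new BytesRef(value)));
        doc.add(new StringField(field, value, Field.Store.NO));
    };

    public static void main(String[] args) {
        Document doc = new Document();
        DOC_VALUES_ONLY.apply(doc, "string", "a");     // adds 1 field
        WITH_INDEXED_TERMS.apply(doc, "string", "b");  // adds 2 fields
        System.out.println(doc.getFields().size());    // prints 3
    }
}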
