Complete keyword changes for star tree #16233

Draft: wants to merge 2 commits into base: main

@@ -56,7 +56,7 @@ public class StarTreeMapperIT extends OpenSearchIntegTestCase {
.put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(512, ByteSizeUnit.MB))
.build();

private static XContentBuilder createMinimalTestMapping(boolean invalidDim, boolean invalidMetric, boolean keywordDim) {
private static XContentBuilder createMinimalTestMapping(boolean invalidDim, boolean invalidMetric, boolean ipdim) {
try {
return jsonBuilder().startObject()
.startObject("composite")
@@ -68,12 +68,15 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
.endObject()
.startArray("ordered_dimensions")
.startObject()
.field("name", getDim(invalidDim, keywordDim))
.field("name", getDim(invalidDim, ipdim))
.endObject()
.startObject()
.field("name", "keyword_dv")
.endObject()
.endArray()
.startArray("metrics")
.startObject()
.field("name", getDim(invalidMetric, false))
.field("name", getMetric(invalidMetric, false))
.endObject()
.endArray()
.endObject()
@@ -99,6 +102,10 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
.field("type", "keyword")
.field("doc_values", false)
.endObject()
.startObject("ip")
.field("type", "ip")
.field("doc_values", false)
.endObject()
.endObject()
.endObject();
} catch (IOException e) {
@@ -356,10 +363,19 @@ private XContentBuilder getMappingWithDuplicateFields(boolean isDuplicateDim, bo
}

private static String getDim(boolean hasDocValues, boolean isKeyword) {
if (hasDocValues) {
return random().nextBoolean() ? "numeric" : "keyword";
} else if (isKeyword) {
return "ip";
}
return "numeric_dv";
}

private static String getMetric(boolean hasDocValues, boolean isKeyword) {
if (hasDocValues) {
return "numeric";
} else if (isKeyword) {
return "keyword";
return "ip";
}
return "numeric_dv";
}
@@ -398,6 +414,7 @@ public void testValidCompositeIndex() {
assertEquals(expectedTimeUnits.get(i).shortName(), dateDim.getSortedCalendarIntervals().get(i).shortName());
}
assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField());
assertEquals("keyword_dv", starTreeFieldType.getDimensions().get(2).getField());
assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField());
List<MetricStat> expectedMetrics = Arrays.asList(MetricStat.VALUE_COUNT, MetricStat.SUM, MetricStat.AVG);
assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics());
@@ -665,10 +682,7 @@ public void testInvalidDimCompositeIndex() {
IllegalArgumentException.class,
() -> prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(true, false, false)).get()
);
assertEquals(
"Aggregations not supported for the dimension field [numeric] with field type [integer] as part of star tree field",
ex.getMessage()
);
assertTrue(ex.getMessage().startsWith("Aggregations not supported for the dimension field "));
}

public void testMaxDimsCompositeIndex() {
@@ -734,7 +748,7 @@ public void testUnsupportedDim() {
() -> prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(false, false, true)).get()
);
assertEquals(
"Failed to parse mapping [_doc]: unsupported field type associated with dimension [keyword] as part of star tree field [startree-1]",
"Failed to parse mapping [_doc]: unsupported field type associated with dimension [ip] as part of star tree field [startree-1]",
ex.getMessage()
);
}
@@ -0,0 +1,18 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.apache.lucene.index;

import org.apache.lucene.search.DocIdSetIterator;

/**
* Base wrapper class for DocValuesWriter implementations, exposing the buffered values as a {@link DocIdSetIterator}.
*/
public abstract class DocValuesWriterWrapper<T extends DocIdSetIterator> {
public abstract T getDocValues();
}
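
Reviewer note: this abstraction lets the star-tree writer treat numeric and keyword (sorted-set) doc-values writers uniformly through getDocValues(). A minimal illustrative sketch of a generic consumer; the iterateDocs helper below is hypothetical and not part of this change:

import org.apache.lucene.index.DocValuesWriterWrapper;
import org.apache.lucene.search.DocIdSetIterator;

import java.io.IOException;

final class DocValuesWrapperSketch {
    // Hypothetical helper: walks whatever iterator a concrete wrapper exposes via getDocValues().
    static void iterateDocs(DocValuesWriterWrapper<? extends DocIdSetIterator> wrapper) throws IOException {
        DocIdSetIterator it = wrapper.getDocValues();
        for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
            // consume the current document id here
        }
    }
}
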
@@ -18,7 +18,7 @@
*
* @opensearch.experimental
*/
public class SortedNumericDocValuesWriterWrapper {
public class SortedNumericDocValuesWriterWrapper extends DocValuesWriterWrapper<SortedNumericDocValues> {

private final SortedNumericDocValuesWriter sortedNumericDocValuesWriter;

@@ -47,6 +47,7 @@ public void addValue(int docID, long value) {
*
* @return the {@link SortedNumericDocValues} instance
*/
@Override
public SortedNumericDocValues getDocValues() {
return sortedNumericDocValuesWriter.getDocValues();
}
@@ -0,0 +1,58 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.apache.lucene.index;

import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;

/**
* A wrapper class for writing sorted set doc values.
* <p>
* This class provides a convenient way to add sorted set doc values to a field
* and retrieve the corresponding {@link SortedSetDocValues} instance.
*
* @opensearch.experimental
*/
public class SortedSetDocValuesWriterWrapper extends DocValuesWriterWrapper<SortedSetDocValues> {

private final SortedSetDocValuesWriter sortedSetDocValuesWriter;

/**
* Sole constructor. Constructs a new {@link SortedSetDocValuesWriterWrapper} instance.
*
* @param fieldInfo the field information for the field being written
* @param counter a counter for tracking memory usage
* @param byteBlockPool a byte block pool for allocating byte blocks
* @see SortedSetDocValuesWriter
*/
public SortedSetDocValuesWriterWrapper(FieldInfo fieldInfo, Counter counter, ByteBlockPool byteBlockPool) {
sortedSetDocValuesWriter = new SortedSetDocValuesWriter(fieldInfo, counter, byteBlockPool);
}

/**
* Adds a bytes ref value to the sorted set doc values for the specified document.
*
* @param docID the document ID
* @param value the value to add
*/
public void addValue(int docID, BytesRef value) {
sortedSetDocValuesWriter.addValue(docID, value);
}

/**
* Returns the {@link SortedSetDocValues} instance containing the sorted set doc values.
*
* @return the {@link SortedSetDocValues} instance
*/
@Override
public SortedSetDocValues getDocValues() {
return sortedSetDocValuesWriter.getDocValues();
}
}
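
Reviewer note: a hedged usage sketch of the new wrapper. It assumes a wrapper has already been constructed with a FieldInfo whose doc-values type is SORTED_SET (building the FieldInfo, Counter, and ByteBlockPool is omitted), then reads the buffered ordinals back as keyword terms:

import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.SortedSetDocValuesWriterWrapper;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

import java.io.IOException;

final class SortedSetWrapperSketch {
    // Illustrative only: add a couple of keyword values, then iterate ordinals per document.
    static void addAndRead(SortedSetDocValuesWriterWrapper wrapper) throws IOException {
        wrapper.addValue(0, new BytesRef("keyword_a"));
        wrapper.addValue(1, new BytesRef("keyword_b"));

        SortedSetDocValues dv = wrapper.getDocValues();
        for (int doc = dv.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = dv.nextDoc()) {
            for (int i = 0; i < dv.docValueCount(); i++) {
                long ord = dv.nextOrd();
                BytesRef term = dv.lookupOrd(ord); // ordinal -> original keyword bytes
                System.out.println(doc + " -> " + term.utf8ToString());
            }
        }
    }
}
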
@@ -100,7 +100,7 @@ public class FeatureFlags {
* aggregations.
*/
public static final String STAR_TREE_INDEX = "opensearch.experimental.feature.composite_index.star_tree.enabled";
public static final Setting<Boolean> STAR_TREE_INDEX_SETTING = Setting.boolSetting(STAR_TREE_INDEX, false, Property.NodeScope);
public static final Setting<Boolean> STAR_TREE_INDEX_SETTING = Setting.boolSetting(STAR_TREE_INDEX, true, Property.NodeScope);

/**
* Gates the functionality of application based configuration templates.
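
Reviewer note: this hunk flips the default of STAR_TREE_INDEX_SETTING from false to true, so the star-tree code path runs without an explicit opt-in. A hedged sketch of how code guards on the flag, assuming the isEnabled(Setting<Boolean>) overload FeatureFlags already exposes (the guard class itself is illustrative):

import org.opensearch.common.util.FeatureFlags;

final class StarTreeFlagSketch {
    // Illustrative guard around star-tree specific code paths.
    static boolean starTreeEnabled() {
        return FeatureFlags.isEnabled(FeatureFlags.STAR_TREE_INDEX_SETTING);
    }
}
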
@@ -15,6 +15,7 @@
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
@@ -40,6 +41,7 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@@ -111,7 +113,7 @@ public Composite99DocValuesReader(DocValuesProducer producer, SegmentReadState r
readState.segmentInfo.getId(),
readState.segmentSuffix
);

Map<String, DocValuesType> dimensionFieldTypeMap = new HashMap<>();
while (true) {

// validate magic marker
@@ -155,13 +157,15 @@ public Composite99DocValuesReader(DocValuesProducer producer, SegmentReadState r
compositeIndexInputMap.put(compositeFieldName, starTreeIndexInput);
compositeIndexMetadataMap.put(compositeFieldName, starTreeMetadata);

List<String> dimensionFields = starTreeMetadata.getDimensionFields();

Map<String, DocValuesType> dimensionFieldToDocValuesMap = starTreeMetadata.getDimensionFields();
// generating star tree unique fields (fully qualified name for dimension and metrics)
for (String dimensions : dimensionFields) {
fields.add(fullyQualifiedFieldNameForStarTreeDimensionsDocValues(compositeFieldName, dimensions));
for (Map.Entry<String, DocValuesType> dimensionEntry : dimensionFieldToDocValuesMap.entrySet()) {
String dimName = fullyQualifiedFieldNameForStarTreeDimensionsDocValues(
compositeFieldName,
dimensionEntry.getKey()
);
fields.add(dimName);
// record the doc-values type so the dummy FieldInfo built below is created with the matching type
dimensionFieldTypeMap.put(dimName, dimensionEntry.getValue());
}

// adding metric fields
for (Metric metric : starTreeMetadata.getMetrics()) {
for (MetricStat metricStat : metric.getBaseMetrics()) {
@@ -184,7 +188,7 @@ public Composite99DocValuesReader(DocValuesProducer producer, SegmentReadState r

// populates the dummy list of field infos to fetch doc id set iterators for respective fields.
// the dummy field info is used to fetch the doc id set iterators for respective fields based on field name
FieldInfos fieldInfos = new FieldInfos(getFieldInfoList(fields));
FieldInfos fieldInfos = new FieldInfos(getFieldInfoList(fields, dimensionFieldTypeMap));
this.readState = new SegmentReadState(readState.directory, readState.segmentInfo, fieldInfos, readState.context);

// initialize star-tree doc values producer
@@ -298,4 +302,8 @@ public static SortedNumericDocValues getSortedNumericDocValues(SortedNumericDocV
return sortedNumeric == null ? DocValues.emptySortedNumeric() : sortedNumeric;
}

public static SortedSetDocValues getSortedSetDocValues(SortedSetDocValues sortedSetDv) {
return sortedSetDv == null ? DocValues.emptySortedSet() : sortedSetDv;
}

}
@@ -21,6 +21,7 @@
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.IndexOutput;
import org.opensearch.common.annotation.ExperimentalApi;
@@ -34,6 +35,7 @@
import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues;
import org.opensearch.index.mapper.CompositeMappedFieldType;
import org.opensearch.index.mapper.DocCountFieldMapper;
import org.opensearch.index.mapper.KeywordFieldMapper;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.index.mapper.StarTreeMapper;

@@ -71,6 +73,7 @@ public class Composite99DocValuesWriter extends DocValuesConsumer {
private final AtomicInteger fieldNumberAcrossCompositeFields;

private final Map<String, DocValuesProducer> fieldProducerMap = new HashMap<>();
private final Map<String, SortedSetDocValues> fieldDocIdSetIteratorMap = new HashMap<>();

public Composite99DocValuesWriter(DocValuesConsumer delegate, SegmentWriteState segmentWriteState, MapperService mapperService)
throws IOException {
@@ -82,14 +85,7 @@ public Composite99DocValuesWriter(DocValuesConsumer delegate, SegmentWriteState
this.compositeMappedFieldTypes = mapperService.getCompositeFieldTypes();
compositeFieldSet = new HashSet<>();
segmentFieldSet = new HashSet<>();
// TODO : add integ test for this
for (FieldInfo fi : this.state.fieldInfos) {
if (DocValuesType.SORTED_NUMERIC.equals(fi.getDocValuesType())) {
segmentFieldSet.add(fi.name);
} else if (fi.name.equals(DocCountFieldMapper.NAME)) {
segmentFieldSet.add(fi.name);
}
}
addStarTreeSupportedFieldsFromSegment();
for (CompositeMappedFieldType type : compositeMappedFieldTypes) {
compositeFieldSet.addAll(type.fields());
}
@@ -148,6 +144,19 @@ public Composite99DocValuesWriter(DocValuesConsumer delegate, SegmentWriteState
segmentHasCompositeFields = Collections.disjoint(segmentFieldSet, compositeFieldSet) == false;
}

private void addStarTreeSupportedFieldsFromSegment() {
// TODO : add integ test for this
for (FieldInfo fi : this.state.fieldInfos) {
if (DocValuesType.SORTED_NUMERIC.equals(fi.getDocValuesType())) {
segmentFieldSet.add(fi.name);
} else if (DocValuesType.SORTED_SET.equals(fi.getDocValuesType())) {
segmentFieldSet.add(fi.name);
} else if (fi.name.equals(DocCountFieldMapper.NAME)) {
segmentFieldSet.add(fi.name);
}
}
}

@Override
public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
delegate.addNumericField(field, valuesProducer);
@@ -179,6 +188,15 @@ public void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProdu
@Override
public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
delegate.addSortedSetField(field, valuesProducer);
// Perform this only during flush flow
if (mergeState.get() == null && segmentHasCompositeFields) {
createCompositeIndicesIfPossible(valuesProducer, field);
}
if (mergeState.get() != null) {
if (compositeFieldSet.contains(field.name)) {
fieldDocIdSetIteratorMap.put(field.name, valuesProducer.getSortedSet(field));
}
}
}

@Override
@@ -235,6 +253,7 @@ private void createCompositeIndicesIfPossible(DocValuesProducer valuesProducer,
* Add empty doc values for fields not present in segment
*/
private void addDocValuesForEmptyField(String compositeField) {
// special case for doc count
if (compositeField.equals(DocCountFieldMapper.NAME)) {
fieldProducerMap.put(compositeField, new EmptyDocValuesProducer() {
@Override
@@ -243,16 +262,29 @@ public NumericDocValues getNumeric(FieldInfo field) {
}
});
} else {
fieldProducerMap.put(compositeField, new EmptyDocValuesProducer() {
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) {
return DocValues.emptySortedNumeric();
}
});
if (isSortedSetField(compositeField)) {
fieldProducerMap.put(compositeField, new EmptyDocValuesProducer() {
@Override
public SortedSetDocValues getSortedSet(FieldInfo field) {
return DocValues.emptySortedSet();
}
});
} else {
fieldProducerMap.put(compositeField, new EmptyDocValuesProducer() {
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) {
return DocValues.emptySortedNumeric();
}
});
}
}
compositeFieldSet.remove(compositeField);
}

private boolean isSortedSetField(String field) {
return mapperService.fieldType(field) instanceof KeywordFieldMapper.KeywordFieldType;
}

@Override
public void merge(MergeState mergeState) throws IOException {
this.mergeState.compareAndSet(null, mergeState);
@@ -313,7 +345,14 @@ private void mergeStarTreeFields(MergeState mergeState) throws IOException {
}
}
try (StarTreesBuilder starTreesBuilder = new StarTreesBuilder(state, mapperService, fieldNumberAcrossCompositeFields)) {
starTreesBuilder.buildDuringMerge(metaOut, dataOut, starTreeSubsPerField, composite99DocValuesConsumer);
starTreesBuilder.buildDuringMerge(
metaOut,
dataOut,
starTreeSubsPerField,
composite99DocValuesConsumer,
mergeState,
fieldDocIdSetIteratorMap
);
}
}

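Reviewer note: the call above now threads the MergeState and the per-field SortedSetDocValues collected in addSortedSetField into the builder. The updated buildDuringMerge signature is not shown in this diff; the shape below is only an assumption inferred from the call site, and the real StarTreesBuilder method may differ:

import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.store.IndexOutput;
import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues;

import java.io.IOException;
import java.util.List;
import java.util.Map;

// Assumed shape only, inferred from the call site in mergeStarTreeFields above.
interface StarTreeMergeBuilderSketch {
    void buildDuringMerge(
        IndexOutput metaOut,
        IndexOutput dataOut,
        Map<String, List<StarTreeValues>> starTreeSubsPerField,
        DocValuesConsumer starTreeDocValuesConsumer,
        MergeState mergeState,
        Map<String, SortedSetDocValues> fieldDocIdSetIteratorMap
    ) throws IOException;
}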