Skip to content

Commit 1103b3f

Browse files
authored
Add skip_list param for date, scaled float and token count fields (#19142)
Signed-off-by: Asim Mahmood <[email protected]>
1 parent 6fdb010 commit 1103b3f

File tree

15 files changed

+431
-20
lines changed

15 files changed

+431
-20
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
2626
- Add new extensible method to DocRequest to specify type ([#19313](https://github.com/opensearch-project/OpenSearch/pull/19313))
2727
- [Rule based auto-tagging] Add Rule based auto-tagging IT ([#18550](https://github.com/opensearch-project/OpenSearch/pull/18550))
2828
- Add all-active ingestion as docrep equivalent in pull-based ingestion ([#19316](https://github.com/opensearch-project/OpenSearch/pull/19316))
29+
- Add skip_list param for date, scaled float and token count fields ([#19142](https://github.com/opensearch-project/OpenSearch/pull/19142))
2930

3031
### Changed
3132
- Refactor `if-else` chains to use `Java 17 pattern matching switch expressions`(([#18965](https://github.com/opensearch-project/OpenSearch/pull/18965))

modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,13 @@ public static class Builder extends ParametrizedFieldMapper.Builder {
118118
(n, c, o) -> o == null ? null : XContentMapValues.nodeDoubleValue(o),
119119
m -> toType(m).nullValue
120120
).acceptsNull();
121-
121+
private final Parameter<Boolean> skiplist = new Parameter<>(
122+
"skip_list",
123+
false,
124+
() -> false,
125+
(n, c, o) -> XContentMapValues.nodeBooleanValue(o),
126+
m -> toType(m).skiplist
127+
);
122128
private final Parameter<Map<String, String>> meta = Parameter.metaParam();
123129

124130
public Builder(String name, Settings settings) {
@@ -148,7 +154,7 @@ Builder nullValue(double nullValue) {
148154

149155
@Override
150156
protected List<Parameter<?>> getParameters() {
151-
return Arrays.asList(indexed, hasDocValues, stored, ignoreMalformed, meta, scalingFactor, coerce, nullValue);
157+
return Arrays.asList(indexed, hasDocValues, stored, ignoreMalformed, meta, scalingFactor, coerce, nullValue, skiplist);
152158
}
153159

154160
@Override
@@ -158,6 +164,7 @@ public ScaledFloatFieldMapper build(BuilderContext context) {
158164
indexed.getValue(),
159165
stored.getValue(),
160166
hasDocValues.getValue(),
167+
skiplist.getValue(),
161168
meta.getValue(),
162169
scalingFactor.getValue(),
163170
nullValue.getValue()
@@ -182,23 +189,27 @@ public static final class ScaledFloatFieldType extends SimpleMappedFieldType imp
182189

183190
private final double scalingFactor;
184191
private final Double nullValue;
192+
private final boolean skiplist;
185193

186194
public ScaledFloatFieldType(
187195
String name,
188196
boolean indexed,
189197
boolean stored,
190198
boolean hasDocValues,
199+
boolean skiplist,
191200
Map<String, String> meta,
192201
double scalingFactor,
193202
Double nullValue
194203
) {
195204
super(name, indexed, stored, hasDocValues, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
205+
this.skiplist = skiplist;
196206
this.scalingFactor = scalingFactor;
197207
this.nullValue = nullValue;
198208
}
199209

200210
public ScaledFloatFieldType(String name, double scalingFactor) {
201-
this(name, true, false, true, Collections.emptyMap(), scalingFactor, null);
211+
// TODO: enable skiplist by default
212+
this(name, true, false, true, false, Collections.emptyMap(), scalingFactor, null);
202213
}
203214

204215
@Override
@@ -384,9 +395,9 @@ public double toDoubleValue(long value) {
384395
private final boolean indexed;
385396
private final boolean hasDocValues;
386397
private final boolean stored;
398+
private final boolean skiplist;
387399
private final Double nullValue;
388400
private final double scalingFactor;
389-
390401
private final boolean ignoreMalformedByDefault;
391402
private final boolean coerceByDefault;
392403

@@ -401,6 +412,7 @@ private ScaledFloatFieldMapper(
401412
this.indexed = builder.indexed.getValue();
402413
this.hasDocValues = builder.hasDocValues.getValue();
403414
this.stored = builder.stored.getValue();
415+
this.skiplist = builder.skiplist.getValue();
404416
this.scalingFactor = builder.scalingFactor.getValue();
405417
this.nullValue = builder.nullValue.getValue();
406418
this.ignoreMalformed = builder.ignoreMalformed.getValue();
@@ -491,7 +503,7 @@ protected void parseCreateField(ParseContext context) throws IOException {
491503
scaledValue,
492504
indexed,
493505
hasDocValues,
494-
false,
506+
skiplist,
495507
stored
496508
);
497509
context.doc().addAll(fields);

modules/mapper-extras/src/main/java/org/opensearch/index/mapper/TokenCountFieldMapper.java

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
import java.util.List;
4545
import java.util.Map;
4646

47+
import static org.opensearch.common.xcontent.support.XContentMapValues.nodeBooleanValue;
4748
import static org.opensearch.common.xcontent.support.XContentMapValues.nodeIntegerValue;
4849

4950
/**
@@ -77,6 +78,13 @@ public static class Builder extends ParametrizedFieldMapper.Builder {
7778
m -> toType(m).enablePositionIncrements,
7879
true
7980
);
81+
private final Parameter<Boolean> skiplist = new Parameter<>(
82+
"skip_list",
83+
false,
84+
() -> false,
85+
(n, c, o) -> nodeBooleanValue(o),
86+
m -> toType(m).skiplist
87+
);
8088

8189
private final Parameter<Map<String, String>> meta = Parameter.metaParam();
8290

@@ -86,7 +94,7 @@ public Builder(String name) {
8694

8795
@Override
8896
protected List<Parameter<?>> getParameters() {
89-
return Arrays.asList(index, hasDocValues, store, analyzer, nullValue, enablePositionIncrements, meta);
97+
return Arrays.asList(index, hasDocValues, store, analyzer, nullValue, enablePositionIncrements, meta, skiplist);
9098
}
9199

92100
@Override
@@ -99,6 +107,7 @@ public TokenCountFieldMapper build(BuilderContext context) {
99107
index.getValue(),
100108
store.getValue(),
101109
hasDocValues.getValue(),
110+
skiplist.getValue(),
102111
nullValue.getValue(),
103112
meta.getValue()
104113
);
@@ -113,10 +122,11 @@ static class TokenCountFieldType extends NumberFieldMapper.NumberFieldType {
113122
boolean isSearchable,
114123
boolean isStored,
115124
boolean hasDocValues,
125+
boolean skiplist,
116126
Number nullValue,
117127
Map<String, String> meta
118128
) {
119-
super(name, NumberFieldMapper.NumberType.INTEGER, isSearchable, isStored, hasDocValues, false, nullValue, meta);
129+
super(name, NumberFieldMapper.NumberType.INTEGER, isSearchable, isStored, hasDocValues, skiplist, false, nullValue, meta);
120130
}
121131

122132
@Override
@@ -132,6 +142,7 @@ public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchL
132142

133143
private final boolean index;
134144
private final boolean hasDocValues;
145+
private final boolean skiplist;
135146
private final boolean store;
136147
private final NamedAnalyzer analyzer;
137148
private final boolean enablePositionIncrements;
@@ -150,6 +161,7 @@ protected TokenCountFieldMapper(
150161
this.nullValue = builder.nullValue.getValue();
151162
this.index = builder.index.getValue();
152163
this.hasDocValues = builder.hasDocValues.getValue();
164+
this.skiplist = builder.skiplist.getValue();
153165
this.store = builder.store.getValue();
154166
}
155167

@@ -174,7 +186,9 @@ protected void parseCreateField(ParseContext context) throws IOException {
174186
}
175187

176188
context.doc()
177-
.addAll(NumberFieldMapper.NumberType.INTEGER.createFields(fieldType().name(), tokenCount, index, hasDocValues, false, store));
189+
.addAll(
190+
NumberFieldMapper.NumberType.INTEGER.createFields(fieldType().name(), tokenCount, index, hasDocValues, skiplist, store)
191+
);
178192
}
179193

180194
/**

modules/mapper-extras/src/test/java/org/opensearch/index/mapper/ScaledFloatFieldMapperTests.java

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import org.apache.lucene.document.SortedNumericDocValuesField;
3838
import org.apache.lucene.document.StoredField;
3939
import org.apache.lucene.index.DirectoryReader;
40+
import org.apache.lucene.index.DocValuesSkipIndexType;
4041
import org.apache.lucene.index.DocValuesType;
4142
import org.apache.lucene.index.IndexWriter;
4243
import org.apache.lucene.index.IndexWriterConfig;
@@ -85,6 +86,7 @@ protected void registerParameters(ParameterChecker checker) throws IOException {
8586
b.field("scaling_factor", 5.0);
8687
}));
8788
checker.registerConflictCheck("doc_values", b -> b.field("doc_values", false));
89+
checker.registerConflictCheck("skip_list", b -> b.field("skip_list", true));
8890
checker.registerConflictCheck("index", b -> b.field("index", false));
8991
checker.registerConflictCheck("store", b -> b.field("store", true));
9092
checker.registerConflictCheck("null_value", b -> b.field("null_value", 1));
@@ -489,4 +491,37 @@ public void testScaledFloatEncodePoint() {
489491
assertEquals(1051, decodedUp); // 10.5 scaled = 1050, then +1 = 1051 (represents 10.51)
490492
assertEquals(1049, decodedDown); // 10.5 scaled = 1050, then -1 = 1049 (represents 10.49)
491493
}
494+
495+
public void testSkiplistParameter() throws IOException {
496+
// Test default value (none)
497+
DocumentMapper defaultMapper = createDocumentMapper(
498+
fieldMapping(b -> b.field("type", "scaled_float").field("scaling_factor", 100))
499+
);
500+
ParsedDocument doc = defaultMapper.parse(source(b -> b.field("field", 123.45)));
501+
IndexableField[] fields = doc.rootDoc().getFields("field");
502+
assertEquals(2, fields.length); // point field + doc values field
503+
IndexableField dvField = fields[1];
504+
assertEquals(DocValuesType.SORTED_NUMERIC, dvField.fieldType().docValuesType());
505+
assertEquals(DocValuesSkipIndexType.NONE, dvField.fieldType().docValuesSkipIndexType());
506+
507+
// Test skiplist = "skip_list"
508+
DocumentMapper skiplistMapper = createDocumentMapper(
509+
fieldMapping(b -> b.field("type", "scaled_float").field("scaling_factor", 100).field("skip_list", "true"))
510+
);
511+
doc = skiplistMapper.parse(source(b -> b.field("field", 123.45)));
512+
fields = doc.rootDoc().getFields("field");
513+
assertEquals(2, fields.length);
514+
dvField = fields[1];
515+
assertEquals(DocValuesType.SORTED_NUMERIC, dvField.fieldType().docValuesType());
516+
assertEquals(DocValuesSkipIndexType.RANGE, dvField.fieldType().docValuesSkipIndexType());
517+
518+
// Test invalid value
519+
MapperParsingException e = expectThrows(
520+
MapperParsingException.class,
521+
() -> createDocumentMapper(
522+
fieldMapping(b -> b.field("type", "scaled_float").field("scaling_factor", 100).field("skip_list", "invalid"))
523+
)
524+
);
525+
assertThat(e.getMessage(), containsString("Failed to parse value [invalid] as only [true] or [false] are allowed"));
526+
}
492527
}

modules/mapper-extras/src/test/java/org/opensearch/index/mapper/ScaledFloatFieldTypeTests.java

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
package org.opensearch.index.mapper;
3434

3535
import org.apache.lucene.document.Document;
36+
import org.apache.lucene.document.DoublePoint;
3637
import org.apache.lucene.document.LongField;
3738
import org.apache.lucene.document.LongPoint;
3839
import org.apache.lucene.document.SortedNumericDocValuesField;
@@ -82,6 +83,79 @@ public void testTermsQuery() {
8283
assertEquals(LongField.newSetQuery("scaled_float", scaledValue1, scaledValue2), ft.termsQuery(Arrays.asList(value1, value2), null));
8384
}
8485

86+
public void testRangeQuery() throws IOException {
87+
// make sure the accuracy loss of scaled floats only occurs at index time
88+
// this test checks that searching scaled floats yields the same results as
89+
// searching doubles that are rounded to the closest half float
90+
ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType(
91+
"scaled_float",
92+
true,
93+
false,
94+
false,
95+
false,
96+
Collections.emptyMap(),
97+
0.1 + randomDouble() * 100,
98+
null
99+
);
100+
Directory dir = newDirectory();
101+
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
102+
long[] scaled_floats = new long[1000];
103+
double[] doubles = new double[1000];
104+
final int numDocs = 1000;
105+
for (int i = 0; i < numDocs; ++i) {
106+
Document doc = new Document();
107+
double value = (randomDouble() * 2 - 1) * 10000;
108+
long scaledValue = Math.round(value * ft.getScalingFactor());
109+
double rounded = scaledValue / ft.getScalingFactor();
110+
scaled_floats[i] = scaledValue;
111+
doubles[i] = rounded;
112+
doc.add(new LongPoint("scaled_float", scaledValue));
113+
doc.add(new DoublePoint("double", rounded));
114+
w.addDocument(doc);
115+
}
116+
final DirectoryReader reader = DirectoryReader.open(w);
117+
w.close();
118+
IndexSearcher searcher = newSearcher(reader);
119+
final int numQueries = 1000;
120+
for (int i = 0; i < numQueries; ++i) {
121+
Double l = randomBoolean() ? null : (randomDouble() * 2 - 1) * 10000;
122+
Double u = randomBoolean() ? null : (randomDouble() * 2 - 1) * 10000;
123+
boolean includeLower = randomBoolean();
124+
boolean includeUpper = randomBoolean();
125+
126+
// Use the same rounding logic for query bounds as used in indexing
127+
Double queryL = l;
128+
Double queryU = u;
129+
if (l != null) {
130+
long scaledL = Math.round(l * ft.getScalingFactor());
131+
queryL = scaledL / ft.getScalingFactor();
132+
}
133+
if (u != null) {
134+
long scaledU = Math.round(u * ft.getScalingFactor());
135+
queryU = scaledU / ft.getScalingFactor();
136+
}
137+
138+
Query doubleQ = NumberFieldMapper.NumberType.DOUBLE.rangeQuery(
139+
"double",
140+
queryL,
141+
queryU,
142+
includeLower,
143+
includeUpper,
144+
false,
145+
true,
146+
MOCK_QSC
147+
);
148+
Query scaledFloatQ = ft.rangeQuery(queryL, queryU, includeLower, includeUpper, MOCK_QSC);
149+
int expectedCount = searcher.count(doubleQ);
150+
int scaledCount = searcher.count(scaledFloatQ);
151+
152+
// System.out.println("l=" + l + " queryL=" + queryL + " u=" + u + " queryU=" + queryU + " scalingFactor=" +
153+
// ft.getScalingFactor() + " expected= "+ expectedCount + " count= " + scaledCount);
154+
assertEquals(expectedCount, scaledCount);
155+
}
156+
IOUtils.close(reader, dir);
157+
}
158+
85159
public void testRoundsUpperBoundCorrectly() {
86160
ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType("scaled_float", 100);
87161
Query scaledFloatQ = ft.rangeQuery(null, 0.1, true, false, MOCK_QSC);
@@ -229,6 +303,7 @@ public void testLargeNumberIndexingAndQuerying() throws IOException {
229303
true,
230304
false,
231305
true,
306+
true,
232307
Collections.emptyMap(),
233308
scalingFactor,
234309
null

0 commit comments

Comments
 (0)