Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

search_after query optimization with missing value comparison #14852

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 56 additions & 12 deletions server/src/main/java/org/opensearch/search/SearchService.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@
import org.apache.logging.log4j.Logger;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.BytesRef;
import org.opensearch.OpenSearchException;
import org.opensearch.action.ActionRunnable;
import org.opensearch.action.OriginalIndices;
Expand Down Expand Up @@ -132,10 +134,10 @@
import org.opensearch.search.rescore.RescorerBuilder;
import org.opensearch.search.searchafter.SearchAfterBuilder;
import org.opensearch.search.sort.FieldSortBuilder;
import org.opensearch.search.sort.FieldStats;
import org.opensearch.search.sort.MinAndMax;
import org.opensearch.search.sort.SortAndFormats;
import org.opensearch.search.sort.SortBuilder;
import org.opensearch.search.sort.SortOrder;
import org.opensearch.search.suggest.Suggest;
import org.opensearch.search.suggest.completion.CompletionSuggestion;
import org.opensearch.tasks.TaskResourceTrackingService;
Expand Down Expand Up @@ -1622,8 +1624,11 @@ private CanMatchResponse canMatch(ShardSearchRequest request, boolean checkRefre
);
Rewriteable.rewrite(request.getRewriteable(), context, false);
final boolean aliasFilterCanMatch = request.getAliasFilter().getQueryBuilder() instanceof MatchNoneQueryBuilder == false;
FieldSortBuilder sortBuilder = FieldSortBuilder.getPrimaryFieldSortOrNull(request.source());
MinAndMax<?> minMax = sortBuilder != null ? FieldSortBuilder.getMinMaxOrNull(context, sortBuilder) : null;
final FieldSortBuilder sortBuilder = FieldSortBuilder.getPrimaryFieldSortOrNull(request.source());
final SortAndFormats primarySort = sortBuilder != null
? SortBuilder.buildSort(Collections.singletonList(sortBuilder), context).get()
: null;
FieldStats stats = sortBuilder != null ? FieldSortBuilder.getFieldStatsForShard(context, sortBuilder) : FieldStats.UNKNOWN;
boolean canMatch;
if (canRewriteToMatchNone(request.source())) {
QueryBuilder queryBuilder = request.source().query();
Expand All @@ -1634,44 +1639,83 @@ private CanMatchResponse canMatch(ShardSearchRequest request, boolean checkRefre
}
final FieldDoc searchAfterFieldDoc = getSearchAfterFieldDoc(request, context);
final Integer trackTotalHitsUpto = request.source() == null ? null : request.source().trackTotalHitsUpTo();
canMatch = canMatch && canMatchSearchAfter(searchAfterFieldDoc, minMax, sortBuilder, trackTotalHitsUpto);
canMatch = canMatch
&& canMatchSearchAfter(
searchAfterFieldDoc,
stats.getMinAndMax(),
primarySort,
trackTotalHitsUpto,
stats.allDocsNonMissing()
);

return new CanMatchResponse(canMatch || hasRefreshPending, minMax);
return new CanMatchResponse(canMatch || hasRefreshPending, stats.getMinAndMax());
}
}
}

public static boolean canMatchSearchAfter(
FieldDoc searchAfter,
MinAndMax<?> minMax,
FieldSortBuilder primarySortField,
Integer trackTotalHitsUpto
SortAndFormats primarySort,
Integer trackTotalHitsUpto,
boolean allDocsNonMissing
) {
// Check for sort.missing == null, since in case of missing values sort queries, if segment/shard's min/max
// is out of search_after range, it still should be printed and hence we should not skip segment/shard.
// Skipping search on shard/segment entirely can cause mismatch on total_tracking_hits, hence skip only if
// track_total_hits is false.
if (searchAfter != null
&& searchAfter.fields[0] != null
&& minMax != null
&& primarySortField != null
&& primarySortField.missing() == null
&& primarySort != null
&& Objects.equals(trackTotalHitsUpto, SearchContext.TRACK_TOTAL_HITS_DISABLED)) {
final Object searchAfterPrimary = searchAfter.fields[0];
if (primarySortField.order() == SortOrder.DESC) {
SortField primarySortField = primarySort.sort.getSort()[0];
if (primarySortField.getReverse()) {
if (minMax.compareMin(searchAfterPrimary) > 0) {
// In Desc order, if segment/shard minimum is gt search_after, the segment/shard won't be competitive
return false;
return allDocsNonMissing == false && canMatchMissingValue(primarySortField, searchAfterPrimary);
}
} else {
if (minMax.compareMax(searchAfterPrimary) < 0) {
// In ASC order, if segment/shard maximum is lt search_after, the segment/shard won't be competitive
return false;
return allDocsNonMissing == false && canMatchMissingValue(primarySortField, searchAfterPrimary);
}
}
}
return true;
}

/**
 * Decides whether documents that have no value for the primary sort field may still sort after the
 * primary {@code search_after} value, based on the missing value configured on the sort field.
 * Callers use this once the indexed min/max range has been ruled out, so a {@code false} result
 * means the shard/segment can be skipped.
 *
 * @param primarySortField   the primary sort field; its reverse flag and missing value are consulted
 * @param primarySearchAfter the primary {@code search_after} value
 * @return {@code true} if docs with a missing value may be competitive, or if that cannot be
 *         determined because the sort field exposes no missing value; {@code false} otherwise
 */
private static boolean canMatchMissingValue(SortField primarySortField, Object primarySearchAfter) {
    final Object missingValue = primarySortField.getMissingValue();
    if (missingValue == null) {
        // The sort field does not expose a missing value (presumably e.g. when a custom comparator
        // source handles missing docs -- confirm), so we cannot tell where missing docs sort.
        // Skipping is only safe when non-competitiveness is proven, hence report a possible match.
        return true;
    }
    final boolean reverse = primarySortField.getReverse();
    if (primarySearchAfter instanceof BytesRef) {
        // the missing value of Type.STRING can only be SortField.STRING_FIRST or SortField.STRING_LAST
        // DESC: missing docs follow search_after only if they are treated as the smallest value.
        // ASC: missing docs follow search_after only if they are treated as the largest value.
        return missingValue == (reverse ? SortField.STRING_FIRST : SortField.STRING_LAST);
    }
    final int cmp = compare(primarySearchAfter, missingValue);
    // DESC: competitive when missingValue <= search_after; ASC: competitive when missingValue >= search_after.
    return reverse ? cmp >= 0 : cmp <= 0;
}

/**
 * Compares two boxed numeric values of the same type.
 * Supported types are {@link Long}, {@link Integer}, {@link Float}, {@link Double} and
 * {@link java.math.BigInteger}; both operands must be of the same supported type.
 *
 * @param one the left-hand value
 * @param two the right-hand value
 * @return a negative integer, zero, or a positive integer as {@code one} is less than, equal to,
 *         or greater than {@code two}
 * @throws UnsupportedOperationException if either value is {@code null}, the two values are of
 *                                       different types, or the type is not supported
 */
private static int compare(Object one, Object two) {
    // Both operands are type-checked so that a null or mismatched right-hand value is reported
    // explicitly instead of surfacing as a NullPointerException or ClassCastException.
    if (one instanceof Long && two instanceof Long) {
        return Long.compare((Long) one, (Long) two);
    } else if (one instanceof Integer && two instanceof Integer) {
        return Integer.compare((Integer) one, (Integer) two);
    } else if (one instanceof Float && two instanceof Float) {
        return Float.compare((Float) one, (Float) two);
    } else if (one instanceof Double && two instanceof Double) {
        return Double.compare((Double) one, (Double) two);
    } else if (one instanceof java.math.BigInteger && two instanceof java.math.BigInteger) {
        return ((java.math.BigInteger) one).compareTo((java.math.BigInteger) two);
    } else {
        throw new UnsupportedOperationException(
            "compare type not supported : "
                + (one == null ? null : one.getClass())
                + " vs "
                + (two == null ? null : two.getClass())
        );
    }
}

private static FieldDoc getSearchAfterFieldDoc(ShardSearchRequest request, QueryShardContext context) throws IOException {
if (context != null && request != null && request.source() != null && request.source().sorts() != null) {
final List<SortBuilder<?>> sorts = request.source().sorts();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@
import org.opensearch.search.query.QueryPhase;
import org.opensearch.search.query.QuerySearchResult;
import org.opensearch.search.sort.FieldSortBuilder;
import org.opensearch.search.sort.MinAndMax;
import org.opensearch.search.sort.FieldStats;

import java.io.IOException;
import java.util.ArrayList;
Expand Down Expand Up @@ -517,17 +517,19 @@ private boolean canMatchSearchAfter(LeafReaderContext ctx) throws IOException {
// Only applied on primary sort field and primary search_after.
FieldSortBuilder primarySortField = FieldSortBuilder.getPrimaryFieldSortOrNull(searchContext.request().source());
if (primarySortField != null) {
MinAndMax<?> minMax = FieldSortBuilder.getMinMaxOrNullForSegment(
FieldStats stats = FieldSortBuilder.getFieldStatsForSegment(
this.searchContext.getQueryShardContext(),
ctx,
primarySortField,
searchContext.sort()
);
assert stats != null;
return SearchService.canMatchSearchAfter(
searchContext.searchAfter(),
minMax,
primarySortField,
searchContext.trackTotalHitsUpTo()
stats.getMinAndMax(),
searchContext.sort(),
searchContext.trackTotalHitsUpTo(),
stats.allDocsNonMissing()
);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,10 @@ public static FieldDoc buildFieldDoc(SortAndFormats sort, Object[] values) {
if (values[i] != null) {
fieldValues[i] = convertValueFromSortField(values[i], sortField, format);
} else {
SortField.Type sortType = extractSortType(sortField);
if (sortType != SortField.Type.STRING && sortType != SortField.Type.STRING_VAL) {
throw new IllegalArgumentException("search after value of type [" + sortType + "] cannot be null");
}
fieldValues[i] = null;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -606,73 +606,77 @@ public static FieldSortBuilder getPrimaryFieldSortOrNull(SearchSourceBuilder sou
}

/**
* Return the {@link MinAndMax} indexed value for shard from the provided {@link FieldSortBuilder} or <code>null</code> if unknown.
* Return the {@link FieldStats} indexed value for shard from the provided {@link FieldSortBuilder} or {@link FieldStats#UNKNOWN} if unknown.
* The value can be extracted on non-nested indexed mapped fields of type keyword, numeric or date, other fields
* and configurations return <code>null</code>.
* and configurations return {@link FieldStats#UNKNOWN}.
*/
public static MinAndMax<?> getMinMaxOrNull(QueryShardContext context, FieldSortBuilder sortBuilder) throws IOException {
public static FieldStats getFieldStatsForShard(QueryShardContext context, FieldSortBuilder sortBuilder) throws IOException {
final SortAndFormats sort = SortBuilder.buildSort(Collections.singletonList(sortBuilder), context).get();
return getMinMaxOrNullInternal(context.getIndexReader(), context, sortBuilder, sort);
return getFieldStatsInternal(context.getIndexReader(), context, sortBuilder, sort);
}

/**
* Return the {@link MinAndMax} indexed value for segment from the provided {@link FieldSortBuilder} or <code>null</code> if unknown.
* Return the {@link FieldStats} indexed value for segment from the provided {@link FieldSortBuilder} or {@link FieldStats#UNKNOWN} if unknown.
* The value can be extracted on non-nested indexed mapped fields of type keyword, numeric or date, other fields
* and configurations return <code>null</code>.
* and configurations return {@link FieldStats#UNKNOWN}.
*/
public static MinAndMax<?> getMinMaxOrNullForSegment(
public static FieldStats getFieldStatsForSegment(
QueryShardContext context,
LeafReaderContext ctx,
FieldSortBuilder sortBuilder,
SortAndFormats sort
) throws IOException {
return getMinMaxOrNullInternal(ctx.reader(), context, sortBuilder, sort);
return getFieldStatsInternal(ctx.reader(), context, sortBuilder, sort);
}

private static MinAndMax<?> getMinMaxOrNullInternal(
private static FieldStats getFieldStatsInternal(
IndexReader reader,
QueryShardContext context,
FieldSortBuilder sortBuilder,
SortAndFormats sort
) throws IOException {
SortField sortField = sort.sort.getSort()[0];
if (sortField.getField() == null) {
return null;
return FieldStats.UNKNOWN;
}
MappedFieldType fieldType = context.fieldMapper(sortField.getField());
if (reader == null || (fieldType == null || fieldType.isSearchable() == false)) {
return null;
return FieldStats.UNKNOWN;
}
switch (IndexSortConfig.getSortFieldType(sortField)) {
case LONG:
case INT:
case DOUBLE:
case FLOAT:
return extractNumericMinAndMax(reader, sortField, fieldType, sortBuilder);
return extractNumericFieldStats(reader, sortField, fieldType, sortBuilder);
case STRING:
case STRING_VAL:
if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
Terms terms = MultiTerms.getTerms(reader, fieldType.name());
if (terms == null) {
return null;
return FieldStats.UNKNOWN;
}
return terms.getMin() != null ? new MinAndMax<>(terms.getMin(), terms.getMax()) : null;
MinAndMax<?> minAndMax = terms.getMin() != null ? new MinAndMax<>(terms.getMin(), terms.getMax()) : null;
return new FieldStats(minAndMax, terms.getDocCount() == reader.maxDoc());
}
break;
}
return null;
return FieldStats.UNKNOWN;
}

private static MinAndMax<?> extractNumericMinAndMax(
private static FieldStats extractNumericFieldStats(
IndexReader reader,
SortField sortField,
MappedFieldType fieldType,
FieldSortBuilder sortBuilder
) throws IOException {
String fieldName = fieldType.name();
if (PointValues.size(reader, fieldName) == 0) {
return null;
final int docCount = PointValues.getDocCount(reader, fieldName);
if (docCount == 0) {
return FieldStats.UNKNOWN;
}
final boolean allDocsNonMissing = docCount == reader.maxDoc();
MinAndMax<?> minAndMax = null;
if (fieldType instanceof NumberFieldType) {
NumberFieldType numberFieldType = (NumberFieldType) fieldType;
Number minPoint = numberFieldType.parsePoint(PointValues.getMinPackedValue(reader, fieldName));
Expand All @@ -681,27 +685,31 @@ private static MinAndMax<?> extractNumericMinAndMax(
case LONG:
if (numberFieldType.numericType() == NumericType.UNSIGNED_LONG) {
// The min and max are expected to be BigInteger numbers
return new MinAndMax<>((BigInteger) minPoint, (BigInteger) maxPoint);
minAndMax = new MinAndMax<>((BigInteger) minPoint, (BigInteger) maxPoint);
} else {
return new MinAndMax<>(minPoint.longValue(), maxPoint.longValue());
minAndMax = new MinAndMax<>(minPoint.longValue(), maxPoint.longValue());
}
break;
case INT:
return new MinAndMax<>(minPoint.intValue(), maxPoint.intValue());
minAndMax = new MinAndMax<>(minPoint.intValue(), maxPoint.intValue());
break;
case DOUBLE:
return new MinAndMax<>(minPoint.doubleValue(), maxPoint.doubleValue());
minAndMax = new MinAndMax<>(minPoint.doubleValue(), maxPoint.doubleValue());
break;
case FLOAT:
return new MinAndMax<>(minPoint.floatValue(), maxPoint.floatValue());
minAndMax = new MinAndMax<>(minPoint.floatValue(), maxPoint.floatValue());
break;
default:
return null;
// no-op
}
} else if (fieldType instanceof DateFieldType) {
DateFieldType dateFieldType = (DateFieldType) fieldType;
Function<byte[], Long> dateConverter = createDateConverter(sortBuilder, dateFieldType);
Long min = dateConverter.apply(PointValues.getMinPackedValue(reader, fieldName));
Long max = dateConverter.apply(PointValues.getMaxPackedValue(reader, fieldName));
return new MinAndMax<>(min, max);
minAndMax = new MinAndMax<>(min, max);
}
return null;
return new FieldStats(minAndMax, allDocsNonMissing);
}

private static Function<byte[], Long> createDateConverter(FieldSortBuilder sortBuilder, DateFieldType dateFieldType) {
Expand Down
40 changes: 40 additions & 0 deletions server/src/main/java/org/opensearch/search/sort/FieldStats.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.search.sort;

/**
 * Immutable holder for statistics about a single field: its minimum/maximum value and a flag
 * telling whether every document has a value for the field.
 *
 * @opensearch.internal
 */
public class FieldStats {
    /** Minimum and maximum of the field; {@code null} when they are not known. */
    private final MinAndMax<?> minAndMax;
    /** {@code true} when every document has a value for the field. */
    private final boolean allDocsNonMissing;

    /** Shared instance for the "nothing known" case: no min/max, and not all docs are flagged as having a value. */
    public static final FieldStats UNKNOWN = new FieldStats(null, false);

    /**
     * Creates a new set of field statistics.
     *
     * @param minAndMax         the minimum and maximum value of the field, may be {@code null}
     * @param allDocsNonMissing whether all docs have a value for the field
     */
    public FieldStats(MinAndMax<?> minAndMax, boolean allDocsNonMissing) {
        this.allDocsNonMissing = allDocsNonMissing;
        this.minAndMax = minAndMax;
    }

    /**
     * Returns the minimum and maximum value of the field.
     *
     * @return the min/max pair, or {@code null} if it is not known (as in {@link #UNKNOWN})
     */
    public MinAndMax<?> getMinAndMax() {
        return this.minAndMax;
    }

    /**
     * Tells whether all docs have a value for the corresponding field.
     *
     * @return {@code true} if no doc is missing a value for the field
     */
    public boolean allDocsNonMissing() {
        return this.allDocsNonMissing;
    }
}
Loading
Loading