diff --git a/CHANGELOG.md b/CHANGELOG.md index 33f11a16c3517..cfdfd4aab7728 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Support system generated search pipeline. ([#19128](https://github.com/opensearch-project/OpenSearch/pull/19128)) - Add `epoch_micros` date format ([#14669](https://github.com/opensearch-project/OpenSearch/issues/14669)) - Grok processor supports capturing multiple values for same field name ([#18799](https://github.com/opensearch-project/OpenSearch/pull/18799)) +- Add support for search tie-breaking by _shard_doc ([#18924](https://github.com/opensearch-project/OpenSearch/pull/18924)) - Upgrade opensearch-protobufs dependency to 0.13.0 and update transport-grpc module compatibility ([#19007](https://github.com/opensearch-project/OpenSearch/issues/19007)) - Add new extensible method to DocRequest to specify type ([#19313](https://github.com/opensearch-project/OpenSearch/pull/19313)) - [Rule based auto-tagging] Add Rule based auto-tagging IT ([#18550](https://github.com/opensearch-project/OpenSearch/pull/18550)) diff --git a/benchmarks/src/main/java/org/opensearch/benchmark/search/sort/ShardDocComparatorBenchmark.java b/benchmarks/src/main/java/org/opensearch/benchmark/search/sort/ShardDocComparatorBenchmark.java new file mode 100644 index 0000000000000..c94857480b185 --- /dev/null +++ b/benchmarks/src/main/java/org/opensearch/benchmark/search/sort/ShardDocComparatorBenchmark.java @@ -0,0 +1,148 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.benchmark.search.sort; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +import java.util.Random; +import java.util.concurrent.TimeUnit; + +/** + * JMH microbenchmarks for the _shard_doc composite key path: + * key = (shardKeyPrefix | (docBase + doc)) + * + * Mirrors hot operations in ShardDocFieldComparatorSource without needing Lucene classes. 
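+ * For example (illustrative values): shardId 7 with docBase 100 and doc 3 packs to (7L << 32) | 103 = 30064771175.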
+ */ +@Fork(3) +@Warmup(iterations = 5) +@Measurement(iterations = 10, time = 1, timeUnit = TimeUnit.SECONDS) +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +@State(Scope.Benchmark) + +public class ShardDocComparatorBenchmark { + + @Param({ "1", "4", "16" }) + public int segments; + + @Param({ "50000" }) + public int docsPerSegment; + + @Param({ "7" }) + public int shardId; + + private long shardKeyPrefix; + private int[] docBases; + private int[] docs; + private long[] keys; // precomputed composite keys + + // per-doc global doc (docBase + doc) for doc-only baseline + private int[] globalDocs; + + @Setup + public void setup() { + shardKeyPrefix = ((long) shardId) << 32; // Must mirror ShardDocFieldComparatorSource.shardKeyPrefix + + docBases = new int[segments]; + for (int i = 1; i < segments; i++) { + docBases[i] = docBases[i - 1] + docsPerSegment; + } + + int total = segments * docsPerSegment; + docs = new int[total]; + keys = new long[total]; + globalDocs = new int[total]; + + Random r = new Random(42); + int pos = 0; + for (int s = 0; s < segments; s++) { + int base = docBases[s]; + for (int d = 0; d < docsPerSegment; d++) { + int doc = r.nextInt(docsPerSegment); + docs[pos] = doc; + keys[pos] = computeGlobalDocKey(base, doc); + globalDocs[pos] = base + doc; + pos++; + } + } + } + + /** Baseline: compare only globalDoc */ + @Benchmark + public long compareDocOnlyAsc() { + long acc = 0; + for (int i = 1; i < globalDocs.length; i++) { + acc += Integer.compare(globalDocs[i - 1], globalDocs[i]); + } + return acc; + } + + /** raw key packing cost */ + @Benchmark + public void packKey(Blackhole bh) { + int total = segments * docsPerSegment; + int idx = 0; + for (int s = 0; s < segments; s++) { + int base = docBases[s]; + for (int d = 0; d < docsPerSegment; d++) { + long k = computeGlobalDocKey(base, docs[idx++]); + bh.consume(k); + } + } + } + + /** compare already-packed keys as ASC */ + @Benchmark + public long compareAsc() { + long acc = 0; + for (int i = 1; i < keys.length; i++) { + acc += Long.compare(keys[i - 1], keys[i]); + } + return acc; + } + + /** compare already-packed keys as DESC */ + @Benchmark + public long compareDesc() { + long acc = 0; + for (int i = 1; i < keys.length; i++) { + acc += Long.compare(keys[i], keys[i - 1]); // reversed + } + return acc; + } + + /** rough “collector loop” mix: copy + occasional compareBottom */ + @Benchmark + public int copyAndCompareBottomAsc() { + long bottom = Long.MIN_VALUE; + int worse = 0; + for (int i = 0; i < keys.length; i++) { + long v = keys[i]; // simulate copy(slot, doc) + if ((i & 31) == 0) bottom = v; // simulate setBottom every 32 items + if (Long.compare(bottom, v) < 0) worse++; + } + return worse; + } + + // Must mirror ShardDocFieldComparatorSource.computeGlobalDocKey: (shardId << 32) | (docBase + doc) + private long computeGlobalDocKey(int docBase, int doc) { + return shardKeyPrefix | (docBase + doc); + } +} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/95_search_after_shard_doc.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/95_search_after_shard_doc.yml new file mode 100644 index 0000000000000..89e6cc5979ea4 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/95_search_after_shard_doc.yml @@ -0,0 +1,191 @@ +--- +setup: + - skip: + version: " - 3.2.99" + reason: "introduced in 3.3.0" + + # Multi-shard index + - do: + indices.create: + index: sharddoc_paging + body: + settings: + number_of_shards: 4 + number_of_replicas: 0 + 
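+      # Four primaries so pages interleave hits from multiple shards; zero replicas so a single-node test cluster can reach green.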
mappings: + properties: + id: { type: integer } + txt: { type: keyword } + - do: + cluster.health: + wait_for_status: green + index: sharddoc_paging + - do: + bulk: + refresh: true + index: sharddoc_paging + body: | + {"index":{}} + {"id":1,"txt":"a"} + {"index":{}} + {"id":2,"txt":"b"} + {"index":{}} + {"id":3,"txt":"c"} + {"index":{}} + {"id":4,"txt":"d"} + {"index":{}} + {"id":5,"txt":"e"} + {"index":{}} + {"id":6,"txt":"f"} + {"index":{}} + {"id":7,"txt":"g"} + {"index":{}} + {"id":8,"txt":"h"} + {"index":{}} + {"id":9,"txt":"i"} + {"index":{}} + {"id":10,"txt":"j"} + {"index":{}} + {"id":11,"txt":"k"} + {"index":{}} + {"id":12,"txt":"l"} + {"index":{}} + {"id":13,"txt":"m"} + {"index":{}} + {"id":14,"txt":"n"} + {"index":{}} + {"id":15,"txt":"o"} + {"index":{}} + {"id":16,"txt":"p"} + {"index":{}} + {"id":17,"txt":"q"} + {"index":{}} + {"id":18,"txt":"r"} + {"index":{}} + {"id":19,"txt":"s"} + {"index":{}} + {"id":20,"txt":"t"} + {"index":{}} + {"id":21,"txt":"u"} + {"index":{}} + {"id":22,"txt":"v"} + +# ------------------------------------------------------------------- +# VALIDATION +# ------------------------------------------------------------------- + +--- +"reject _shard_doc without PIT": + - do: + catch: bad_request + search: + index: sharddoc_paging + body: + sort: + - _shard_doc + - match: { status: 400 } + - match: { error.type: action_request_validation_exception } + - match: { error.reason: "/.*_shard_doc is only supported with point-in-time.*|.*PIT.*/" } + +--- +"detect _shard_doc via FieldSortBuilder-style object without PIT": + - do: + catch: bad_request + search: + index: sharddoc_paging + body: + sort: + - _shard_doc: { } # object form, still invalid without PIT + - match: { status: 400 } + - match: { error.type: action_request_validation_exception } + - match: { error.reason: "/.*_shard_doc is only supported with point-in-time.*|.*PIT.*/" } + + +# ------------------------------------------------------------------- +# HAPPY PATH: PAGINATION WITH PIT ON MULTI-SHARD INDEX +# ------------------------------------------------------------------- + +--- +"accept _shard_doc with PIT + paginate with search_after (multi-shard)": + - do: + create_pit: + index: sharddoc_paging + keep_alive: 1m + - set: { pit_id: pit_id } + + # Page 1 + - do: + search: + body: + size: 10 + pit: { id: "$pit_id", keep_alive: "1m" } + sort: + - _shard_doc: {} + - match: { _shards.failed: 0 } + - length: { hits.hits: 10 } + - is_true: hits.hits.9.sort + + - set: { hits.hits.9.sort: after1 } + + # Page 2 + - do: + search: + body: + size: 10 + pit: { id: "$pit_id", keep_alive: "1m" } + sort: + - _shard_doc: { } + search_after: $after1 + + - match: { _shards.failed: 0 } + - length: { hits.hits: 10 } + - is_true: hits.hits.9.sort + + - set: { hits.hits.9.sort: after2 } + - set: { hits.hits.9.sort.0: last_value_page2 } + + # Check that the sort values increase from one hit to the next without ever decreasing. 
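+  # (each _shard_doc sort value is a single long packing (shard_id << 32) | doc_id, per ShardDocFieldComparatorSource, so keys are unique and totally ordered within the PIT)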
+ - set: { hits.hits.0.sort.0: prev } + - gt: { hits.hits.1.sort.0: $prev } + + - set: { hits.hits.1.sort.0: prev } + - gt: { hits.hits.2.sort.0: $prev } + + - set: { hits.hits.2.sort.0: prev } + - gt: { hits.hits.3.sort.0: $prev } + + - set: { hits.hits.3.sort.0: prev } + - gt: { hits.hits.4.sort.0: $prev } + + - set: { hits.hits.4.sort.0: prev } + - gt: { hits.hits.5.sort.0: $prev } + + - set: { hits.hits.5.sort.0: prev } + - gt: { hits.hits.6.sort.0: $prev } + + - set: { hits.hits.6.sort.0: prev } + - gt: { hits.hits.7.sort.0: $prev } + + - set: { hits.hits.7.sort.0: prev } + - gt: { hits.hits.8.sort.0: $prev } + + - set: { hits.hits.8.sort.0: prev } + - gt: { hits.hits.9.sort.0: $prev } + + # Page 3: drain the rest (22 docs total => 10 + 10 + 2) + - do: + search: + body: + size: 10 + pit: { id: "$pit_id", keep_alive: "1m" } + sort: + - _shard_doc: {} + search_after: $after2 + + - match: { _shards.failed: 0 } + - length: { hits.hits: 2 } + + - do: + delete_pit: + body: + pit_id: [ "$pit_id" ] diff --git a/server/src/internalClusterTest/java/org/opensearch/search/sort/ShardDocFieldComparatorSourceIT.java b/server/src/internalClusterTest/java/org/opensearch/search/sort/ShardDocFieldComparatorSourceIT.java new file mode 100644 index 0000000000000..33cd5d42bf4d7 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/search/sort/ShardDocFieldComparatorSourceIT.java @@ -0,0 +1,501 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.sort; + +import org.opensearch.action.search.CreatePitAction; +import org.opensearch.action.search.CreatePitRequest; +import org.opensearch.action.search.CreatePitResponse; +import org.opensearch.action.search.DeletePitAction; +import org.opensearch.action.search.DeletePitRequest; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.common.action.ActionFuture; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.search.SearchHit; +import org.opensearch.search.builder.PointInTimeBuilder; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.test.OpenSearchIntegTestCase; +import org.junit.Before; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.lessThan; + +@OpenSearchIntegTestCase.ClusterScope(numDataNodes = 2, supportsDedicatedMasters = false) +public class ShardDocFieldComparatorSourceIT extends OpenSearchIntegTestCase { + + private static final String INDEX = "test_shard_doc"; + + @Before + public void setupIndex() { + createIndex(INDEX, Settings.builder().put("index.number_of_shards", 2).put("index.number_of_replicas", 0).build()); + ensureGreen(INDEX); + } + + public void testEmptyIndex() throws Exception { + String pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); + try { + SearchSourceBuilder ssb = new SearchSourceBuilder().size(10) + .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) + .pointInTimeBuilder(pit(pitId)); + SearchResponse resp = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); + + // no hits at all + 
SearchHit[] hits = resp.getHits().getHits(); + assertThat(hits.length, equalTo(0)); + assertThat(resp.getHits().getTotalHits().value(), equalTo(0L)); + } finally { + closePit(pitId); + } + } + + public void testSingleDocument() throws Exception { + client().prepareIndex(INDEX).setId("42").setSource("foo", "bar").get(); + refresh(); + + String pitId = null; + try { + pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); + SearchSourceBuilder ssb = new SearchSourceBuilder().size(5) + .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) + .pointInTimeBuilder(pit(pitId)); + SearchResponse resp = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); + + assertThat(resp.getHits().getTotalHits().value(), equalTo(1L)); + assertThat(resp.getHits().getHits()[0].getId(), equalTo("42")); + } finally { + closePit(pitId); + } + } + + public void testSearchAfterBeyondEndYieldsNoHits() throws Exception { + indexSequentialDocs(5); + refresh(); + List allKeys = new ArrayList<>(); + + String pitId = null; + try { + pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); + SearchSourceBuilder ssb = new SearchSourceBuilder().size(5) + .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) + .pointInTimeBuilder(pit(pitId)); + + SearchResponse resp0 = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); + // collect first page + for (SearchHit hit : resp0.getHits().getHits()) { + Object[] sv = hit.getSortValues(); + allKeys.add(((Number) sv[0]).longValue()); + } + + long globalMax = allKeys.get(allKeys.size() - 1); + + SearchSourceBuilder next = new SearchSourceBuilder().size(3) + .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) + .pointInTimeBuilder(pit(pitId)) + .searchAfter(new Object[] { globalMax + 1 }); + + SearchResponse resp = client().search(new SearchRequest(INDEX).source(next)).actionGet(); + SearchHit[] hits = resp.getHits().getHits(); + assertThat(hits.length, equalTo(0)); + + } finally { + closePit(pitId); + } + } + + public void testSearchAfterBeyondEndYieldsNoHits_DESC() throws Exception { + indexSequentialDocs(5); + refresh(); + + String pitId = null; + try { + // First page: _shard_doc DESC, grab the SMALLEST key (last hit on the page) + pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); + SearchSourceBuilder ssb = new SearchSourceBuilder().size(5) + .sort(SortBuilders.shardDocSort().order(SortOrder.DESC)) + .pointInTimeBuilder(pit(pitId)); + + SearchResponse first = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); + assertThat(first.getHits().getHits().length, equalTo(5)); + + // Probe strictly beyond the end for DESC: use search_after < min (min - 1) => expect 0 hits + long minKey = ((Number) first.getHits().getHits()[4].getSortValues()[0]).longValue(); // smallest in DESC page + SearchSourceBuilder probe = new SearchSourceBuilder().size(3) + .sort(SortBuilders.shardDocSort().order(SortOrder.DESC)) + .pointInTimeBuilder(pit(pitId)) + .searchAfter(new Object[] { minKey - 1 }); + + SearchResponse resp = client().search(new SearchRequest(INDEX).source(probe)).actionGet(); + assertThat(resp.getHits().getHits().length, equalTo(0)); + + } finally { + closePit(pitId); + } + } + + public void testPrimaryFieldSortThenShardDocTieBreaker() throws Exception { + // force ties on primary + for (int i = 1; i <= 30; i++) { + client().prepareIndex(INDEX).setId(Integer.toString(i)).setSource("val", 123).get(); + } + refresh(); + + List shardDocKeys = new ArrayList<>(); + String pitId = null; + try { + pitId = openPit(INDEX, 
TimeValue.timeValueMinutes(1)); + collectIdsAndSortKeys( + INDEX, + pitId, + 10, + 1, + null, + shardDocKeys, + new FieldSortBuilder("val").order(SortOrder.ASC), + SortBuilders.shardDocSort().order(SortOrder.ASC) + ); + + assertThat(shardDocKeys.size(), equalTo(30)); + for (int i = 1; i < shardDocKeys.size(); i++) { + assertThat(shardDocKeys.get(i), greaterThan(shardDocKeys.get(i - 1))); + } + } finally { + closePit(pitId); + } + } + + public void testOrderingAscAndPagination() throws Exception { + assertShardDocOrdering(SortOrder.ASC); + } + + public void testOrderingDescAndPagination() throws Exception { + assertShardDocOrdering(SortOrder.DESC); + } + + private void assertShardDocOrdering(SortOrder order) throws Exception { + int pageSize = randomIntBetween(5, 23); + int totalDocs = randomIntBetween(73, 187); + indexSequentialDocs(totalDocs); + refresh(); + + String pitId = null; + try { + pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); + List shardDocKeys = new ArrayList<>(); + // shardDocIndex = 0 because we're only sorting by _shard_doc here + collectIdsAndSortKeys(INDEX, pitId, pageSize, 0, null, shardDocKeys, SortBuilders.shardDocSort().order(order)); + + assertThat(shardDocKeys.size(), equalTo(totalDocs)); + + for (int i = 1; i < shardDocKeys.size(); i++) { + if (order == SortOrder.ASC) { + assertThat("not strictly increasing at i=" + i, shardDocKeys.get(i), greaterThan(shardDocKeys.get(i - 1))); + } else { + assertThat("not strictly decreasing at i=" + i, shardDocKeys.get(i), lessThan(shardDocKeys.get(i - 1))); + } + } + + } finally { + closePit(pitId); + } + } + + public void testPageLocalMonotonicity_ASC() throws Exception { + indexSequentialDocs(20); + refresh(); + + String pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); + SearchSourceBuilder ssb = new SearchSourceBuilder().size(10) + .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) + .pointInTimeBuilder(pit(pitId)); + + SearchResponse resp = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); + SearchHit[] hits = resp.getHits().getHits(); + for (int i = 1; i < hits.length; i++) { + long prev = ((Number) hits[i - 1].getSortValues()[0]).longValue(); + long cur = ((Number) hits[i].getSortValues()[0]).longValue(); + assertThat("regression at i=" + i, cur, greaterThan(prev)); + } + closePit(pitId); + } + + // No duplicates across the whole scan (ASC & DESC). + public void testNoDuplicatesAcrossScan_ASC_DESC() throws Exception { + indexSequentialDocs(123); + refresh(); + + String pitId = null; + try { + pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); + List idsAsc = new ArrayList<>(); + List shardDocKeys = new ArrayList<>(); + + // ASC + collectIdsAndSortKeys(INDEX, pitId, 13, 0, idsAsc, shardDocKeys, SortBuilders.shardDocSort().order(SortOrder.ASC)); + assertThat(idsAsc.size(), equalTo(123)); + assertThat(new HashSet<>(idsAsc).size(), equalTo(idsAsc.size())); + + // DESC + List idsDesc = new ArrayList<>(); + collectIdsAndSortKeys(INDEX, pitId, 17, 0, idsDesc, shardDocKeys, SortBuilders.shardDocSort().order(SortOrder.DESC)); + assertThat(idsDesc.size(), equalTo(123)); + assertThat(new HashSet<>(idsDesc).size(), equalTo(idsDesc.size())); + } finally { + closePit(pitId); + } + } + + // Resume from the middle of a page (ASC). 
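+    // Using the anchor hit's full sort tuple with search_after resumes strictly after that document;
+    // because every _shard_doc key is unique, a mid-page restart can neither repeat nor skip docs.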
+ public void testResumeFromMiddleOfPage_ASC() throws Exception { + indexSequentialDocs(60); + refresh(); + + String pitId = null; + try { + pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); + + // First page to pick a middle anchor + SearchSourceBuilder firstPage = new SearchSourceBuilder().size(10) + .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) + .pointInTimeBuilder(pit(pitId)); + SearchResponse r1 = client().search(new SearchRequest(INDEX).source(firstPage)).actionGet(); + assertThat(r1.getHits().getHits().length, equalTo(10)); + + int mid = 4; + Object[] midSort = r1.getHits().getHits()[mid].getSortValues(); + + // Collect IDs = first page up to 'mid' (inclusive), then resume from mid sort tuple + List ids = new ArrayList<>(); + for (int i = 0; i <= mid; i++) { + ids.add(r1.getHits().getHits()[i].getId()); + } + + SearchSourceBuilder resume = new SearchSourceBuilder().size(10) + .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) + .pointInTimeBuilder(pit(pitId)) + .searchAfter(midSort); + SearchResponse resp = client().search(new SearchRequest(INDEX).source(resume)).actionGet(); + + while (true) { + SearchHit[] hits = resp.getHits().getHits(); + // should start strictly after the anchor + for (SearchHit h : hits) + ids.add(h.getId()); + if (hits.length < 10) break; + Object[] after = hits[hits.length - 1].getSortValues(); + + resume = new SearchSourceBuilder().size(10) + .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) + .pointInTimeBuilder(pit(pitId)) + .searchAfter(after); + resp = client().search(new SearchRequest(INDEX).source(resume)).actionGet(); + } + + // Should cover all 60 docs exactly once + assertThat(ids.size(), equalTo(60)); + assertThat(new HashSet<>(ids).size(), equalTo(60)); + } finally { + closePit(pitId); + } + } + + // Tiny page sizes (size=1 and size=2) with strict monotonicity & no dupes. + public void testTinyPageSizes_ASC() throws Exception { + indexSequentialDocs(41); + refresh(); + + for (int pageSize : new int[] { 1, 2 }) { + String pitId = null; + try { + pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); + List keys = new ArrayList<>(); + collectIdsAndSortKeys(INDEX, pitId, pageSize, 0, null, keys, SortBuilders.shardDocSort().order(SortOrder.ASC)); + + assertThat(keys.size(), equalTo(41)); + for (int i = 1; i < keys.size(); i++) { + assertThat(keys.get(i), greaterThan(keys.get(i - 1))); + } + } finally { + closePit(pitId); + } + } + } + + // Replicas enabled: still strict order and no dupes. 
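+    // The PIT pins each shard's reader context on one copy, so the (shardId << 32) | (docBase + doc)
+    // keys stay stable from page to page even when replicas could otherwise serve different copies.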
+    public void testWithReplicasEnabled_ASC() throws Exception {
+        final String repIdx = INDEX + "_repl";
+        createIndex(repIdx, Settings.builder().put("index.number_of_shards", 3).put("index.number_of_replicas", 1).build());
+        ensureGreen(repIdx);
+
+        for (int i = 1; i <= 100; i++) {
+            client().prepareIndex(repIdx).setId(Integer.toString(i)).setSource("v", i).get();
+        }
+        refresh(repIdx);
+
+        String pitId = null;
+        try {
+            pitId = openPit(repIdx, TimeValue.timeValueMinutes(1));
+            List<Long> keys = new ArrayList<>();
+            List<String> ids = new ArrayList<>();
+            collectIdsAndSortKeys(repIdx, pitId, 11, 0, ids, keys, SortBuilders.shardDocSort().order(SortOrder.ASC));
+            assertThat(keys.size(), equalTo(100));
+            for (int i = 1; i < keys.size(); i++) {
+                assertThat(keys.get(i), greaterThan(keys.get(i - 1)));
+            }
+            // IDs must also be unique
+            assertThat(new HashSet<>(ids).size(), equalTo(ids.size()));
+        } finally {
+            closePit(pitId);
+        }
+    }
+
+    // Boundary equality: using the exact last sort tuple as search_after should not duplicate the boundary doc.
+    public void testBoundaryEqualityNoOverlap_ASC() throws Exception {
+        indexSequentialDocs(30);
+        refresh();
+
+        String pitId = null;
+        try {
+            pitId = openPit(INDEX, TimeValue.timeValueMinutes(1));
+
+            SearchSourceBuilder p1 = new SearchSourceBuilder().size(7)
+                .sort(SortBuilders.shardDocSort().order(SortOrder.ASC))
+                .pointInTimeBuilder(pit(pitId));
+            SearchResponse r1 = client().search(new SearchRequest(INDEX).source(p1)).actionGet();
+            SearchHit[] hits1 = r1.getHits().getHits();
+            SearchHit lastOfPage1 = hits1[hits1.length - 1];
+
+            SearchSourceBuilder p2 = new SearchSourceBuilder().size(7)
+                .sort(SortBuilders.shardDocSort().order(SortOrder.ASC))
+                .pointInTimeBuilder(pit(pitId))
+                .searchAfter(lastOfPage1.getSortValues());
+            SearchResponse r2 = client().search(new SearchRequest(INDEX).source(p2)).actionGet();
+            SearchHit[] hits2 = r2.getHits().getHits();
+
+            if (hits2.length > 0) {
+                assertNotEquals("no overlap with boundary", lastOfPage1.getId(), hits2[0].getId());
+            }
+        } finally {
+            closePit(pitId);
+        }
+    }
+
+    // Large corpus, odd page sizes, multi-shard interleaving stress.
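+    // None of these page sizes divides 2000 evenly, so every scan ends on a short final page and
+    // exercises the hits.length < pageSize termination branch of collectIdsAndSortKeys.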
+    public void testLargeCorpusInterleaving_ASC() throws Exception {
+        final String bigIdx = INDEX + "_big";
+        createIndex(bigIdx, Settings.builder().put("index.number_of_shards", 5).put("index.number_of_replicas", 1).build());
+        ensureGreen(TimeValue.timeValueSeconds(60), bigIdx);
+
+        for (int i = 1; i <= 2000; i++) {
+            client().prepareIndex(bigIdx).setId(Integer.toString(i)).setSource("v", i).get();
+        }
+        refresh(bigIdx);
+
+        String pitId = null;
+        try {
+            pitId = openPit(bigIdx, TimeValue.timeValueMinutes(1));
+            // odd page sizes to stress boundaries
+            int[] sizes = new int[] { 13, 17, 19, 23, 31 };
+            for (int sz : sizes) {
+                List<Long> keys = new ArrayList<>();
+                // shardDocIndex=0 since only _shard_doc is sorted
+                collectIdsAndSortKeys(bigIdx, pitId, sz, 0, null, keys, SortBuilders.shardDocSort().order(SortOrder.ASC));
+                assertThat(keys.size(), equalTo(2000));
+                for (int i = 1; i < keys.size(); i++) {
+                    assertThat(keys.get(i), greaterThan(keys.get(i - 1)));
+                }
+            }
+        } finally {
+            closePit(pitId);
+        }
+    }
+
+    private void indexSequentialDocs(int count) {
+        for (int i = 1; i <= count; i++) {
+            client().prepareIndex(INDEX)
+                .setId(Integer.toString(i))
+                // the content doesn't matter for _shard_doc
+                .setSource("val", i)
+                .get();
+        }
+    }
+
+    private String openPit(String index, TimeValue keepAlive) throws Exception {
+        CreatePitRequest request = new CreatePitRequest(keepAlive, true);
+        request.setIndices(new String[] { index });
+        ActionFuture<CreatePitResponse> execute = client().execute(CreatePitAction.INSTANCE, request);
+        CreatePitResponse pitResponse = execute.get();
+        return pitResponse.getId();
+    }
+
+    private void closePit(String pitId) {
+        if (pitId == null) return;
+        DeletePitRequest del = new DeletePitRequest(Collections.singletonList(pitId));
+        client().execute(DeletePitAction.INSTANCE, del).actionGet();
+    }
+
+    private static PointInTimeBuilder pit(String pitId) {
+        return new PointInTimeBuilder(pitId).setKeepAlive(TimeValue.timeValueMinutes(1));
+    }
+
+    // Generic paginator: works for one or many sort keys.
+    // - pageSize: hits per page
+    // - shardDocIndex: position of the _shard_doc key in sortValues[]
+    // - sorts: the full sort list to apply (e.g., only _shard_doc, or primary then _shard_doc)
+    private void collectIdsAndSortKeys(
+        String index,
+        String pitId,
+        int pageSize,
+        int shardDocIndex,
+        List<String> ids,
+        List<Long> keys,
+        SortBuilder<?>...
sorts + ) { + SearchSourceBuilder ssb = new SearchSourceBuilder().size(pageSize).pointInTimeBuilder(pit(pitId)); + for (var s : sorts) { + ssb.sort(s); + } + SearchResponse resp = client().search(new SearchRequest(index).source(ssb)).actionGet(); + + while (true) { + SearchHit[] hits = resp.getHits().getHits(); + for (SearchHit hit : hits) { + Object[] sv = hit.getSortValues(); + assertNotNull("every hit must have sort", sv); + assertTrue("shard_doc should be present", shardDocIndex < sv.length); + assertThat("sort key must be a Long", sv[shardDocIndex], instanceOf(Long.class)); + long k = (Long) sv[shardDocIndex]; + keys.add(k); + if (ids != null) { + ids.add(hit.getId()); + } + } + // stop if last page + if (hits.length < pageSize) break; + + // use the FULL last sortValues[] as search_after for correctness + Object[] nextAfter = hits[hits.length - 1].getSortValues(); + ssb = new SearchSourceBuilder().size(pageSize).pointInTimeBuilder(pit(pitId)); + for (var s : sorts) { + ssb.sort(s); + } + ssb.searchAfter(nextAfter); + + resp = client().search(new SearchRequest(index).source(ssb)).actionGet(); + } + } +} diff --git a/server/src/main/java/org/opensearch/action/search/SearchRequest.java b/server/src/main/java/org/opensearch/action/search/SearchRequest.java index 4a4a309b45a2e..a1e6e7605cbdb 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchRequest.java +++ b/server/src/main/java/org/opensearch/action/search/SearchRequest.java @@ -51,6 +51,9 @@ import org.opensearch.search.builder.PointInTimeBuilder; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.search.internal.SearchContext; +import org.opensearch.search.sort.FieldSortBuilder; +import org.opensearch.search.sort.ShardDocSortBuilder; +import org.opensearch.search.sort.SortBuilder; import org.opensearch.transport.client.Client; import org.opensearch.transport.client.Requests; @@ -349,9 +352,45 @@ public ActionRequestValidationException validate() { validationException = addValidationError("using [point in time] is not allowed in a scroll context", validationException); } } + + // _shard_doc validation + if (source != null && source.sorts() != null && !source.sorts().isEmpty()) { + int shardDocCount = 0; + for (SortBuilder sb : source.sorts()) { + if (isShardDocSort(sb)) shardDocCount++; + } + final boolean hasPit = pointInTimeBuilder() != null; + + if (shardDocCount > 0 && scroll) { + validationException = addValidationError( + "_shard_doc cannot be used with scroll. Use PIT + search_after instead.", + validationException + ); + } + if (shardDocCount > 0 && !hasPit) { + validationException = addValidationError( + "_shard_doc is only supported with point-in-time (PIT). Add a PIT or remove _shard_doc.", + validationException + ); + } + if (shardDocCount > 1) { + validationException = addValidationError( + "duplicate _shard_doc sort detected. Specify it at most once.", + validationException + ); + } + } return validationException; } + private static boolean isShardDocSort(SortBuilder sb) { + if (sb instanceof ShardDocSortBuilder) return true; + if (sb instanceof FieldSortBuilder) { + return ShardDocSortBuilder.NAME.equals(((FieldSortBuilder) sb).getFieldName()); + } + return false; + } + /** * Returns the alias of the cluster that this search request is being executed on. A non-null value indicates that this search request * is being executed as part of a locally reduced cross-cluster search request. 
The cluster alias is used to prefix index names diff --git a/server/src/main/java/org/opensearch/search/SearchModule.java b/server/src/main/java/org/opensearch/search/SearchModule.java index f588013b74af2..d7a67836e4247 100644 --- a/server/src/main/java/org/opensearch/search/SearchModule.java +++ b/server/src/main/java/org/opensearch/search/SearchModule.java @@ -268,6 +268,7 @@ import org.opensearch.search.sort.GeoDistanceSortBuilder; import org.opensearch.search.sort.ScoreSortBuilder; import org.opensearch.search.sort.ScriptSortBuilder; +import org.opensearch.search.sort.ShardDocSortBuilder; import org.opensearch.search.sort.SortBuilder; import org.opensearch.search.sort.SortValue; import org.opensearch.search.suggest.Suggest; @@ -1202,6 +1203,7 @@ private void registerSortParsers(List plugins) { ); registerSort(new SortSpec<>(ScoreSortBuilder.NAME, ScoreSortBuilder::new, ScoreSortBuilder::fromXContent)); registerFromPlugin(plugins, SearchPlugin::getSorts, this::registerSort); + registerSort(new SortSpec<>(ShardDocSortBuilder.NAME, ShardDocSortBuilder::new, ShardDocSortBuilder::fromXContent)); } private void registerIntervalsSourceProviders() { diff --git a/server/src/main/java/org/opensearch/search/searchafter/SearchAfterBuilder.java b/server/src/main/java/org/opensearch/search/searchafter/SearchAfterBuilder.java index a45b2bd40c03d..cc67b68c90d7e 100644 --- a/server/src/main/java/org/opensearch/search/searchafter/SearchAfterBuilder.java +++ b/server/src/main/java/org/opensearch/search/searchafter/SearchAfterBuilder.java @@ -49,6 +49,7 @@ import org.opensearch.core.xcontent.XContentParser; import org.opensearch.index.fielddata.IndexFieldData; import org.opensearch.search.DocValueFormat; +import org.opensearch.search.sort.ShardDocFieldComparatorSource; import org.opensearch.search.sort.SortAndFormats; import java.io.IOException; @@ -161,6 +162,8 @@ static SortField.Type extractSortType(SortField sortField) { } else if ("LatLonPointSortField".equals(sortField.getClass().getSimpleName())) { // for geo distance sorting return SortField.Type.DOUBLE; + } else if (sortField.getComparatorSource() instanceof ShardDocFieldComparatorSource) { + return SortField.Type.LONG; } else { return sortField.getType(); } diff --git a/server/src/main/java/org/opensearch/search/sort/ShardDocFieldComparatorSource.java b/server/src/main/java/org/opensearch/search/sort/ShardDocFieldComparatorSource.java new file mode 100644 index 0000000000000..47a71fd2fd772 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/sort/ShardDocFieldComparatorSource.java @@ -0,0 +1,110 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.search.sort; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.FieldComparator; +import org.apache.lucene.search.LeafFieldComparator; +import org.apache.lucene.search.Pruning; +import org.apache.lucene.search.Scorable; +import org.apache.lucene.search.SortField; +import org.opensearch.common.util.BigArrays; +import org.opensearch.index.fielddata.IndexFieldData; +import org.opensearch.search.DocValueFormat; +import org.opensearch.search.MultiValueMode; +import org.opensearch.search.sort.BucketedSort.ExtraData; + +/** + * A pseudo‑field (_shard_doc) comparator that tiebreaks by {@code (shardId << 32) | globalDocId} + */ +public class ShardDocFieldComparatorSource extends IndexFieldData.XFieldComparatorSource { + public static final String NAME = "_shard_doc"; + + private final long shardKeyPrefix; + + /** + * @param shardId the shard ID of this shard + */ + public ShardDocFieldComparatorSource(int shardId) { + super(null, MultiValueMode.MIN, null); + shardKeyPrefix = ((long) shardId) << 32; + } + + @Override + public SortField.Type reducedType() { + return SortField.Type.LONG; + } + + @Override + public BucketedSort newBucketedSort(BigArrays bigArrays, SortOrder sortOrder, DocValueFormat format, int bucketSize, ExtraData extra) { + throw new UnsupportedOperationException("bucketed sort not supported for " + NAME); + } + + @Override + public FieldComparator newComparator(String fieldname, int numHits, Pruning pruning, boolean reversed) { + return new FieldComparator() { + private final long[] values = new long[numHits]; + private long bottom; + private long topValue; + + @Override + public LeafFieldComparator getLeafComparator(LeafReaderContext context) { + final int docBase = context.docBase; + + return new LeafFieldComparator() { + Scorable scorer; + + @Override + public void setScorer(Scorable scorer) { + this.scorer = scorer; + } + + @Override + public void setBottom(int slot) { + bottom = values[slot]; + } + + @Override + public int compareBottom(int doc) { + return Long.compare(bottom, computeGlobalDocKey(doc)); + } + + @Override + public void copy(int slot, int doc) { + values[slot] = computeGlobalDocKey(doc); + } + + @Override + public int compareTop(int doc) { + return Long.compare(topValue, computeGlobalDocKey(doc)); + } + + private long computeGlobalDocKey(int doc) { + return shardKeyPrefix | (docBase + doc); + } + }; + } + + @Override + public int compare(int slot1, int slot2) { + return Long.compare(values[slot1], values[slot2]); + } + + @Override + public Long value(int slot) { + return values[slot]; + } + + @Override + public void setTopValue(Long value) { + this.topValue = value; + } + }; + } +} diff --git a/server/src/main/java/org/opensearch/search/sort/ShardDocSortBuilder.java b/server/src/main/java/org/opensearch/search/sort/ShardDocSortBuilder.java new file mode 100644 index 0000000000000..99cebaeed3474 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/sort/ShardDocSortBuilder.java @@ -0,0 +1,129 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.search.sort; + +import org.apache.lucene.search.SortField; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.xcontent.ObjectParser; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.index.query.QueryRewriteContext; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.search.DocValueFormat; + +import java.io.IOException; +import java.util.Objects; + +/** + * Sort builder for the pseudo‐field "_shard_doc", which tiebreaks by {@code (shardId << 32) | globalDocId}. + */ +public class ShardDocSortBuilder extends SortBuilder { + + public static final String NAME = "_shard_doc"; + + // parser for JSON: { "_shard_doc": { "order":"asc" } } + private static final ObjectParser PARSER = new ObjectParser<>(NAME, ShardDocSortBuilder::new); + + static { + PARSER.declareString((b, s) -> b.order(SortOrder.fromString(s)), ORDER_FIELD); + } + + public ShardDocSortBuilder() { + this.order = SortOrder.ASC; // default to ASC + } + + public ShardDocSortBuilder(StreamInput in) throws IOException { + this.order = SortOrder.readFromStream(in); + } + + public ShardDocSortBuilder(ShardDocSortBuilder other) { + this.order = other.order; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + order.writeTo(out); + } + + public static ShardDocSortBuilder fromXContent(XContentParser parser, String fieldName) throws IOException { + XContentParser.Token token = parser.currentToken(); + if (token == XContentParser.Token.FIELD_NAME) { + token = parser.nextToken(); + } + + switch (token) { + case START_OBJECT: + return PARSER.parse(parser, null); // { "_shard_doc": { "order": "asc" } } + + case VALUE_STRING: + case VALUE_NUMBER: + case VALUE_BOOLEAN: + case VALUE_NULL: + return new ShardDocSortBuilder(); // Scalar shorthand: "_shard_doc" → defaults to ASC + + case START_ARRAY: + throw new org.opensearch.core.xcontent.XContentParseException( + parser.getTokenLocation(), + "[" + NAME + "] Expected START_OBJECT or scalar but was: START_ARRAY" + ); + + default: + throw new org.opensearch.core.xcontent.XContentParseException( + parser.getTokenLocation(), + "[" + NAME + "] Expected START_OBJECT or scalar but was: " + token + ); + } + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.startObject(NAME); + builder.field(ORDER_FIELD.getPreferredName(), order); + builder.endObject(); + builder.endObject(); + return builder; + } + + @Override + protected SortFieldAndFormat build(QueryShardContext context) { + final int shardId = context.getShardId(); + SortField sf = new SortField(NAME, new ShardDocFieldComparatorSource(shardId), order == SortOrder.DESC); + return new SortFieldAndFormat(sf, DocValueFormat.RAW); + } + + @Override + public BucketedSort buildBucketedSort(QueryShardContext context, int bucketSize, BucketedSort.ExtraData extra) throws IOException { + throw new UnsupportedOperationException("bucketed sort not supported for " + NAME); + } + + @Override + public ShardDocSortBuilder rewrite(QueryRewriteContext ctx) { + return this; + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (obj == null || getClass() != obj.getClass()) return false; + ShardDocSortBuilder other = 
(ShardDocSortBuilder) obj; + return order == other.order; + } + + @Override + public int hashCode() { + return Objects.hash(order); + } +} diff --git a/server/src/main/java/org/opensearch/search/sort/SortBuilder.java b/server/src/main/java/org/opensearch/search/sort/SortBuilder.java index a8c21e7311061..e3d83e6a8102e 100644 --- a/server/src/main/java/org/opensearch/search/sort/SortBuilder.java +++ b/server/src/main/java/org/opensearch/search/sort/SortBuilder.java @@ -135,6 +135,8 @@ public static List> fromXContent(XContentParser parser) throws IO private static SortBuilder fieldOrScoreSort(String fieldName) { if (fieldName.equals(ScoreSortBuilder.NAME)) { return new ScoreSortBuilder(); + } else if (fieldName.equals(ShardDocSortBuilder.NAME)) { // ShardDocSortBuilder is a special "field" sort + return new ShardDocSortBuilder(); } else { return new FieldSortBuilder(fieldName); } diff --git a/server/src/main/java/org/opensearch/search/sort/SortBuilders.java b/server/src/main/java/org/opensearch/search/sort/SortBuilders.java index 209b5b160f30b..729e62d8675ff 100644 --- a/server/src/main/java/org/opensearch/search/sort/SortBuilders.java +++ b/server/src/main/java/org/opensearch/search/sort/SortBuilders.java @@ -100,4 +100,11 @@ public static GeoDistanceSortBuilder geoDistanceSort(String fieldName, GeoPoint. public static GeoDistanceSortBuilder geoDistanceSort(String fieldName, String... geohashes) { return new GeoDistanceSortBuilder(fieldName, geohashes); } + + /** + * Constructs a new shard‐doc tiebreaker sort. + */ + public static ShardDocSortBuilder shardDocSort() { + return new ShardDocSortBuilder(); + } } diff --git a/server/src/test/java/org/opensearch/action/search/SearchRequestTests.java b/server/src/test/java/org/opensearch/action/search/SearchRequestTests.java index acda1445bacbb..23a84737ec5df 100644 --- a/server/src/test/java/org/opensearch/action/search/SearchRequestTests.java +++ b/server/src/test/java/org/opensearch/action/search/SearchRequestTests.java @@ -50,6 +50,10 @@ import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.search.fetch.subphase.FetchSourceContext; import org.opensearch.search.rescore.QueryRescorerBuilder; +import org.opensearch.search.sort.FieldSortBuilder; +import org.opensearch.search.sort.ShardDocSortBuilder; +import org.opensearch.search.sort.SortBuilders; +import org.opensearch.search.sort.SortOrder; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.VersionUtils; import org.opensearch.test.rest.FakeRestRequest; @@ -63,6 +67,7 @@ import static org.opensearch.action.search.SearchType.DFS_QUERY_THEN_FETCH; import static org.opensearch.test.EqualsHashCodeTestUtils.checkEqualsAndHashCode; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasItems; import static org.mockito.Mockito.mock; public class SearchRequestTests extends AbstractSearchTestCase { @@ -238,6 +243,146 @@ public void testValidate() throws IOException { assertEquals(1, validationErrors.validationErrors().size()); assertEquals("using [point in time] is not allowed in a scroll context", validationErrors.validationErrors().get(0)); } + { + // _shard_doc without PIT -> reject + SearchRequest searchRequest = new SearchRequest().source(new SearchSourceBuilder().sort(SortBuilders.shardDocSort())); + ActionRequestValidationException e = searchRequest.validate(); + assertNotNull(e); + assertEquals(1, e.validationErrors().size()); + assertEquals( + "_shard_doc is only supported with point-in-time (PIT). 
Add a PIT or remove _shard_doc.", + e.validationErrors().get(0) + ); + } + { + // _shard_doc with scroll -> reject (even if PIT is present, scroll is illegal) + SearchRequest searchRequest = new SearchRequest().source( + // include PIT to mirror real usage; scroll + PIT is already invalid, but we assert shard_doc+scroll error is present + new SearchSourceBuilder().pointInTimeBuilder(new PointInTimeBuilder("id")).sort(SortBuilders.shardDocSort()) + ).scroll(TimeValue.timeValueSeconds(30)); + ActionRequestValidationException e = searchRequest.validate(); + assertNotNull(e); + assertTrue( + "Expected shard_doc + scroll error", + e.validationErrors().contains("_shard_doc cannot be used with scroll. Use PIT + search_after instead.") + ); + } + { + // Smuggled as FieldSortBuilder("_shard_doc") without PIT -> reject + SearchRequest searchRequest = new SearchRequest().source( + new SearchSourceBuilder().sort(new FieldSortBuilder(ShardDocSortBuilder.NAME)) + ); + ActionRequestValidationException e = searchRequest.validate(); + assertNotNull(e); + assertEquals(1, e.validationErrors().size()); + assertEquals( + "_shard_doc is only supported with point-in-time (PIT). Add a PIT or remove _shard_doc.", + e.validationErrors().get(0) + ); + } + { + // Duplicate _shard_doc with PIT -> reject + SearchRequest searchRequest = new SearchRequest().source( + new SearchSourceBuilder().pointInTimeBuilder(new PointInTimeBuilder("id")) + .sort(SortBuilders.shardDocSort()) // first + .sort(SortBuilders.shardDocSort().order(SortOrder.DESC)) // second + ); + ActionRequestValidationException e = searchRequest.validate(); + assertNotNull(e); + assertEquals(1, e.validationErrors().size()); + assertEquals("duplicate _shard_doc sort detected. Specify it at most once.", e.validationErrors().get(0)); + } + { + // Duplicate detection insensitive to order differences (ASC + DESC is still duplicate) + SearchRequest searchRequest = new SearchRequest().source( + new SearchSourceBuilder().pointInTimeBuilder(new PointInTimeBuilder("id")) + .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) + .sort(SortBuilders.shardDocSort().order(SortOrder.DESC)) + ); + ActionRequestValidationException e = searchRequest.validate(); + assertNotNull(e); + assertEquals(1, e.validationErrors().size()); + assertEquals("duplicate _shard_doc sort detected. Specify it at most once.", e.validationErrors().get(0)); + } + { + // Duplicate via mixed builders: ShardDocSortBuilder + FieldSortBuilder("_shard_doc") -> reject + SearchRequest searchRequest = new SearchRequest().source( + new SearchSourceBuilder().pointInTimeBuilder(new PointInTimeBuilder("id")) + .sort(SortBuilders.shardDocSort()) + .sort(new FieldSortBuilder(ShardDocSortBuilder.NAME)) + ); + ActionRequestValidationException e = searchRequest.validate(); + assertNotNull(e); + assertEquals(1, e.validationErrors().size()); + assertEquals("duplicate _shard_doc sort detected. Specify it at most once.", e.validationErrors().get(0)); + } + { + // _shard_doc + search_after but NO PIT -> reject (explicitly exercising this combo) + SearchRequest searchRequest = new SearchRequest().source( + new SearchSourceBuilder().sort(SortBuilders.shardDocSort()).searchAfter(new Object[] { 1L }) + ); + ActionRequestValidationException e = searchRequest.validate(); + assertNotNull(e); + assertEquals(1, e.validationErrors().size()); + assertEquals( + "_shard_doc is only supported with point-in-time (PIT). 
Add a PIT or remove _shard_doc.", + e.validationErrors().get(0) + ); + } + { + // Error aggregation with SCROLL: collect multiple errors including shard_doc+scroll + SearchRequest searchRequest = new SearchRequest().source( + new SearchSourceBuilder().pointInTimeBuilder(new PointInTimeBuilder("id")) // PIT + scroll already illegal + .sort(SortBuilders.shardDocSort()) // shard_doc + scroll = illegal + .from(10) // also illegal with scroll + .size(0) // also illegal with scroll + ).scroll(TimeValue.timeValueSeconds(30)); + + ActionRequestValidationException e = searchRequest.validate(); + assertNotNull(e); + assertThat( + e.validationErrors(), + hasItems( + "using [point in time] is not allowed in a scroll context", + "_shard_doc cannot be used with scroll. Use PIT + search_after instead.", + "using [from] is not allowed in a scroll context", + "[size] cannot be [0] in a scroll context" + ) + ); + } + { + // PIT present + search_after but NO _shard_doc -> valid (control for PIT/search_after) + SearchRequest searchRequest = new SearchRequest().source( + new SearchSourceBuilder().pointInTimeBuilder(new PointInTimeBuilder("id")) + .sort("_id", SortOrder.ASC) + .searchAfter(new Object[] { "x" }) + ); + ActionRequestValidationException e = searchRequest.validate(); + assertNull(e); + } + { + // FieldSortBuilder("_shard_doc") WITH PIT -> valid (object-style declared as FieldSort) + SearchRequest searchRequest = new SearchRequest().source( + new SearchSourceBuilder().pointInTimeBuilder(new PointInTimeBuilder("id")) + .sort(new FieldSortBuilder(ShardDocSortBuilder.NAME).order(SortOrder.ASC)) + ); + ActionRequestValidationException e = searchRequest.validate(); + assertNull(e); + } + { + // Good: PIT + _shard_doc -> valid + SearchRequest searchRequest = new SearchRequest().source( + new SearchSourceBuilder().pointInTimeBuilder(new PointInTimeBuilder("id")).sort(SortBuilders.shardDocSort()) + ); + ActionRequestValidationException e = searchRequest.validate(); + assertNull("PIT + _shard_doc should be valid", e); + } + { + // Control: no PIT, no _shard_doc -> valid + SearchRequest searchRequest = new SearchRequest().source(new SearchSourceBuilder().sort("_id", SortOrder.ASC)); + ActionRequestValidationException e = searchRequest.validate(); + assertNull(e); + } } public void testCopyConstructor() throws IOException { diff --git a/server/src/test/java/org/opensearch/search/sort/ShardDocSortBuilderTests.java b/server/src/test/java/org/opensearch/search/sort/ShardDocSortBuilderTests.java new file mode 100644 index 0000000000000..7f026f249f8f4 --- /dev/null +++ b/server/src/test/java/org/opensearch/search/sort/ShardDocSortBuilderTests.java @@ -0,0 +1,120 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.search.sort; + +import org.apache.lucene.search.SortField; +import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.common.xcontent.json.JsonXContent; +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentParseException; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.search.DocValueFormat; +import org.opensearch.search.builder.SearchSourceBuilder; + +import java.io.IOException; + +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.instanceOf; + +public class ShardDocSortBuilderTests extends AbstractSortTestCase { + + @Override + protected ShardDocSortBuilder createTestItem() { + ShardDocSortBuilder b = new ShardDocSortBuilder(); + if (randomBoolean()) { + b.order(randomFrom(SortOrder.values())); + } + return b; + } + + @Override + protected ShardDocSortBuilder mutate(ShardDocSortBuilder original) throws IOException { + // only mutates order; builder is intentionally tiny + ShardDocSortBuilder copy = new ShardDocSortBuilder(original); + copy.order(original.order() == SortOrder.ASC ? SortOrder.DESC : SortOrder.ASC); + return copy; + } + + @Override + protected void sortFieldAssertions(ShardDocSortBuilder builder, SortField sortField, DocValueFormat format) throws IOException { + assertEquals(SortField.Type.CUSTOM, sortField.getType()); + assertEquals(builder.order() == SortOrder.DESC, sortField.getReverse()); + assertEquals(DocValueFormat.RAW, format); + } + + @Override + protected ShardDocSortBuilder fromXContent(XContentParser parser, String fieldName) throws IOException { + return ShardDocSortBuilder.fromXContent(parser, fieldName); + } + + public void testParseScalarAndObject() throws IOException { + String json = " [ { \"_shard_doc\": { \"order\": \"desc\" } } ] "; + XContentParser parser = createParser(JsonXContent.jsonXContent, json); + parser.nextToken(); + SortBuilder sb = SortBuilder.fromXContent(parser).get(0); + assertThat(sb, instanceOf(ShardDocSortBuilder.class)); + assertEquals(SortOrder.DESC, sb.order()); + + json = " [ { \"_shard_doc\": { \"order\": \"asc\" } } ] "; + parser = createParser(JsonXContent.jsonXContent, json); + parser.nextToken(); + sb = SortBuilder.fromXContent(parser).get(0); + assertThat(sb, instanceOf(ShardDocSortBuilder.class)); + assertEquals(SortOrder.ASC, sb.order()); + + json = " [ \"_shard_doc\" ] "; // default to asc + parser = createParser(JsonXContent.jsonXContent, json); + parser.nextToken(); + sb = SortBuilder.fromXContent(parser).get(0); + assertThat(sb, instanceOf(ShardDocSortBuilder.class)); + assertEquals(SortOrder.ASC, sb.order()); + + // from ShardDocSortBuilder + json = "{ \"_shard_doc\": { \"order\": \"desc\" } }"; + try (XContentParser p = createParser(JsonXContent.jsonXContent, json)) { + p.nextToken(); + p.nextToken(); + p.nextToken(); + ShardDocSortBuilder b = ShardDocSortBuilder.fromXContent(p, "_shard_doc"); + assertEquals(SortOrder.DESC, b.order()); + } + } + + public void testUnknownOptionFails() throws IOException { + String json = "{ \"_shard_doc\": { \"reverse\": true } }"; + try (XContentParser p = createParser(JsonXContent.jsonXContent, json)) { + p.nextToken(); + p.nextToken(); + p.nextToken(); + XContentParseException e = expectThrows(XContentParseException.class, () -> ShardDocSortBuilder.fromXContent(p, "_shard_doc")); + assertThat(e.getMessage(), containsString("unknown field [reverse]")); + } + } + + public 
void testXContentRoundTripAndSerialization() throws IOException { + ShardDocSortBuilder original = new ShardDocSortBuilder().order(SortOrder.DESC); + // XContent round-trip + XContentBuilder builder = JsonXContent.contentBuilder(); + original.toXContent(builder, ToXContent.EMPTY_PARAMS); + String rendered = builder.toString(); + SearchSourceBuilder ssb = SearchSourceBuilder.fromXContent( + createParser(JsonXContent.jsonXContent, "{ \"sort\": [" + rendered + "] }") + ); + SortBuilder parsed = ssb.sorts().get(0); + assertEquals(original, parsed); + assertEquals(original.hashCode(), parsed.hashCode()); + + // Stream serialization + BytesStreamOutput out = new BytesStreamOutput(); + original.writeTo(out); + ShardDocSortBuilder restored = new ShardDocSortBuilder(out.bytes().streamInput()); + assertEquals(original, restored); + } +}
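
Usage sketch (reviewer note, not part of the diff): putting the pieces above together, a client pages through an index with PIT + _shard_doc roughly as follows. This is a minimal sketch that mirrors the helpers in ShardDocFieldComparatorSourceIT rather than prescribing new API; the Client instance `client`, the index name "my_index", and the page size of 100 are placeholder assumptions.

    // Open a PIT over the target index (placeholder name), then page with _shard_doc as the only sort.
    CreatePitRequest pitRequest = new CreatePitRequest(TimeValue.timeValueMinutes(1), true);
    pitRequest.setIndices(new String[] { "my_index" });
    String pitId = client.execute(CreatePitAction.INSTANCE, pitRequest).actionGet().getId();
    try {
        Object[] after = null;
        while (true) {
            SearchSourceBuilder source = new SearchSourceBuilder().size(100)
                .pointInTimeBuilder(new PointInTimeBuilder(pitId).setKeepAlive(TimeValue.timeValueMinutes(1)))
                .sort(SortBuilders.shardDocSort().order(SortOrder.ASC));
            if (after != null) {
                source.searchAfter(after); // full sort tuple of the previous page's last hit
            }
            SearchResponse page = client.search(new SearchRequest().source(source)).actionGet();
            SearchHit[] hits = page.getHits().getHits();
            // ... consume hits ...
            if (hits.length < 100) break;                        // short page: scan complete
            after = hits[hits.length - 1].getSortValues();       // resume strictly after the last hit
        }
    } finally {
        client.execute(DeletePitAction.INSTANCE, new DeletePitRequest(Collections.singletonList(pitId))).actionGet();
    }

Because each _shard_doc key is a unique long per document within the PIT, this loop visits every live document exactly once, which is what the integration tests above assert.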