Skip to content

Commit

Permalink
Add reproducer for pagination skipping bug
Browse files Browse the repository at this point in the history
Signed-off-by: Simeon Widdis <[email protected]>
  • Loading branch information
Swiddis committed Oct 8, 2024
1 parent ac8678c commit e143507
Show file tree
Hide file tree
Showing 3 changed files with 147 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -707,6 +707,13 @@ public enum Index {
"calcs",
getMappingFile("calcs_index_mappings.json"),
"src/test/resources/calcs.json"),
// Calcs has enough records for shards to be interesting, but updating the existing mapping with shards in-place
// breaks existing tests. Aside from introducing a primary shard setting > 1, this index is identical to CALCS.
CALCS_WITH_SHARDS(
TestsConstants.TEST_INDEX_CALCS,
"calcs",
getMappingFile("calcs_with_shards_index_mappings.json"),
"src/test/resources/calcs.json"),
DATE_FORMATS(
TestsConstants.TEST_INDEX_DATE_FORMATS,
"date_formats",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,23 @@

package org.opensearch.sql.sql;

import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_PHRASE;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.json.JSONObject;
import org.junit.After;
import org.junit.Test;
import org.opensearch.client.ResponseException;
import org.opensearch.sql.legacy.SQLIntegTestCase;

import static org.opensearch.sql.legacy.TestsConstants.*;

public class PaginationWindowIT extends SQLIntegTestCase {
@Override
public void init() throws IOException {
loadIndex(Index.PHRASE);
loadIndex(Index.CALCS_WITH_SHARDS);
}

@After
Expand Down Expand Up @@ -92,4 +96,39 @@ public void testFetchSizeLargerThanResultWindowFails() throws IOException {
() -> executeQueryTemplate("SELECT * FROM %s", TEST_INDEX_PHRASE, window + 1));
resetMaxResultWindow(TEST_INDEX_PHRASE);
}

@Test
public void testMultiShardPagesEqualsActualData() throws IOException {
// A bug made it so when pulling unordered data from an index with multiple shards, data gets lost if the fetchSize
// is not a multiple of the shard count. This tests that, for an index with 4 shards, pulling one page of 10 records
// is equivalent to pulling two pages of 5 records.

var query = "SELECT key from " + TEST_INDEX_CALCS;

var expectedResponse = new JSONObject(executeFetchQuery(query, 10, "jdbc"));
var expectedRows = expectedResponse.getJSONArray("datarows");

List<String> expectedKeys = new ArrayList<>();
for (int i = 0; i < expectedRows.length(); i++) {
expectedKeys.add(expectedRows.getJSONArray(i).getString(0));
}

var actualPage1 = new JSONObject(executeFetchQuery(query, 5, "jdbc"));

var actualRows1 = actualPage1.getJSONArray("datarows");
var cursor = actualPage1.getString("cursor");
var actualPage2 = executeCursorQuery(cursor);

var actualRows2 = actualPage2.getJSONArray("datarows");

List<String> actualKeys = new ArrayList<>();
for (int i = 0; i < actualRows1.length(); i++) {
actualKeys.add(actualRows1.getJSONArray(i).getString(0));
}
for (int i = 0; i < actualRows2.length(); i++) {
actualKeys.add(actualRows2.getJSONArray(i).getString(0));
}

assertEquals(expectedKeys, actualKeys);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
{
"mappings" : {
"properties" : {
"key" : {
"type" : "keyword"
},
"num0" : {
"type" : "double"
},
"num1" : {
"type" : "double"
},
"num2" : {
"type" : "double"
},
"num3" : {
"type" : "double"
},
"num4" : {
"type" : "double"
},
"str0" : {
"type" : "keyword"
},
"str1" : {
"type" : "keyword"
},
"str2" : {
"type" : "keyword"
},
"str3" : {
"type" : "keyword"
},
"int0" : {
"type" : "integer"
},
"int1" : {
"type" : "integer"
},
"int2" : {
"type" : "integer"
},
"int3" : {
"type" : "integer"
},
"bool0" : {
"type" : "boolean"
},
"bool1" : {
"type" : "boolean"
},
"bool2" : {
"type" : "boolean"
},
"bool3" : {
"type" : "boolean"
},
"date0" : {
"type" : "date",
"format": "year_month_day"
},
"date1" : {
"type" : "date",
"format": "year_month_day"
},
"date2" : {
"type" : "date",
"format": "year_month_day"
},
"date3" : {
"type" : "date",
"format": "year_month_day"
},
"time0" : {
"type" : "date",
"format": "date_time_no_millis"
},
"time1" : {
"type" : "date",
"format": "hour_minute_second"
},
"datetime0" : {
"type" : "date",
"format": "date_time_no_millis"
},
"datetime1" : {
"type" : "date"
},
"zzz" : {
"type" : "keyword"
}
}
},
"settings": {
"index": {
"number_of_shards": 4
}
}
}

0 comments on commit e143507

Please sign in to comment.