Skip to content

Commit 5a1ff2c

Browse files
Xingbo Wangfacebook-github-bot
authored andcommitted
Force caller to pass comparator in MultiScanArgs (#13970)
Summary: Force caller of MultiScanArgs to pass comparator. Pass comparator from CF handle to MultiScanArgs in NewMultiScan. Expand MultiScanArgs unit test with different comparator. Pull Request resolved: #13970 Test Plan: unit test Reviewed By: cbi42 Differential Revision: D82739270 Pulled By: xingbowang fbshipit-source-id: e709f4a333ad547c0ba6d24d8fb2b22e50e8a12f
1 parent 6a202c5 commit 5a1ff2c

File tree

9 files changed

+94
-46
lines changed

9 files changed

+94
-46
lines changed

db/version_set.cc

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1101,6 +1101,27 @@ class LevelIterator final : public InternalIterator {
11011101
read_seq_ = read_seq;
11021102
}
11031103

1104+
inline bool FileHasMultiScanArg(size_t file_index) {
1105+
if (file_to_scan_opts_.get()) {
1106+
auto it = file_to_scan_opts_->find(file_index);
1107+
if (it != file_to_scan_opts_->end()) {
1108+
return !it->second.empty();
1109+
}
1110+
}
1111+
return false;
1112+
}
1113+
1114+
MultiScanArgs& GetMultiScanArgForFile(size_t file_index) {
1115+
auto multi_scan_args_it = file_to_scan_opts_->find(file_index);
1116+
if (multi_scan_args_it == file_to_scan_opts_->end()) {
1117+
auto ret = file_to_scan_opts_->emplace(
1118+
file_index, MultiScanArgs(user_comparator_.user_comparator()));
1119+
multi_scan_args_it = ret.first;
1120+
assert(ret.second);
1121+
}
1122+
return multi_scan_args_it->second;
1123+
}
1124+
11041125
void Prepare(const MultiScanArgs* so) override {
11051126
// We assume here that scan_opts is sorted such that
11061127
// scan_opts[0].range.start < scan_opts[1].range.start, and non overlapping
@@ -1109,6 +1130,9 @@ class LevelIterator final : public InternalIterator {
11091130
}
11101131
scan_opts_ = so;
11111132

1133+
// Verify comparator is consistent
1134+
assert(so->GetComparator() == user_comparator_.user_comparator());
1135+
11121136
file_to_scan_opts_ = std::make_unique<ScanOptionsMap>();
11131137
for (size_t k = 0; k < scan_opts_->size(); k++) {
11141138
const ScanOptions& opt = scan_opts_->GetScanRanges().at(k);
@@ -1157,8 +1181,8 @@ class LevelIterator final : public InternalIterator {
11571181
// 3. [ S ] ...... [ E ]
11581182
for (auto i = fstart; i <= fend; i++) {
11591183
if (i < flevel_->num_files) {
1160-
(*file_to_scan_opts_)[i].insert(start.value(), end.value(),
1161-
opt.property_bag);
1184+
auto args = GetMultiScanArgForFile(i);
1185+
args.insert(start.value(), end.value(), opt.property_bag);
11621186
}
11631187
}
11641188
}
@@ -1562,9 +1586,10 @@ bool LevelIterator::SkipEmptyFileForward() {
15621586
if (file_iter_.iter() != nullptr) {
15631587
// If we are doing prepared scan opts then we should seek to the values
15641588
// specified by the scan opts
1565-
if (scan_opts_ && (*file_to_scan_opts_)[file_index_].size()) {
1589+
1590+
if (scan_opts_ && FileHasMultiScanArg(file_index_)) {
15661591
const ScanOptions& opts =
1567-
file_to_scan_opts_->at(file_index_).GetScanRanges().front();
1592+
GetMultiScanArgForFile(file_index_).GetScanRanges().front();
15681593
if (opts.range.start.has_value()) {
15691594
InternalKey target(*opts.range.start.AsPtr(), kMaxSequenceNumber,
15701595
kValueTypeForSeek);
@@ -1621,9 +1646,8 @@ void LevelIterator::SetFileIterator(InternalIterator* iter) {
16211646

16221647
InternalIterator* old_iter = file_iter_.Set(iter);
16231648
if (iter && scan_opts_) {
1624-
if (file_to_scan_opts_.get() &&
1625-
file_to_scan_opts_->find(file_index_) != file_to_scan_opts_->end()) {
1626-
const MultiScanArgs& new_opts = file_to_scan_opts_->at(file_index_);
1649+
if (FileHasMultiScanArg(file_index_)) {
1650+
const MultiScanArgs& new_opts = GetMultiScanArgForFile(file_index_);
16271651
file_iter_.Prepare(&new_opts);
16281652
} else {
16291653
file_iter_.Prepare(scan_opts_);

db_stress_tool/db_stress_test_base.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1692,7 +1692,8 @@ Status StressTest::TestMultiScan(ThreadState* thread,
16921692

16931693
std::vector<std::string> start_key_strs;
16941694
std::vector<std::string> end_key_strs;
1695-
MultiScanArgs scan_opts;
1695+
// TODO support reverse BytewiseComparator in the stress test
1696+
MultiScanArgs scan_opts(BytewiseComparator());
16961697
scan_opts.use_async_io = FLAGS_multiscan_use_async_io;
16971698
start_key_strs.reserve(num_scans);
16981699
end_key_strs.reserve(num_scans);

include/rocksdb/db.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1118,11 +1118,11 @@ class DB {
11181118
// // Check ex.what()
11191119
// }
11201120
virtual std::unique_ptr<MultiScan> NewMultiScan(
1121-
const ReadOptions& /*options*/, ColumnFamilyHandle* /*column_family*/,
1121+
const ReadOptions& /*options*/, ColumnFamilyHandle* column_family,
11221122
const MultiScanArgs& /*scan_opts*/) {
11231123
std::unique_ptr<Iterator> iter(NewErrorIterator(Status::NotSupported()));
1124-
std::unique_ptr<MultiScan> ms_iter =
1125-
std::make_unique<MultiScan>(std::move(iter));
1124+
std::unique_ptr<MultiScan> ms_iter = std::make_unique<MultiScan>(
1125+
column_family->GetComparator(), std::move(iter));
11261126
return ms_iter;
11271127
}
11281128

include/rocksdb/multi_scan.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,8 +155,9 @@ class MultiScan {
155155
MultiScan(const ReadOptions& read_options, const MultiScanArgs& scan_opts,
156156
DB* db, ColumnFamilyHandle* cfh);
157157

158-
explicit MultiScan(std::unique_ptr<Iterator>&& db_iter)
159-
: db_iter_(std::move(db_iter)) {}
158+
explicit MultiScan(const Comparator* comp,
159+
std::unique_ptr<Iterator>&& db_iter)
160+
: scan_opts_(comp), db_iter_(std::move(db_iter)) {}
160161

161162
class MultiScanIterator {
162163
public:

include/rocksdb/options.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1782,8 +1782,7 @@ struct ScanOptions {
17821782
class MultiScanArgs {
17831783
public:
17841784
// Constructor that takes a comparator
1785-
explicit MultiScanArgs(const Comparator* comparator = BytewiseComparator())
1786-
: comp_(comparator) {}
1785+
explicit MultiScanArgs(const Comparator* comparator) : comp_(comparator) {}
17871786

17881787
// Copy Constructor
17891788
MultiScanArgs(const MultiScanArgs& other) {
@@ -1855,6 +1854,8 @@ class MultiScanArgs {
18551854
return original_ranges_;
18561855
}
18571856

1857+
const Comparator* GetComparator() const { return comp_; }
1858+
18581859
uint64_t io_coalesce_threshold = 16 << 10; // 16KB by default
18591860

18601861
// Maximum size (in bytes) for the data blocks loaded by a MultiScan.

table/block_based/block_based_table_reader_test.cc

Lines changed: 37 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ class BlockBasedTableReaderBaseTest : public testing::Test {
5252
// user defined timestamps and different sequence number to differentiate them
5353
static std::vector<std::pair<std::string, std::string>> GenerateKVMap(
5454
int num_block = 2, bool mixed_with_human_readable_string_value = false,
55-
size_t ts_sz = 0, bool same_key_diff_ts = false) {
55+
size_t ts_sz = 0, bool same_key_diff_ts = false,
56+
const Comparator* comparator = BytewiseComparator()) {
5657
std::vector<std::pair<std::string, std::string>> kv;
5758

5859
SequenceNumber seq_no = 0;
@@ -100,6 +101,10 @@ class BlockBasedTableReaderBaseTest : public testing::Test {
100101
}
101102
}
102103
}
104+
auto comparator_name = std::string(comparator->Name());
105+
if (comparator_name.find("Reverse") != std::string::npos) {
106+
std::reverse(kv.begin(), kv.end());
107+
}
103108
return kv;
104109
}
105110

@@ -128,6 +133,7 @@ class BlockBasedTableReaderBaseTest : public testing::Test {
128133

129134
InternalKeyComparator comparator(ioptions.user_comparator);
130135
ColumnFamilyOptions cf_options;
136+
cf_options.comparator = ioptions.user_comparator;
131137
cf_options.prefix_extractor = options_.prefix_extractor;
132138
MutableCFOptions moptions(cf_options);
133139
CompressionOptions compression_opts;
@@ -255,11 +261,13 @@ class BlockBasedTableReaderBaseTest : public testing::Test {
255261
// generate keys with different user provided key, same user-defined
256262
// timestamps (if udt enabled), same sequence number. This test mode is
257263
// used for testing `Get`, `MultiGet`, and `NewIterator`.
264+
// Param 9: test both the default comparator and a reverse comparator.
258265
class BlockBasedTableReaderTest
259266
: public BlockBasedTableReaderBaseTest,
260-
public testing::WithParamInterface<std::tuple<
261-
CompressionType, bool, BlockBasedTableOptions::IndexType, bool,
262-
test::UserDefinedTimestampTestMode, uint32_t, uint32_t, bool>> {
267+
public testing::WithParamInterface<
268+
std::tuple<CompressionType, bool, BlockBasedTableOptions::IndexType,
269+
bool, test::UserDefinedTimestampTestMode, uint32_t,
270+
uint32_t, bool, const Comparator*>> {
263271
protected:
264272
void SetUp() override {
265273
compression_type_ = std::get<0>(GetParam());
@@ -270,6 +278,7 @@ class BlockBasedTableReaderTest
270278
compression_parallel_threads_ = std::get<5>(GetParam());
271279
compression_dict_bytes_ = std::get<6>(GetParam());
272280
same_key_diff_ts_ = std::get<7>(GetParam());
281+
comparator_ = std::get<8>(GetParam());
273282
BlockBasedTableReaderBaseTest::SetUp();
274283
}
275284

@@ -295,6 +304,7 @@ class BlockBasedTableReaderTest
295304
uint32_t compression_parallel_threads_;
296305
uint32_t compression_dict_bytes_;
297306
bool same_key_diff_ts_;
307+
const Comparator* comparator_{};
298308
};
299309

300310
class BlockBasedTableReaderGetTest : public BlockBasedTableReaderTest {};
@@ -1022,14 +1032,16 @@ TEST_P(BlockBasedTableReaderTest, MultiScanPrepare) {
10221032
SCOPED_TRACE(std::string("use_async_io=") + std::to_string(use_async_io));
10231033
Options options;
10241034
options.statistics = CreateDBStatistics();
1035+
options.comparator = comparator_;
10251036
std::shared_ptr<FileSystem> fs = options.env->GetFileSystem();
10261037
ReadOptions read_opts;
10271038
read_opts.fill_cache = fill_cache;
10281039
size_t ts_sz = options.comparator->timestamp_size();
10291040
std::vector<std::pair<std::string, std::string>> kv =
10301041
BlockBasedTableReaderBaseTest::GenerateKVMap(
10311042
100 /* num_block */,
1032-
true /* mixed_with_human_readable_string_value */, ts_sz);
1043+
true /* mixed_with_human_readable_string_value */, ts_sz,
1044+
same_key_diff_ts_, comparator_);
10331045
std::string table_name = "BlockBasedTableReaderTest_NewIterator" +
10341046
CompressionTypeToString(compression_type_) +
10351047
"_async" + std::to_string(use_async_io);
@@ -1052,7 +1064,7 @@ TEST_P(BlockBasedTableReaderTest, MultiScanPrepare) {
10521064
read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
10531065
/*skip_filters=*/false, TableReaderCaller::kUncategorized));
10541066

1055-
MultiScanArgs scan_options(BytewiseComparator());
1067+
MultiScanArgs scan_options(comparator_);
10561068
scan_options.use_async_io = use_async_io;
10571069
scan_options.insert(ExtractUserKey(kv[0].first),
10581070
ExtractUserKey(kv[kEntriesPerBlock].first));
@@ -1087,7 +1099,7 @@ TEST_P(BlockBasedTableReaderTest, MultiScanPrepare) {
10871099
iter.reset(table->NewIterator(
10881100
read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
10891101
/*skip_filters=*/false, TableReaderCaller::kUncategorized));
1090-
scan_options = MultiScanArgs(BytewiseComparator());
1102+
scan_options = MultiScanArgs(comparator_);
10911103
scan_options.insert(ExtractUserKey(kv[70 * kEntriesPerBlock].first),
10921104
ExtractUserKey(kv[75 * kEntriesPerBlock].first));
10931105
scan_options.insert(ExtractUserKey(kv[90 * kEntriesPerBlock].first),
@@ -1125,7 +1137,7 @@ TEST_P(BlockBasedTableReaderTest, MultiScanPrepare) {
11251137
// From reads above, blocks 70-75 and 90-95 already in cache
11261138
// So we should read 50-70 76-89 96-99 in three I/Os.
11271139
// If fill_cache is false, then we'll do one giant I/O.
1128-
scan_options = MultiScanArgs(BytewiseComparator());
1140+
scan_options = MultiScanArgs(comparator_);
11291141
scan_options.use_async_io = use_async_io;
11301142
scan_options.insert(ExtractUserKey(kv[50 * kEntriesPerBlock].first));
11311143
read_count_before =
@@ -1165,7 +1177,7 @@ TEST_P(BlockBasedTableReaderTest, MultiScanPrepare) {
11651177
iter.reset(table->NewIterator(
11661178
read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
11671179
/*skip_filters=*/false, TableReaderCaller::kUncategorized));
1168-
scan_options = MultiScanArgs(BytewiseComparator());
1180+
scan_options = MultiScanArgs(comparator_);
11691181
scan_options.use_async_io = use_async_io;
11701182
scan_options.insert(ExtractUserKey(kv[10 * kEntriesPerBlock].first),
11711183
ExtractUserKey(kv[20 * kEntriesPerBlock].first));
@@ -1195,7 +1207,7 @@ TEST_P(BlockBasedTableReaderTest, MultiScanPrepare) {
11951207
iter.reset(table->NewIterator(
11961208
read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
11971209
/*skip_filters=*/false, TableReaderCaller::kUncategorized));
1198-
scan_options = MultiScanArgs(BytewiseComparator());
1210+
scan_options = MultiScanArgs(comparator_);
11991211
scan_options.use_async_io = use_async_io;
12001212
scan_options.insert(ExtractUserKey(kv[10 * kEntriesPerBlock].first));
12011213
scan_options.insert(ExtractUserKey(kv[11 * kEntriesPerBlock].first));
@@ -1226,14 +1238,15 @@ TEST_P(BlockBasedTableReaderTest, MultiScanPrefetchSizeLimit) {
12261238
return;
12271239
}
12281240
Options options;
1241+
options.comparator = comparator_;
12291242
ReadOptions read_opts;
12301243
size_t ts_sz = options.comparator->timestamp_size();
12311244

12321245
// Generate data that spans multiple blocks
12331246
std::vector<std::pair<std::string, std::string>> kv =
12341247
BlockBasedTableReaderBaseTest::GenerateKVMap(
12351248
20 /* num_block */, true /* mixed_with_human_readable_string_value */,
1236-
ts_sz);
1249+
ts_sz, same_key_diff_ts_, comparator_);
12371250

12381251
std::string table_name = "BlockBasedTableReaderTest_PrefetchSizeLimit" +
12391252
CompressionTypeToString(compression_type_);
@@ -1259,7 +1272,7 @@ TEST_P(BlockBasedTableReaderTest, MultiScanPrefetchSizeLimit) {
12591272
read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
12601273
/*skip_filters=*/false, TableReaderCaller::kUncategorized));
12611274

1262-
MultiScanArgs scan_options(BytewiseComparator());
1275+
MultiScanArgs scan_options(comparator_);
12631276
scan_options.max_prefetch_size = 1024; // less than block size
12641277
scan_options.insert(ExtractUserKey(kv[0].first),
12651278
ExtractUserKey(kv[5].first));
@@ -1279,7 +1292,7 @@ TEST_P(BlockBasedTableReaderTest, MultiScanPrefetchSizeLimit) {
12791292
read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
12801293
/*skip_filters=*/false, TableReaderCaller::kUncategorized));
12811294

1282-
MultiScanArgs scan_options(BytewiseComparator());
1295+
MultiScanArgs scan_options(comparator_);
12831296
scan_options.max_prefetch_size = 9 * 1024; // 9KB - 2 blocks with buffer
12841297
scan_options.insert(ExtractUserKey(kv[1 * kEntriesPerBlock].first),
12851298
ExtractUserKey(kv[8 * kEntriesPerBlock].first));
@@ -1310,7 +1323,7 @@ TEST_P(BlockBasedTableReaderTest, MultiScanPrefetchSizeLimit) {
13101323
read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
13111324
/*skip_filters=*/false, TableReaderCaller::kUncategorized));
13121325

1313-
MultiScanArgs scan_options(BytewiseComparator());
1326+
MultiScanArgs scan_options(comparator_);
13141327
scan_options.max_prefetch_size = 3 * 4 * 1024 + 1024; // 3 blocks + 1KB
13151328
scan_options.insert(ExtractUserKey(kv[0].first),
13161329
ExtractUserKey(kv[5 * kEntriesPerBlock].first));
@@ -1336,7 +1349,7 @@ TEST_P(BlockBasedTableReaderTest, MultiScanPrefetchSizeLimit) {
13361349
read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
13371350
/*skip_filters=*/false, TableReaderCaller::kUncategorized));
13381351

1339-
MultiScanArgs scan_options(BytewiseComparator());
1352+
MultiScanArgs scan_options(comparator_);
13401353
scan_options.max_prefetch_size = 5 * 4 * 1024 + 1024; // 5 blocks + 1KB
13411354
// Will read 5 entries from first scan range, and 4 blocks from the second
13421355
// scan range
@@ -1373,7 +1386,7 @@ TEST_P(BlockBasedTableReaderTest, MultiScanPrefetchSizeLimit) {
13731386
read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
13741387
/*skip_filters=*/false, TableReaderCaller::kUncategorized));
13751388

1376-
MultiScanArgs scan_options(BytewiseComparator());
1389+
MultiScanArgs scan_options(comparator_);
13771390
scan_options.max_prefetch_size = 10 * 1024 * 1024; // 10MB
13781391
scan_options.insert(ExtractUserKey(kv[0].first),
13791392
ExtractUserKey(kv[5].first));
@@ -1440,7 +1453,8 @@ INSTANTIATE_TEST_CASE_P(
14401453
BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey),
14411454
::testing::Values(false), ::testing::ValuesIn(test::GetUDTTestModes()),
14421455
::testing::Values(1, 2), ::testing::Values(0, 4096),
1443-
::testing::Values(false)));
1456+
::testing::Values(false),
1457+
::testing::Values(BytewiseComparator(), ReverseBytewiseComparator())));
14441458
INSTANTIATE_TEST_CASE_P(
14451459
BlockBasedTableReaderGetTest, BlockBasedTableReaderGetTest,
14461460
::testing::Combine(
@@ -1452,7 +1466,8 @@ INSTANTIATE_TEST_CASE_P(
14521466
BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey),
14531467
::testing::Values(false), ::testing::ValuesIn(test::GetUDTTestModes()),
14541468
::testing::Values(1, 2), ::testing::Values(0, 4096),
1455-
::testing::Values(false, true)));
1469+
::testing::Values(false, true),
1470+
::testing::Values(BytewiseComparator(), ReverseBytewiseComparator())));
14561471
INSTANTIATE_TEST_CASE_P(
14571472
StrictCapacityLimitReaderTest, StrictCapacityLimitReaderTest,
14581473
::testing::Combine(
@@ -1461,7 +1476,8 @@ INSTANTIATE_TEST_CASE_P(
14611476
BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch),
14621477
::testing::Values(false), ::testing::ValuesIn(test::GetUDTTestModes()),
14631478
::testing::Values(1, 2), ::testing::Values(0),
1464-
::testing::Values(false, true)));
1479+
::testing::Values(false, true),
1480+
::testing::Values(BytewiseComparator(), ReverseBytewiseComparator())));
14651481
INSTANTIATE_TEST_CASE_P(
14661482
VerifyChecksum, BlockBasedTableReaderTestVerifyChecksum,
14671483
::testing::Combine(
@@ -1470,8 +1486,8 @@ INSTANTIATE_TEST_CASE_P(
14701486
::testing::Values(
14711487
BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch),
14721488
::testing::Values(true), ::testing::ValuesIn(test::GetUDTTestModes()),
1473-
::testing::Values(1, 2), ::testing::Values(0),
1474-
::testing::Values(false)));
1489+
::testing::Values(1, 2), ::testing::Values(0), ::testing::Values(false),
1490+
::testing::Values(BytewiseComparator(), ReverseBytewiseComparator())));
14751491

14761492
} // namespace ROCKSDB_NAMESPACE
14771493

table/table_test.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8056,7 +8056,7 @@ TEST_F(UserDefinedIndexTest, IngestTest) {
80568056
ro.iterate_upper_bound = nullptr;
80578057
iter.reset(db->NewIterator(ro, cfh));
80588058
ASSERT_NE(iter, nullptr);
8059-
MultiScanArgs scan_opts;
8059+
MultiScanArgs scan_opts(options.comparator);
80608060
std::unordered_map<std::string, std::string> property_bag;
80618061
property_bag["count"] = std::to_string(25);
80628062
scan_opts.insert(Slice("key20"), std::optional(property_bag));
@@ -8146,7 +8146,7 @@ TEST_F(UserDefinedIndexTest, EmptyRangeTest) {
81468146

81478147
ro.table_index_factory = user_defined_index_factory.get();
81488148
std::vector<int> key_counts;
8149-
MultiScanArgs scan_opts;
8149+
MultiScanArgs scan_opts(options.comparator);
81508150
std::unordered_map<std::string, std::string> property_bag;
81518151
property_bag["count"] = std::to_string(5);
81528152
// Empty scans
@@ -8405,7 +8405,7 @@ TEST_F(UserDefinedIndexTest, ConfigTest) {
84058405
ro.table_index_factory = user_defined_index_factory.get();
84068406
std::unique_ptr<Iterator> iter(db->NewIterator(ro, cfh));
84078407
ASSERT_NE(iter, nullptr);
8408-
MultiScanArgs scan_opts;
8408+
MultiScanArgs scan_opts(options.comparator);
84098409
std::unordered_map<std::string, std::string> property_bag;
84108410
property_bag["count"] = std::to_string(25);
84118411
scan_opts.insert(Slice("key20"), std::optional(property_bag));

0 commit comments

Comments
 (0)