Skip to content

Commit 2620c85

Browse files
cbi42facebook-github-bot
authored andcommitted
Support async IO for MultiScan (#13932)
Summary: add option MultiScanArgs::use_async_io option and implementation for using ReadAsync() for multiscan. Read requests are submitted during Prepare() and polled during actual scanning. Pull Request resolved: #13932 Test Plan: - updated existing unit test to use async_io. - crash test: `python3 -u ./tools/db_crashtest.py whitebox --iterpercent=60 --prefix_size=-1 --prefixpercent=0 --readpercent=0 --test_batches_snapshots=0 --use_multiscan=1 --read_fault_one_in=0 --kill_random_test=88888 --interval=60 --multiscan_use_async_io=1 --mmap_read=0` Benchmark: - Default multiscan benchmark: ``` Set up: /db_bench --benchmarks="fillseq,compact" --disable_wal=1 --threads=1 --num_levels=1 --compaction_style=2 --fifo_compaction_max_table_files_size_mb=1000 --write_buffer_size=268435456 Without async IO: ./db_bench --db="/tmp/rocksdbtest-543376/dbbench" --use_existing_db=1 --benchmarks=multiscan --disable_auto_compactions=1 --seek_nexts=100 --threads=32 --duration=10 --statistics=1 --use_direct_reads=1 --multiscan_use_async_io=0 multiscan : 415.569 micros/op 75805 ops/sec 10.355 seconds 784968 operations; (multscans:24999) rocksdb.read.async.micros COUNT : 0 With asycn IO: ./db_bench --db="/tmp/rocksdbtest-543376/dbbench" --use_existing_db=1 --benchmarks=multiscan --disable_auto_compactions=1 --seek_nexts=100 --threads=32 --duration=10 --statistics=1 --use_direct_reads=1 --multiscan_use_async_io=1 multiscan : 413.236 micros/op 76044 ops/sec 10.375 seconds 788968 operations; (multscans:24999) rocksdb.read.async.micros COUNT : 3916499 Similar performance. ``` - Larger scan, more scans per multiscan, do not coalesce IO so that async IO can progress while scanning, and use one thread: ``` multiscan_stride = 1000 multiscan_size = 100 seek_nexts = 1000 ./db_bench --db="/tmp/rocksdbtest-543376/dbbench" --use_existing_db=1 --benchmarks=multiscan --disable_auto_compactions=1 --threads=1 --duration=10 --statistics=0 --use_direct_reads=1 --cache_size=2097152 --multiscan_size=100 --multiscan_stride=1000 --seek_nexts=1000 --seed=1 --multiscan_coalesce_threshold=0 --multiscan_use_async_io=0 Without async IO: multiscan : 20495.205 micros/op 48 ops/sec 10.002 seconds 488 operations; (multscans:488) With async IO: multiscan : 18337.883 micros/op 54 ops/sec 10.013 seconds 546 operations; (multscans:546) ~10% improvement in throughput ``` Reviewed By: xingbowang Differential Revision: D82077818 Pulled By: cbi42 fbshipit-source-id: 66e32cf4039183c4841827409286dfbaa6dfbcd8
1 parent 29d9798 commit 2620c85

File tree

13 files changed

+931
-524
lines changed

13 files changed

+931
-524
lines changed

db/version_set.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1165,6 +1165,7 @@ class LevelIterator final : public InternalIterator {
11651165
// Propagate io colaescing threshold
11661166
for (auto& file_to_arg : *file_to_scan_opts_) {
11671167
file_to_arg.second.io_coalesce_threshold = so->io_coalesce_threshold;
1168+
file_to_arg.second.use_async_io = so->use_async_io;
11681169
}
11691170
}
11701171

db_stress_tool/db_stress_common.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,7 @@ DECLARE_uint32(memtable_avg_op_scan_flush_trigger);
432432
DECLARE_uint32(ingest_wbwi_one_in);
433433
DECLARE_bool(universal_reduce_file_locking);
434434
DECLARE_bool(use_multiscan);
435+
DECLARE_bool(multiscan_use_async_io);
435436

436437
// Compaction deletion trigger declarations for stress testing
437438
DECLARE_bool(enable_compaction_on_deletion_trigger);

db_stress_tool/db_stress_gflags.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1535,4 +1535,7 @@ DEFINE_bool(
15351535
DEFINE_bool(use_multiscan, false,
15361536
"If set, use the batched MultiScan API for scans.");
15371537

1538+
DEFINE_bool(multiscan_use_async_io, false,
1539+
"If set, enable async_io for MultiScan operations.");
1540+
15381541
#endif // GFLAGS

db_stress_tool/db_stress_test_base.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1693,6 +1693,7 @@ Status StressTest::TestMultiScan(ThreadState* thread,
16931693
std::vector<std::string> start_key_strs;
16941694
std::vector<std::string> end_key_strs;
16951695
MultiScanArgs scan_opts;
1696+
scan_opts.use_async_io = FLAGS_multiscan_use_async_io;
16961697
start_key_strs.reserve(num_scans);
16971698
end_key_strs.reserve(num_scans);
16981699

env/fs_posix.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1153,7 +1153,7 @@ class PosixFileSystem : public FileSystem {
11531153
return IOStatus::OK();
11541154
#else
11551155
(void)io_handles;
1156-
return IOStatus::NotSupported("Poll");
1156+
return IOStatus::NotSupported("Poll not implemented");
11571157
#endif
11581158
}
11591159

file/random_access_file_reader.cc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,13 @@ IOStatus RandomAccessFileReader::PrepareIOOptions(const ReadOptions& ro,
497497
}
498498
}
499499

500+
// Notes for when direct_io is enabled:
501+
// Unless req.offset, req.len, req.scratch are all already aligned,
502+
// RandomAccessFileReader will creats aligned requests and aligned buffer for
503+
// the request. User should only provide either req.scratch or aligned_buf. If
504+
// only req.scratch is provided, result will be copied from allocated aligned
505+
// buffer to req.scratch. If only alignd_buf is provided, it will be set to
506+
// the ailgned buf allocated by RandomAccessFileReader and saves a copy.
500507
IOStatus RandomAccessFileReader::ReadAsync(
501508
FSReadRequest& req, const IOOptions& opts,
502509
std::function<void(FSReadRequest&, void*)> cb, void* cb_arg,

include/rocksdb/options.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1791,10 +1791,12 @@ class MultiScanArgs {
17911791
original_ranges_ = other.original_ranges_;
17921792
io_coalesce_threshold = other.io_coalesce_threshold;
17931793
max_prefetch_size = other.max_prefetch_size;
1794+
use_async_io = other.use_async_io;
17941795
}
17951796
MultiScanArgs(MultiScanArgs&& other) noexcept
17961797
: io_coalesce_threshold(other.io_coalesce_threshold),
17971798
max_prefetch_size(other.max_prefetch_size),
1799+
use_async_io(other.use_async_io),
17981800
comp_(other.comp_),
17991801
original_ranges_(std::move(other.original_ranges_)) {}
18001802

@@ -1803,6 +1805,7 @@ class MultiScanArgs {
18031805
original_ranges_ = other.original_ranges_;
18041806
io_coalesce_threshold = other.io_coalesce_threshold;
18051807
max_prefetch_size = other.max_prefetch_size;
1808+
use_async_io = other.use_async_io;
18061809
return *this;
18071810
}
18081811

@@ -1812,6 +1815,7 @@ class MultiScanArgs {
18121815
original_ranges_ = std::move(other.original_ranges_);
18131816
io_coalesce_threshold = other.io_coalesce_threshold;
18141817
max_prefetch_size = other.max_prefetch_size;
1818+
use_async_io = other.use_async_io;
18151819
}
18161820
return *this;
18171821
}
@@ -1865,6 +1869,11 @@ class MultiScanArgs {
18651869
// Note that this limit is per file and applies to compressed block size.
18661870
uint64_t max_prefetch_size = 0;
18671871

1872+
// Enable async I/O for multi-scan operations
1873+
// When true, BlockBasedTableIterator will use ReadAsync() for reading blocks
1874+
// When false, it will use synchronous MultiRead().
1875+
bool use_async_io = false;
1876+
18681877
private:
18691878
// The comparator used for ordering ranges
18701879
const Comparator* comp_;

0 commit comments

Comments
 (0)