Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion include/seastar/core/reactor_config.hh
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ struct reactor_config {
bool strict_o_direct = true;
bool bypass_fsync = false;
bool no_poll_aio = false;
bool aio_nowait_works = false;
std::optional<bool> aio_nowait_works = false;
bool abort_on_too_long_task_queue = false;
};
/// \endcond
Expand Down
14 changes: 8 additions & 6 deletions src/core/file-impl.hh
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ struct fs_info;

}

/// Selects which DMA request kinds are built with the nowait argument set.
///
/// - yes:       both read and write requests set it
/// - no:        neither read nor write requests set it
/// - read_only: read requests set it, write requests do not
enum class nowait_mode {
    yes,
    no,
    read_only,
};

class posix_file_handle_impl : public seastar::file_handle_impl {
int _fd;
std::atomic<unsigned>* _refcount;
Expand All @@ -48,14 +50,14 @@ class posix_file_handle_impl : public seastar::file_handle_impl {
uint32_t _disk_read_dma_alignment;
uint32_t _disk_write_dma_alignment;
uint32_t _disk_overwrite_dma_alignment;
bool _nowait_works;
const nowait_mode _nowait_works;
public:
posix_file_handle_impl(int fd, open_flags f, std::atomic<unsigned>* refcount, dev_t device_id,
uint32_t memory_dma_alignment,
uint32_t disk_read_dma_alignment,
uint32_t disk_write_dma_alignment,
uint32_t disk_overwrite_dma_alignment,
bool nowait_works)
nowait_mode nowait_works)
: _fd(fd), _refcount(refcount), _device_id(device_id), _open_flags(f)
, _memory_dma_alignment(memory_dma_alignment)
, _disk_read_dma_alignment(disk_read_dma_alignment)
Expand All @@ -72,21 +74,21 @@ public:

class posix_file_impl : public file_impl {
std::atomic<unsigned>* _refcount = nullptr;
const bool _nowait_works;
const nowait_mode _nowait_works;
const dev_t _device_id;
io_queue& _io_queue;
const open_flags _open_flags;
protected:
int _fd;

posix_file_impl(int fd, open_flags, file_open_options options, dev_t device_id, bool nowait_works);
posix_file_impl(int fd, open_flags, file_open_options options, dev_t device_id, nowait_mode nowait_works);
posix_file_impl(int fd, open_flags, file_open_options options, dev_t device_id, const internal::fs_info& fsi);
posix_file_impl(int fd, open_flags, std::atomic<unsigned>* refcount, dev_t device_id,
uint32_t memory_dma_alignment,
uint32_t disk_read_dma_alignment,
uint32_t disk_write_dma_alignment,
uint32_t disk_overwrite_dma_alignment,
bool nowait_works);
nowait_mode nowait_works);
public:
virtual ~posix_file_impl() override;
future<> flush() noexcept override;
Expand Down Expand Up @@ -157,7 +159,7 @@ public:
posix_file_real_impl(int fd, open_flags of, file_open_options options, const internal::fs_info& fsi, dev_t device_id)
: posix_file_impl(fd, of, std::move(options), device_id, fsi) {}
posix_file_real_impl(int fd, open_flags of, std::atomic<unsigned>* refcount, dev_t device_id,
uint32_t memory_dma_alignment, uint32_t disk_read_dma_alignment, uint32_t disk_write_dma_alignment, uint32_t disk_overwrite_dma_alignment, bool nowait_works)
uint32_t memory_dma_alignment, uint32_t disk_read_dma_alignment, uint32_t disk_write_dma_alignment, uint32_t disk_overwrite_dma_alignment, nowait_mode nowait_works)
: posix_file_impl(fd, of, refcount, device_id, memory_dma_alignment, disk_read_dma_alignment, disk_write_dma_alignment, disk_overwrite_dma_alignment, nowait_works) {}
virtual future<size_t> read_dma(uint64_t pos, void* buffer, size_t len, io_intent* intent) noexcept override;
virtual future<size_t> read_dma(uint64_t pos, std::vector<iovec> iov, io_intent* intent) noexcept override;
Expand Down
48 changes: 32 additions & 16 deletions src/core/file.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ struct fs_info {
bool append_challenged;
unsigned append_concurrency;
bool fsync_is_exclusive;
bool nowait_works;
nowait_mode nowait_works;
std::optional<dioattr> dioinfo;
};

Expand Down Expand Up @@ -116,7 +116,7 @@ file_handle::to_file() && {
return file(std::move(*_impl).to_file());
}

posix_file_impl::posix_file_impl(int fd, open_flags f, file_open_options options, dev_t device_id, bool nowait_works)
posix_file_impl::posix_file_impl(int fd, open_flags f, file_open_options options, dev_t device_id, nowait_mode nowait_works)
: _nowait_works(nowait_works)
, _device_id(device_id)
, _io_queue(engine().get_io_queue(_device_id))
Expand Down Expand Up @@ -180,7 +180,7 @@ posix_file_impl::posix_file_impl(int fd, open_flags f, std::atomic<unsigned>* re
uint32_t disk_read_dma_alignment,
uint32_t disk_write_dma_alignment,
uint32_t disk_overwrite_dma_alignment,
bool nowait_works)
nowait_mode nowait_works)
: _refcount(refcount)
, _nowait_works(nowait_works)
, _device_id(device_id)
Expand Down Expand Up @@ -478,27 +478,27 @@ posix_file_impl::list_directory(std::function<future<> (directory_entry de)> nex

/// Builds a single-buffer write request for `_fd` at offset `pos` and submits
/// it to this file's I/O queue.
///
/// The request's nowait argument is set only when `_nowait_works` is
/// nowait_mode::yes; under nowait_mode::read_only and nowait_mode::no the
/// write is submitted without it.
///
/// \param pos    file offset passed to the write request
/// \param buffer source buffer passed to the write request
/// \param len    length passed both to the request and to the queue
/// \param intent forwarded unchanged to the I/O queue
/// \return the future produced by io_queue::submit_io_write()
future<size_t>
posix_file_impl::do_write_dma(uint64_t pos, const void* buffer, size_t len, io_intent* intent) noexcept {
    // nowait_mode is a scoped enum, so it must be compared explicitly rather
    // than passed where a bool is expected.
    const bool nowait = _nowait_works == nowait_mode::yes;
    auto req = internal::io_request::make_write(_fd, pos, buffer, len, nowait);
    return _io_queue.submit_io_write(len, std::move(req), intent);
}

/// Builds a vectored write request for `_fd` at offset `pos` and submits it to
/// this file's I/O queue.
///
/// The iovec list is first passed through internal::sanitize_iovecs() with the
/// disk write DMA alignment; the length it returns is what gets submitted.
/// The request's nowait argument is set only when `_nowait_works` is
/// nowait_mode::yes.
///
/// \param pos    file offset passed to the write request
/// \param iov    buffers to write; moved into the queue submission
/// \param intent forwarded unchanged to the I/O queue
/// \return the future produced by io_queue::submit_io_write()
future<size_t>
posix_file_impl::do_write_dma(uint64_t pos, std::vector<iovec> iov, io_intent* intent) noexcept {
    auto len = internal::sanitize_iovecs(iov, _disk_write_dma_alignment);
    // nowait_mode is a scoped enum, so it must be compared explicitly rather
    // than passed where a bool is expected.
    const bool nowait = _nowait_works == nowait_mode::yes;
    auto req = internal::io_request::make_writev(_fd, pos, iov, nowait);
    return _io_queue.submit_io_write(len, std::move(req), intent, std::move(iov));
}

/// Builds a single-buffer read request for `_fd` at offset `pos` and submits it
/// to this file's I/O queue.
///
/// The request's nowait argument is set when `_nowait_works` is either
/// nowait_mode::yes or nowait_mode::read_only, and cleared for
/// nowait_mode::no.
///
/// \param pos    file offset passed to the read request
/// \param buffer destination buffer passed to the read request
/// \param len    length passed both to the request and to the queue
/// \param intent forwarded unchanged to the I/O queue
/// \return the future produced by io_queue::submit_io_read()
future<size_t>
posix_file_impl::do_read_dma(uint64_t pos, void* buffer, size_t len, io_intent* intent) noexcept {
    // Reads use nowait in both the "yes" and the "read_only" modes.
    const bool nowait = _nowait_works == nowait_mode::yes
            || _nowait_works == nowait_mode::read_only;
    auto req = internal::io_request::make_read(_fd, pos, buffer, len, nowait);
    return _io_queue.submit_io_read(len, std::move(req), intent);
}

/// Builds a vectored read request for `_fd` at offset `pos` and submits it to
/// this file's I/O queue.
///
/// The iovec list is first passed through internal::sanitize_iovecs() with the
/// disk read DMA alignment; the length it returns is what gets submitted.
/// The request's nowait argument is set when `_nowait_works` is either
/// nowait_mode::yes or nowait_mode::read_only, and cleared for
/// nowait_mode::no.
///
/// \param pos    file offset passed to the read request
/// \param iov    buffers to read into; moved into the queue submission
/// \param intent forwarded unchanged to the I/O queue
/// \return the future produced by io_queue::submit_io_read()
future<size_t>
posix_file_impl::do_read_dma(uint64_t pos, std::vector<iovec> iov, io_intent* intent) noexcept {
    auto len = internal::sanitize_iovecs(iov, _disk_read_dma_alignment);
    // Reads use nowait in both the "yes" and the "read_only" modes.
    const bool nowait = _nowait_works == nowait_mode::yes
            || _nowait_works == nowait_mode::read_only;
    auto req = internal::io_request::make_readv(_fd, pos, iov, nowait);
    return _io_queue.submit_io_read(len, std::move(req), intent, std::move(iov));
}

Expand Down Expand Up @@ -620,7 +620,7 @@ static bool blockdev_nowait_works(dev_t device_id) {
}

blockdev_file_impl::blockdev_file_impl(int fd, open_flags f, file_open_options options, dev_t device_id, size_t block_size)
: posix_file_impl(fd, f, options, device_id, blockdev_nowait_works(device_id)) {
: posix_file_impl(fd, f, options, device_id, blockdev_nowait_works(device_id) ? nowait_mode::yes : nowait_mode::no) {
// Configure DMA alignment requirements based on block device block size
_memory_dma_alignment = block_size;
_disk_read_dma_alignment = block_size;
Expand Down Expand Up @@ -1051,7 +1051,7 @@ make_file_impl(int fd, file_open_options options, int flags, struct stat st) noe
// query it here. Just provide something reasonable.
internal::fs_info fsi;
fsi.block_size = 4096;
fsi.nowait_works = false;
fsi.nowait_works = nowait_mode::no;
return make_ready_future<shared_ptr<file_impl>>(make_shared<posix_file_real_impl>(fd, open_flags(flags), options, fsi, st.st_dev));
}

Expand All @@ -1063,6 +1063,7 @@ make_file_impl(int fd, file_open_options options, int flags, struct stat st) noe
return engine().fstatfs(fd).then([fd, options = std::move(options), flags, st = std::move(st)] (struct statfs sfs) {
internal::fs_info fsi;
fsi.block_size = sfs.f_bsize;
bool fs_nowait_works = false;
switch (sfs.f_type) {
case internal::fs_magic::xfs:
dioattr da;
Expand All @@ -1074,40 +1075,55 @@ make_file_impl(int fd, file_open_options options, int flags, struct stat st) noe
static auto xc = xfs_concurrency_from_kernel_version();
fsi.append_concurrency = xc;
fsi.fsync_is_exclusive = true;
fsi.nowait_works = internal::kernel_uname().whitelisted({"4.13"});
fs_nowait_works = internal::kernel_uname().whitelisted({"4.13"});
break;
case internal::fs_magic::nfs:
fsi.append_challenged = false;
fsi.append_concurrency = 0;
fsi.fsync_is_exclusive = false;
fsi.nowait_works = internal::kernel_uname().whitelisted({"4.13"});
fs_nowait_works = internal::kernel_uname().whitelisted({"4.13"});
break;
case internal::fs_magic::ext4:
fsi.append_challenged = true;
fsi.append_concurrency = 0;
fsi.fsync_is_exclusive = false;
fsi.nowait_works = internal::kernel_uname().whitelisted({"5.5"});
fs_nowait_works = internal::kernel_uname().whitelisted({"5.5"});
break;
case internal::fs_magic::btrfs:
fsi.append_challenged = true;
fsi.append_concurrency = 0;
fsi.fsync_is_exclusive = true;
fsi.nowait_works = internal::kernel_uname().whitelisted({"5.9"});
fs_nowait_works = internal::kernel_uname().whitelisted({"5.9"});
break;
case internal::fs_magic::tmpfs:
case internal::fs_magic::fuse:
fsi.append_challenged = false;
fsi.append_concurrency = 999;
fsi.fsync_is_exclusive = false;
fsi.nowait_works = false;
break;
default:
fsi.append_challenged = true;
fsi.append_concurrency = 0;
fsi.fsync_is_exclusive = true;
fsi.nowait_works = false;
}
fsi.nowait_works &= engine()._cfg.aio_nowait_works;

if (!fs_nowait_works) {
fsi.nowait_works = nowait_mode::no;
} else if (engine()._cfg.aio_nowait_works.has_value()) {
if (*engine()._cfg.aio_nowait_works) {
fsi.nowait_works = nowait_mode::yes;
} else {
fsi.nowait_works = nowait_mode::no;
}
} else {
if (internal::kernel_uname().whitelisted({"6.0"})) {
fsi.nowait_works = nowait_mode::read_only; // seastar issue #2974
} else if (internal::kernel_uname().whitelisted({"4.13"})) {
fsi.nowait_works = nowait_mode::yes;
} else {
fsi.nowait_works = nowait_mode::no;
}
}
s_fstype.insert(std::make_pair(st.st_dev, std::move(fsi)));
return make_file_impl(fd, std::move(options), flags, std::move(st));
});
Expand Down
6 changes: 3 additions & 3 deletions src/core/reactor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1493,7 +1493,7 @@ reactor::test::get_stall_detector_report_function() {
}

/// Reports the reactor configuration's `aio_nowait_works` setting.
///
/// The setting is a std::optional<bool>; when it holds no value this returns
/// true, otherwise it returns the contained value.
bool reactor::test::linux_aio_nowait() {
    // value_or() is required here: std::optional<bool> does not convert
    // implicitly to bool, and its boolean test would report engagement rather
    // than the stored flag.
    return engine()._cfg.aio_nowait_works.value_or(true);
}

reactor::test::long_task_queue_state
Expand Down Expand Up @@ -3905,7 +3905,7 @@ reactor_options::reactor_options(program_options::option_group* parent_group)
, blocked_reactor_reports_per_minute(*this, "blocked-reactor-reports-per-minute", 5, "Maximum number of backtraces reported by stall detector per minute")
, blocked_reactor_report_format_oneline(*this, "blocked-reactor-report-format-oneline", true, "Print a simplified backtrace on a single line")
, relaxed_dma(*this, "relaxed-dma", "allow using buffered I/O if DMA is not available (reduces performance)")
, linux_aio_nowait(*this, "linux-aio-nowait", internal::kernel_uname().whitelisted({"4.13"}), // base version where this works
, linux_aio_nowait(*this, "linux-aio-nowait", {},
"use the Linux NOWAIT AIO feature, which reduces reactor stalls due to aio (autodetected)")
, unsafe_bypass_fsync(*this, "unsafe-bypass-fsync", false, "Bypass fsync(), may result in data loss. Use for testing on consumer drives")
, kernel_page_cache(*this, "kernel-page-cache", false,
Expand Down Expand Up @@ -4450,7 +4450,7 @@ void smp::configure(const smp_options& smp_opts, const reactor_options& reactor_
.strict_o_direct = !reactor_opts.relaxed_dma,
.bypass_fsync = reactor_opts.unsafe_bypass_fsync.get_value(),
.no_poll_aio = !reactor_opts.poll_aio.get_value() || (reactor_opts.poll_aio.defaulted() && reactor_opts.overprovisioned),
.aio_nowait_works = reactor_opts.linux_aio_nowait.get_value(), // Mixed in with filesystem-provided values later
.aio_nowait_works = reactor_opts.linux_aio_nowait.defaulted() ? std::optional<bool>(std::nullopt) : std::optional<bool>(reactor_opts.linux_aio_nowait.get_value()), // Mixed in with filesystem-provided values later
.abort_on_too_long_task_queue = reactor_opts.abort_on_too_long_task_queue.get_value(),
};

Expand Down