14 changes: 8 additions & 6 deletions db/db_wal_test.cc
@@ -3121,22 +3121,24 @@ TEST_F(DBWALTest, RecoveryFlushSwitchWALOnEmptyMemtable) {
Destroy(options);
}

TEST_F(DBWALTest, WALWriteErrorNoRecovery) {
TEST_F(DBWALTest, WALWriteErrorNoAutoRecovery) {
Options options = CurrentOptions();
auto fault_fs = std::make_shared<FaultInjectionTestFS>(FileSystem::Default());
std::unique_ptr<Env> fault_fs_env(NewCompositeEnv(fault_fs));
options.env = fault_fs_env.get();
options.manual_wal_flush = true;
options.atomic_flush = false;
DestroyAndReopen(options);
CreateAndReopenWithCF({"pikachu"}, options);

fault_fs->SetThreadLocalErrorContext(
FaultInjectionIOType::kWrite, 7 /* seed*/, 1 /* one_in */,
true /* retryable */, false /* has_data_loss*/);
fault_fs->EnableThreadLocalErrorInjection(FaultInjectionIOType::kWrite);

ASSERT_OK(Put("k", "v"));
Status s;
s = db_->FlushWAL(false);
Status s = Put("k", "v");
ASSERT_TRUE(s.IsIOError());
ASSERT_TRUE(
s.ToString().find("injected write error failed to write to WAL") !=
std::string::npos);
s = dbfull()->TEST_GetBGError();
ASSERT_EQ(s.severity(), Status::Severity::kFatalError);
ASSERT_FALSE(dbfull()->TEST_IsRecoveryInProgress());
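The updated test asserts that, with `manual_wal_flush` and multiple column families but `atomic_flush = false`, an injected WAL write error surfaces as a background error of `kFatalError` severity with no auto recovery in progress. As a minimal sketch that is not part of this change (`WalErrorObserver` is a hypothetical application-side class, and exactly which status a listener observes relative to the severity escalation in `ErrorHandler` is an assumption, not a contract), an application could watch for this condition through the public `EventListener::OnBackgroundError` callback:

```cpp
// Hypothetical application-side listener; not part of this PR.
#include <atomic>

#include "rocksdb/listener.h"
#include "rocksdb/status.h"

class WalErrorObserver : public ROCKSDB_NAMESPACE::EventListener {
 public:
  const char* Name() const override { return "WalErrorObserver"; }

  void OnBackgroundError(ROCKSDB_NAMESPACE::BackgroundErrorReason /*reason*/,
                         ROCKSDB_NAMESPACE::Status* bg_error) override {
    // With this change, a WAL write IOError on a multi-CF DB without
    // atomic_flush is escalated to kFatalError, so RocksDB will not attempt
    // auto recovery; the application should plan to close and reopen the DB.
    if (bg_error->severity() >=
        ROCKSDB_NAMESPACE::Status::Severity::kFatalError) {
      saw_fatal_error.store(true);
    }
  }

  std::atomic<bool> saw_fatal_error{false};
};
```

It would be registered the same way the error handler tests below attach their listener, e.g. `options.listeners.emplace_back(std::make_shared<WalErrorObserver>());`.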
19 changes: 11 additions & 8 deletions db/error_handler.cc
@@ -423,14 +423,17 @@ void ErrorHandler::SetBGError(const Status& bg_status,
reason == BackgroundErrorReason::kMemTable ||
reason == BackgroundErrorReason::kFlush);
}
if (db_options_.manual_wal_flush && wal_related && bg_io_err.IsIOError()) {
// With manual_wal_flush, a WAL write failure can drop buffered WAL writes.
// Memtables and WAL then become inconsistent. A successful memtable flush
// on one CF can cause CFs to be inconsistent upon restart. Before we fix
// the bug in auto recovery from WAL write failures that can flush one CF
// at a time, we set the error severity to fatal to disallow auto recovery.
// TODO: remove parameter `wal_related` once we can automatically recover
// from WAL write failures.

// When `atomic_flush = false` and there are multiple column families, a
// WAL-related IO error can leave the DB inconsistent: flushing individual
// CFs during auto recovery may advance some column families past the
// corruption point while others remain behind, preventing a successful
// database restart. Therefore we disallow auto recovery by raising the
// severity to `Status::Severity::kFatalError`.
bool has_multiple_cfs =
db_->versions_->GetColumnFamilySet()->NumberOfColumnFamilies() > 1;
if (!db_options_.atomic_flush && has_multiple_cfs && wal_related &&
bg_io_err.IsIOError()) {
bool auto_recovery = false;
Status bg_err(new_bg_io_err, Status::Severity::kFatalError);
CheckAndSetRecoveryAndBGError(bg_err);
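The new condition narrows the escalation to the only configuration that can actually become inconsistent: more than one column family with `atomic_flush` disabled. For contrast, here is a hedged sketch (the DB path and the "pikachu" CF name are illustrative, not part of this PR) of a multi-CF database opened with `atomic_flush = true`, the configuration under which flushes stay in lockstep and auto recovery from WAL write errors remains permitted:

```cpp
// Sketch under stated assumptions: path and CF names are illustrative only.
#include <string>
#include <vector>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

using ROCKSDB_NAMESPACE::ColumnFamilyDescriptor;
using ROCKSDB_NAMESPACE::ColumnFamilyHandle;
using ROCKSDB_NAMESPACE::ColumnFamilyOptions;
using ROCKSDB_NAMESPACE::DB;
using ROCKSDB_NAMESPACE::DBOptions;
using ROCKSDB_NAMESPACE::Options;
using ROCKSDB_NAMESPACE::Status;

int main() {
  Options options;
  options.create_if_missing = true;
  options.create_missing_column_families = true;
  // All CFs flush together, so a dropped WAL write cannot leave one CF ahead
  // of another; this configuration does not trigger the kFatalError path.
  options.atomic_flush = true;

  std::vector<ColumnFamilyDescriptor> cf_descs = {
      {ROCKSDB_NAMESPACE::kDefaultColumnFamilyName, ColumnFamilyOptions()},
      {"pikachu", ColumnFamilyOptions()}};
  std::vector<ColumnFamilyHandle*> handles;
  DB* db = nullptr;
  Status s = DB::Open(DBOptions(options), "/tmp/atomic_flush_demo", cf_descs,
                      &handles, &db);
  if (!s.ok()) {
    return 1;
  }
  for (ColumnFamilyHandle* h : handles) {
    s = db->DestroyColumnFamilyHandle(h);
  }
  delete db;
  return 0;
}
```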
2 changes: 2 additions & 0 deletions db/error_handler_fs_test.cc
@@ -418,6 +418,7 @@ TEST_F(DBErrorHandlingFSTest, FlushWALWriteRetryableError) {
options.create_if_missing = true;
options.listeners.emplace_back(listener);
options.max_bgerror_resume_count = 0;
options.atomic_flush = true;
Status s;

IOStatus error_msg = IOStatus::IOError("Retryable IO Error");
@@ -1463,6 +1464,7 @@ TEST_F(DBErrorHandlingFSTest, MultiCFWALWriteError) {
options.create_if_missing = true;
options.writable_file_max_buffer_size = 32768;
options.listeners.emplace_back(listener);
options.atomic_flush = true;
Random rnd(301);

listener->EnableAutoRecovery();
3 changes: 2 additions & 1 deletion db_stress_tool/db_stress_test_base.h
@@ -354,7 +354,8 @@ class StressTest {
assert(!error_s.ok());
return error_s.getState() &&
FaultInjectionTestFS::IsInjectedError(error_s) &&
!status_to_io_status(Status(error_s)).GetDataLoss();
!status_to_io_status(Status(error_s)).GetDataLoss() &&
error_s.severity() <= Status::Severity::kHardError;
}

void ProcessStatus(SharedState* shared, std::string msg, const Status& s,
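The extra `severity()` check keeps the stress test from treating the newly escalated fatal errors as ignorable injected errors. A small self-contained sketch (the helper name is hypothetical) of how `Status` severity ordering drives that comparison:

```cpp
// Sketch only; `SeverityAllowsAutoRecovery` is a hypothetical helper that
// mirrors the predicate added to db_stress_test_base.h above.
#include <cassert>

#include "rocksdb/status.h"

using ROCKSDB_NAMESPACE::Status;

// An injected error is only "ignorable" if RocksDB could still auto recover
// from it, i.e. its severity is no worse than a hard error.
bool SeverityAllowsAutoRecovery(const Status& s) {
  return s.severity() <= Status::Severity::kHardError;
}

int main() {
  Status io_err = Status::IOError("injected write error");
  // Re-wrap with an explicit severity, as SetBGError() now does for WAL
  // write failures on multi-CF, non-atomic-flush DBs.
  Status fatal(io_err, Status::Severity::kFatalError);

  assert(SeverityAllowsAutoRecovery(io_err));  // plain IOError: recoverable
  assert(!SeverityAllowsAutoRecovery(fatal));  // escalated: not ignorable
  return 0;
}
```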
10 changes: 5 additions & 5 deletions tools/db_crashtest.py
@@ -996,7 +996,7 @@ def finalize_and_sanitize(src_params):
if (
dest_params.get("reopen", 0) > 0
or (
dest_params.get("manual_wal_flush_one_in")
dest_params.get("atomic_flush") != 1
and dest_params.get("column_families") != 1
)
or (
@@ -1010,10 +1010,10 @@ def finalize_and_sanitize(src_params):
# To simplify, we disable any WAL write error injection.
# TODO(hx235): support WAL write error injection with reopen
#
# 2. WAL write failure can drop buffered WAL data. This can cause
# inconsistency when one CF has a successful flush during auto
# recovery. Disable the fault injection in this path for now until
# we have a fix that allows auto recovery.
# 2. When `atomic_flush = false` with multiple column families, a WAL-related
# IO error can make individual CF flushes during auto recovery advance some
# column families past the corruption point while others remain behind,
# preventing a successful database restart. Disable auto recovery and skip
# testing this case in the crash test.
#
# 3. Pessimistic transactions use 2PC, which can't auto-recover from WAL write errors.
# This is because RocksDB cannot easily discard the corrupted WAL without risking the