From e975dd1ff84f6a3e4bbff76dad2639cde77e8b4b Mon Sep 17 00:00:00 2001 From: HypenZou <543668641@qq.com> Date: Tue, 21 May 2024 20:31:20 +0800 Subject: [PATCH] Don't take archived log size into account when calculating log size for flush --- db/db_filesnapshot.cc | 7 ++++ db/wal_manager.cc | 3 ++ include/rocksdb/utilities/checkpoint.h | 6 ++- .../checkpoint_log_flush_improve.md | 1 + utilities/checkpoint/checkpoint_test.cc | 40 +++++++++++++++++++ 5 files changed, 55 insertions(+), 2 deletions(-) create mode 100644 unreleased_history/behavior_changes/checkpoint_log_flush_improve.md diff --git a/db/db_filesnapshot.cc b/db/db_filesnapshot.cc index 526e8fcd220..7289cf1e3b7 100644 --- a/db/db_filesnapshot.cc +++ b/db/db_filesnapshot.cc @@ -21,6 +21,7 @@ #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/metadata.h" +#include "rocksdb/transaction_log.h" #include "rocksdb/types.h" #include "test_util/sync_point.h" #include "util/file_checksum_helper.h" @@ -29,6 +30,7 @@ namespace ROCKSDB_NAMESPACE { Status DBImpl::FlushForGetLiveFiles() { + TEST_SYNC_POINT("DBImpl::FlushForGetLiveFiles"); return DBImpl::FlushAllColumnFamilies(FlushOptions(), FlushReason::kGetLiveFiles); } @@ -217,6 +219,11 @@ Status DBImpl::GetLiveFilesStorageInfo( // We may be able to cover 2PC case too. uint64_t total_wal_size = 0; for (auto& wal : live_wal_files) { + // Don't take archived log size into account + // when calculating log size for flush + if (wal->Type() == kArchivedLogFile) { + continue; + } total_wal_size += wal->SizeFileBytes(); } if (total_wal_size < opts.wal_size_for_flush) { diff --git a/db/wal_manager.cc b/db/wal_manager.cc index 11f2b042ddb..5cc4c2e1e88 100644 --- a/db/wal_manager.cc +++ b/db/wal_manager.cc @@ -285,6 +285,9 @@ void WalManager::ArchiveWALFile(const std::string& fname, uint64_t number) { Status s = env_->RenameFile(fname, archived_log_name); // The sync point below is used in (DBTest,TransactionLogIteratorRace) TEST_SYNC_POINT("WalManager::PurgeObsoleteFiles:2"); + // The sync point below is used in + // (CheckPointTest, CheckpointWithArchievedLog) + TEST_SYNC_POINT("WalManager::ArchiveWALFile"); ROCKS_LOG_INFO(db_options_.info_log, "Move log file %s to %s -- %s\n", fname.c_str(), archived_log_name.c_str(), s.ToString().c_str()); diff --git a/include/rocksdb/utilities/checkpoint.h b/include/rocksdb/utilities/checkpoint.h index 6509f38d969..d0834ac7891 100644 --- a/include/rocksdb/utilities/checkpoint.h +++ b/include/rocksdb/utilities/checkpoint.h @@ -32,8 +32,10 @@ class Checkpoint { // same filesystem as the database, and copied otherwise. // (2) other required files (like MANIFEST) are always copied. // log_size_for_flush: if the total log file size is equal or larger than - // this value, then a flush is triggered for all the column families. The - // default value is 0, which means flush is always triggered. If you move + // this value, then a flush is triggered for all the column families. + // The archived log size will not be included when calculating the total log + // size. + // The default value is 0, which means flush is always triggered. If you move // away from the default, the checkpoint may not contain up-to-date data // if WAL writing is not always enabled. // Flush will always trigger if it is 2PC. diff --git a/unreleased_history/behavior_changes/checkpoint_log_flush_improve.md b/unreleased_history/behavior_changes/checkpoint_log_flush_improve.md new file mode 100644 index 00000000000..f43e94c3f41 --- /dev/null +++ b/unreleased_history/behavior_changes/checkpoint_log_flush_improve.md @@ -0,0 +1 @@ +When calculating total log size for the `log_size_for_flush` argument in `CreateCheckpoint` API, the size of the archived log will not be included to avoid unnecessary flush diff --git a/utilities/checkpoint/checkpoint_test.cc b/utilities/checkpoint/checkpoint_test.cc index 111e81f3224..0229281c16f 100644 --- a/utilities/checkpoint/checkpoint_test.cc +++ b/utilities/checkpoint/checkpoint_test.cc @@ -13,6 +13,7 @@ #ifndef OS_WIN #include #endif +#include #include #include #include @@ -23,6 +24,7 @@ #include "port/stack_trace.h" #include "rocksdb/db.h" #include "rocksdb/env.h" +#include "rocksdb/rocksdb_namespace.h" #include "rocksdb/utilities/transaction_db.h" #include "test_util/sync_point.h" #include "test_util/testharness.h" @@ -970,6 +972,44 @@ TEST_F(CheckpointTest, CheckpointWithDbPath) { delete checkpoint; } +TEST_F(CheckpointTest, CheckpointWithArchievedLog) { + std::atomic_bool flushed = false; + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( + {{"WalManager::ArchiveWALFile", + "CheckpointTest:CheckpointWithArchievedLog"}}); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "DBImpl::FlushForGetLiveFiles", [&](void*) { flushed = true; }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + Options options = CurrentOptions(); + options.WAL_ttl_seconds = 3600; + DestroyAndReopen(options); + + ASSERT_OK(Put("key1", std::string(1024 * 1024, 'a'))); + // flush and archive the first log + ASSERT_OK(Flush()); + ASSERT_OK(Put("key2", std::string(1024, 'a'))); + + Checkpoint* checkpoint; + ASSERT_OK(Checkpoint::Create(db_, &checkpoint)); + TEST_SYNC_POINT("CheckpointTest:CheckpointWithArchievedLog"); + // unflushed log size < 1024 * 1024 < total file size including archived log, + // flush shouldn't occur + ASSERT_OK(checkpoint->CreateCheckpoint(snapshot_name_, 1024 * 1024)); + ASSERT_TRUE(!flushed); + + DB* snapshot_db; + ASSERT_OK(DB::Open(options, snapshot_name_, &snapshot_db)); + ReadOptions read_opts; + std::string get_result; + ASSERT_OK(snapshot_db->Get(read_opts, "key1", &get_result)); + ASSERT_EQ(std::string(1024 * 1024, 'a'), get_result); + ASSERT_OK(snapshot_db->Get(read_opts, "key2", &get_result)); + ASSERT_EQ(std::string(1024, 'a'), get_result); + delete snapshot_db; +} + +TEST(test, test) {} + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) {