Skip to content

Commit

Permalink
Cleaning up
Browse files Browse the repository at this point in the history
Signed-off-by: v01dstar <[email protected]>
  • Loading branch information
v01dstar committed Aug 28, 2024
1 parent 724982e commit 1f0ecf1
Show file tree
Hide file tree
Showing 17 changed files with 42 additions and 63 deletions.
3 changes: 0 additions & 3 deletions include/titan/db.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,9 +201,6 @@ class TitanDB : public StackableDB {
// "rocksdb.titandb.discardable_ratio_le100_file_num" - returns count of
// file whose discardable ratio is less or equal to 100%.
static const std::string kNumDiscardableRatioLE100File;
// "rockdb.titandb.kNumHolePunchableBlobSize" - returns the size of hole
// punchable blobs (no longer referenced in SSTs) in the database.
static const std::string kHolePunchableBlobSize;
};

bool GetProperty(ColumnFamilyHandle* column_family, const Slice& property,
Expand Down
4 changes: 1 addition & 3 deletions include/titan/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,7 @@ struct TitanCFOptions : public ColumnFamilyOptions {
// requirement for blob entries and Titan has to distinguish between real
// data's 0s and 0s created by punch holes).
uint64_t block_size{4096};
bool enable_punch_hole_gc{false};
uint64_t punch_hole_threshold{4 * 1024 * 1024};
uint64_t punch_hole_threshold{0};

TitanCFOptions() = default;
explicit TitanCFOptions(const ColumnFamilyOptions& options)
Expand Down Expand Up @@ -220,7 +219,6 @@ struct ImmutableTitanCFOptions {
bool skip_value_in_compaction_filter;

uint64_t block_size;
bool enable_punch_hole_gc;
};

struct MutableTitanCFOptions {
Expand Down
5 changes: 2 additions & 3 deletions src/blob_file_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ BlobFileBuilder::BlobFileBuilder(const TitanDBOptions& db_options,
return;
#endif
}
block_size_ = cf_options.enable_punch_hole_gc ? cf_options.block_size : 0;
block_size_ = cf_options.punch_hole_threshold > 0 ? cf_options.block_size : 0;
WriteHeader();
}

Expand Down Expand Up @@ -165,8 +165,7 @@ void BlobFileBuilder::WriteEncoderData(BlobHandle* handle) {
handle->offset = file_->GetFileSize();
handle->size = encoder_.GetEncodedSize();
if (block_size_ > 0) {
live_data_size_ +=
(handle->size + block_size_ - 1) / block_size_ * block_size_;
live_data_size_ += Roundup(handle->size, block_size_);
} else {
live_data_size_ += handle->size;
}
Expand Down
2 changes: 0 additions & 2 deletions src/blob_file_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@ class BlobFileIterator {
Slice value() const;
Status status() const { return status_; }
uint64_t header_size() const { return header_size_; }
// Returns the size of the "footer", this includes the meta blocks.
uint64_t footer_size() const { return file_size_ - end_of_blob_record_; }

void IterateForPrev(uint64_t);

Expand Down
5 changes: 2 additions & 3 deletions src/blob_file_iterator_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class BlobFileIteratorTest : public testing::Test {
TitanDBOptions db_options(titan_options_);
TitanCFOptions cf_options(titan_options_);
if (with_blocks) {
cf_options.enable_punch_hole_gc = true;
cf_options.punch_hole_threshold = 4096;
cf_options.block_size = 4096;
}
BlobFileCache cache(db_options, cf_options, {NewLRUCache(128)}, nullptr);
Expand Down Expand Up @@ -194,8 +194,7 @@ TEST_F(BlobFileIteratorTest, IterateForPrev) {
blob_index = blob_file_iterator_->GetBlobIndex();
ASSERT_EQ(blob_handle, blob_index.blob_handle);

while ((idx = Random::GetTLSInstance()->Uniform(n)) == 0)
;
while ((idx = Random::GetTLSInstance()->Uniform(n)) == 0);
blob_handle = contexts[idx]->new_blob_index.blob_handle;
blob_file_iterator_->IterateForPrev(blob_handle.offset - kRecordHeaderSize -
1);
Expand Down
2 changes: 1 addition & 1 deletion src/blob_file_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ class BlobFileSet {
uint64_t GetBlockSize(uint32_t cf_id) {
MutexLock l(mutex_);
auto storage = GetBlobStorage(cf_id).lock();
if (storage != nullptr && storage->cf_options().enable_punch_hole_gc) {
if (storage != nullptr && storage->cf_options().punch_hole_threshold > 0) {
return storage->cf_options().block_size;
}
return 0;
Expand Down
7 changes: 0 additions & 7 deletions src/blob_format.cc
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,6 @@ void BlobFileMeta::EncodeTo(std::string* dst) const {
PutVarint64(dst, block_size_);
PutLengthPrefixedSlice(dst, smallest_key_);
PutLengthPrefixedSlice(dst, largest_key_);
PutVarint64(dst, effective_file_size_);
}

Status BlobFileMeta::DecodeFromV1(Slice* src) {
Expand Down Expand Up @@ -190,12 +189,6 @@ Status BlobFileMeta::DecodeFrom(Slice* src) {
} else {
return Status::Corruption("BlobLargestKey decode failed");
}
uint64_t effective_file_size;
if (!GetVarint64(src, &effective_file_size)) {
return Status::Corruption(
"BlobFileMeta hole_punchable_size_ decode failed");
}
effective_file_size_ = effective_file_size;
return Status::OK();
}

Expand Down
26 changes: 11 additions & 15 deletions src/blob_format.h
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,6 @@ class BlobFileMeta {
(file_size_ - kBlobMaxHeaderSize - kBlobFooterSize));
}
TitanInternalStats::StatsType GetDiscardableRatioLevel() const;
// This should be called with db mutex held.
uint64_t GetHolePunchableSize() const {
return effective_file_size_ - live_data_size_;
}
Expand All @@ -301,18 +300,6 @@ class BlobFileMeta {
std::string smallest_key_;
std::string largest_key_;

// The effective size of current file. This is different from `file_size_`, as
// `file_size_` is the original size of the file, and does not consider space
// reclaimed by punch hole GC.
// We can't use file system's `st_blocks` to get the logical size, because
// the file system's block size may be different from Titan's block size.
// This is used to calculate the size of the punchable hole. i.e.
// effective_file_size_ - live_data_size_.
// This might be bigger than the actual size of the file, when Titan crashes
// before updating the `effective_file_size_` during punch hole GC. This is
// fine, as it will be corrected when the file is chose for GC next time.
int64_t effective_file_size_{0};

// Not persistent field

// Size of data with reference from SST files.
Expand All @@ -327,8 +314,17 @@ class BlobFileMeta {
// `OnCompactionCompleted()` is called.
// The size is aligned with block size, when punch hole GC is enabled.
std::atomic<int64_t> live_data_size_{0};
// This is different from `file_size_`, as `file_size_` is the original size
// of the file, and does not consider space reclaimed by punch hole GC.
// This is used to calculate the size of the punchable hole. i.e.
// effective_file_size_ - live_data_size_.
// The effective size of current file. This is different from `file_size_`, as
// `file_size_` is the original size of the file, and does not consider space
// reclaimed by punch hole GC.
// This might be bigger than the actual effective size of the file, when Titan
// crashes or restarts. This is fine, as it will be corrected when the file is
// chose for GC next time.
int64_t effective_file_size_{0};
// Disk usage of the file, This is different from `effective_file_size_`, when
// block size does not align with file system block size.
int64_t disk_usage_{0};
std::atomic<FileState> state_{FileState::kNone};
};
Expand Down
16 changes: 8 additions & 8 deletions src/blob_gc_picker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ std::unique_ptr<BlobGC> BasicBlobGCPicker::PickRegularBlobGC(
uint64_t batch_size = 0;
uint64_t estimate_output_size = 0;
bool stop_picking = false;
bool maybe_continue_next_time = false;
bool need_trigger_next = false;
uint64_t next_gc_size = 0;
bool in_fallback = cf_options_.blob_run_mode == TitanBlobRunMode::kFallback;

Expand Down Expand Up @@ -71,14 +71,14 @@ std::unique_ptr<BlobGC> BasicBlobGCPicker::PickRegularBlobGC(
estimate_output_size += blob_file->live_data_size();
if (batch_size >= cf_options_.max_gc_batch_size ||
estimate_output_size >= cf_options_.blob_file_target_size) {
// Stop pick file for this gc, but still check file for whether need
// trigger gc after this
// Stop picking file for this gc, but still check file for whether
// another round of gc is needed.
stop_picking = true;
}
} else {
next_gc_size += blob_file->file_size();
if (next_gc_size > cf_options_.min_gc_batch_size || in_fallback) {
maybe_continue_next_time = true;
need_trigger_next = true;
RecordTick(statistics(stats_), TITAN_GC_REMAIN, 1);
TITAN_LOG_INFO(db_options_.info_log,
"remain more than %" PRIu64
Expand Down Expand Up @@ -112,7 +112,7 @@ std::unique_ptr<BlobGC> BasicBlobGCPicker::PickRegularBlobGC(
}

return std::unique_ptr<BlobGC>(new BlobGC(
std::move(blob_files), std::move(cf_options_), maybe_continue_next_time));
std::move(blob_files), std::move(cf_options_), need_trigger_next));
}

std::unique_ptr<BlobGC> BasicBlobGCPicker::PickPunchHoleGC(
Expand All @@ -123,7 +123,7 @@ std::unique_ptr<BlobGC> BasicBlobGCPicker::PickPunchHoleGC(
uint64_t batch_size = 0;
uint64_t estimate_output_size = 0;
bool stop_picking = false;
bool maybe_continue_next_time = false;
bool need_trigger_next = false;
uint64_t next_gc_size = 0;

for (auto& gc_score : blob_storage->punch_hole_score()) {
Expand All @@ -146,13 +146,13 @@ std::unique_ptr<BlobGC> BasicBlobGCPicker::PickPunchHoleGC(
}
} else {
// TODO: add a batch threshold for punch hole gc.
maybe_continue_next_time = true;
need_trigger_next = true;
break;
}
}
if (blob_files.empty()) return nullptr;
return std::unique_ptr<BlobGC>(new BlobGC(
std::move(blob_files), std::move(cf_options_), maybe_continue_next_time,
std::move(blob_files), std::move(cf_options_), need_trigger_next,
/*punch_hole_gc=*/true));
}

Expand Down
8 changes: 2 additions & 6 deletions src/blob_storage.cc
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,6 @@ Status BlobStorage::GetBlobFilesInRanges(
Status BlobStorage::InitPunchHoleGCOnStart() {
MutexLock l(&mutex_);
for (auto& file : files_) {
assert(file.second->file_state() == BlobFileMeta::FileState::kPendingInit);
struct stat file_stat;
if (stat(BlobFileName(db_options_.dirname, file.second->file_number())
.c_str(),
Expand Down Expand Up @@ -267,7 +266,7 @@ void BlobStorage::UpdateStats() {
levels_file_count_.assign(cf_options_.num_levels, 0);
uint64_t live_blob_file_size = 0, num_live_blob_file = 0;
uint64_t obsolete_blob_file_size = 0, num_obsolete_blob_file = 0;
uint64_t pending_punch_hole_size = 0, effective_blob_file_size = 0;
uint64_t pending_punch_hole_size = 0;
std::unordered_map<int, uint64_t> ratio_levels;

// collect metrics
Expand All @@ -287,7 +286,6 @@ void BlobStorage::UpdateStats() {
if (file.second->file_state() != BlobFileMeta::FileState::kPendingInit) {
live_blob_file_size += file.second->file_size();
pending_punch_hole_size += file.second->GetHolePunchableSize();
effective_blob_file_size += file.second->effective_file_size();
ratio_levels[static_cast<int>(file.second->GetDiscardableRatioLevel())] +=
1;
}
Expand All @@ -309,8 +307,6 @@ void BlobStorage::UpdateStats() {
}
SetStats(stats_, cf_id_, TitanInternalStats::PENDING_PUNCH_HOLE_SIZE,
pending_punch_hole_size);
SetStats(stats_, cf_id_, TitanInternalStats::EFFECTIVE_BLOB_FILE_SIZE,
effective_blob_file_size);
}
void BlobStorage::ComputeGCScore() {
UpdateStats();
Expand Down Expand Up @@ -343,7 +339,7 @@ void BlobStorage::ComputeGCScore() {
.score = gc_score,
});
} else {
if (cf_options_.enable_punch_hole_gc &&
if (cf_options_.punch_hole_threshold > 0 &&
file.second->GetHolePunchableSize() >
cf_options_.punch_hole_threshold) {
punch_hole_score_.emplace_back(GCScore{
Expand Down
2 changes: 1 addition & 1 deletion src/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -821,7 +821,7 @@ void TitanDBImpl::ReleaseSnapshot(const Snapshot* snapshot) {
{
MutexLock l(&mutex_);
if (pending_punch_hole_gc_ != nullptr && !punch_hole_gc_running_ &&
pending_punch_hole_gc_->snapshot()->GetSequenceNumber() ==
pending_punch_hole_gc_->snapshot()->GetSequenceNumber() <=
GetOldestSnapshotSequence() &&
bg_gc_scheduled_ < db_options_.max_background_gc) {
if (db_options_.disable_background_gc) return;
Expand Down
2 changes: 1 addition & 1 deletion src/db_impl_gc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ Status TitanDBImpl::BackgroundGC(LogBuffer* log_buffer,
stats_.get());
blob_gc = blob_gc_picker->PickBlobGC(
blob_storage.get(),
/*allow_punch_hole=*/cf_options.enable_punch_hole_gc &&
/*allow_punch_hole=*/cf_options.punch_hole_threshold > 0 &&
pending_punch_hole_gc_ == nullptr);
}

Expand Down
3 changes: 0 additions & 3 deletions src/options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ TitanCFOptions::TitanCFOptions(const ColumnFamilyOptions& cf_opts,
skip_value_in_compaction_filter(
immutable_opts.skip_value_in_compaction_filter),
block_size(immutable_opts.block_size),
enable_punch_hole_gc(immutable_opts.enable_punch_hole_gc),
punch_hole_threshold(mutable_opts.punch_hole_threshold) {}

void TitanCFOptions::Dump(Logger* logger) const {
Expand Down Expand Up @@ -94,8 +93,6 @@ void TitanCFOptions::Dump(Logger* logger) const {
TITAN_LOG_HEADER(logger,
"TtitanCFOptions.block_size : %" PRIu64,
block_size);
TITAN_LOG_HEADER(logger, "TitanCFOptions.enable_punch_hole_gc : %d",
enable_punch_hole_gc);
TITAN_LOG_HEADER(logger,
"TitanCFOptions.punch_hole_threshold : %" PRIu64,
punch_hole_threshold);
Expand Down
15 changes: 11 additions & 4 deletions src/punch_hole_gc_job.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "punch_hole_gc_job.h"

#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>

#include "db/db_impl/db_impl.h"
Expand Down Expand Up @@ -44,14 +45,14 @@ Status PunchHoleGCJob::HolePunchBlobFiles() {
Status PunchHoleGCJob::HolePunchSingleBlobFile(
std::shared_ptr<BlobFileMeta> file) {
Status s;
auto fd = open(BlobFileName(db_options_.dirname, file->file_number()).c_str(),
O_WRONLY);
std::unique_ptr<RandomAccessFileReader> file_reader;
s = NewBlobFileReader(file->file_number(), 0, db_options_, env_options_, env_,
&file_reader);
if (!s.ok()) {
return s;
}
auto fd = open(BlobFileName(db_options_.dirname, file->file_number()).c_str(),
O_WRONLY);
uint64_t effective_file_size = 0;
uint64_t aligned_data_size = 0;
std::unique_ptr<BlobFileIterator> iter(
Expand All @@ -74,8 +75,7 @@ Status PunchHoleGCJob::HolePunchSingleBlobFile(
return s;
}

aligned_data_size = (blob_index.blob_handle.size + block_size - 1) /
block_size * block_size;
aligned_data_size = Roundup(blob_index.blob_handle.size, block_size);

if (!discardable) {
effective_file_size += aligned_data_size;
Expand All @@ -97,6 +97,12 @@ Status PunchHoleGCJob::HolePunchSingleBlobFile(
if (!iter->status().ok()) {
return iter->status();
}
struct stat st;
if (fstat(fd, &st) != 0) {
// Do nothing, so far, this is only for stats.
}
close(fd);
disk_usage_map_[file->file_number()] = st.st_blocks * 512;
effective_file_size_map_[file->file_number()] = effective_file_size;
return Status::OK();
}
Expand Down Expand Up @@ -147,6 +153,7 @@ void PunchHoleGCJob::UpdateBlobFilesMeta() {
continue;
}
file->set_effective_file_size(it->second);
file->set_disk_uage(disk_usage_map_[file->file_number()]);
}
}

Expand Down
1 change: 1 addition & 0 deletions src/punch_hole_gc_job.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ class PunchHoleGCJob {
std::atomic_bool* shuting_down_{nullptr};

std::unordered_map<uint64_t, uint64_t> effective_file_size_map_;
std::unordered_map<uint64_t, int64_t> disk_usage_map_;

// TODO: Add more stats

Expand Down
3 changes: 1 addition & 2 deletions src/punch_hole_gc_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ TEST_F(PunchHoleGCTest, PunchHole) {
rocksdb::SyncPoint::GetInstance()->EnableProcessing();

DisableMergeSmall();
options_.enable_punch_hole_gc = true;
options_.disable_background_gc = false;
options_.disable_auto_compactions = false;
options_.punch_hole_threshold = 4096;
Expand Down Expand Up @@ -181,9 +180,9 @@ TEST_F(PunchHoleGCTest, PunchHole) {
ASSERT_EQ(value, values[i]);
}
}
options_.enable_punch_hole_gc = false;
options_.disable_background_gc = true;
options_.disable_auto_compactions = true;
options_.punch_hole_threshold = 0;
}

} // namespace titandb
Expand Down
1 change: 0 additions & 1 deletion src/titan_stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ class TitanInternalStats {
NUM_DISCARDABLE_RATIO_LE100,

PENDING_PUNCH_HOLE_SIZE,
EFFECTIVE_BLOB_FILE_SIZE,

INTERNAL_STATS_ENUM_MAX,
};
Expand Down

0 comments on commit 1f0ecf1

Please sign in to comment.