From b65f1fbb3e31b7193cf40f71a5c6fefdeaf5013c Mon Sep 17 00:00:00 2001 From: Jian Zhang Date: Mon, 18 Dec 2023 23:42:55 +0800 Subject: [PATCH] chore: replace gsn with psn in wal --- source/concurrency-recovery/HistoryTree.hpp | 9 +- source/concurrency-recovery/Recovery.hpp | 43 +- source/concurrency-recovery/WALEntry.hpp | 21 +- source/concurrency-recovery/Worker.hpp | 4 +- source/storage/btree/BTreeVI.cpp | 7 +- source/storage/btree/core/BTreeGeneric.cpp | 408 +++++++++--------- source/storage/btree/core/BTreeGeneric.hpp | 7 +- source/storage/btree/core/BTreeWALPayload.hpp | 8 + .../buffer-manager/GuardedBufferFrame.hpp | 3 +- source/storage/buffer-manager/Swip.hpp | 15 +- tests/BTreeVILoggingAndRecoveryTest.cpp | 29 +- 11 files changed, 299 insertions(+), 255 deletions(-) diff --git a/source/concurrency-recovery/HistoryTree.hpp b/source/concurrency-recovery/HistoryTree.hpp index c474cac1..343635e7 100644 --- a/source/concurrency-recovery/HistoryTree.hpp +++ b/source/concurrency-recovery/HistoryTree.hpp @@ -31,11 +31,14 @@ using BTreeLL = leanstore::storage::btree::BTreeLL; class HistoryTree : public HistoryTreeInterface { private: struct alignas(64) Session { - BufferFrame *rightmost_bf, *leftmost_bf; - u64 rightmost_version, leftmost_version; + BufferFrame* rightmost_bf; + BufferFrame* leftmost_bf; + u64 rightmost_version; + u64 leftmost_version; s64 rightmost_pos = -1; TXID last_tx_id; - bool rightmost_init = false, leftmost_init = false; + bool rightmost_init = false; + bool leftmost_init = false; }; Session update_sessions[leanstore::cr::STATIC_MAX_WORKERS]; Session remove_sessions[leanstore::cr::STATIC_MAX_WORKERS]; diff --git a/source/concurrency-recovery/Recovery.hpp b/source/concurrency-recovery/Recovery.hpp index d18a0762..a74f99db 100644 --- a/source/concurrency-recovery/Recovery.hpp +++ b/source/concurrency-recovery/Recovery.hpp @@ -3,6 +3,7 @@ #include "Units.hpp" #include "storage/btree/BTreeLL.hpp" #include "storage/btree/BTreeVI.hpp" +#include "storage/btree/core/BTreeExclusiveIterator.hpp" #include "utils/Defer.hpp" #include @@ -154,10 +155,10 @@ inline void Recovery::Analysis() { mActiveTxTable[walEntry->mTxId] = offset; auto& bf = ResolvePage(complexEntry->mPageId); - DCHECK(bf.header.mPageId == complexEntry->mPageId); - DCHECK(bf.page.mBTreeId == complexEntry->mTreeId); + // DCHECK(bf.header.mPageId == complexEntry->mPageId); + // DCHECK(bf.page.mBTreeId == complexEntry->mTreeId); - if (complexEntry->gsn > bf.page.mGSN && + if (complexEntry->mPSN >= bf.page.mPSN && mDirtyPageTable.find(complexEntry->mPageId) == mDirtyPageTable.end()) { // record the first WALEntry that makes the page dirty @@ -195,7 +196,7 @@ inline void Recovery::Redo() { auto complexEntry = reinterpret_cast(walEntryPtr); DCHECK(bytesRead == complexEntry->size); if (mDirtyPageTable.find(complexEntry->mPageId) == mDirtyPageTable.end() || - complexEntry->gsn < mDirtyPageTable[complexEntry->mPageId]) { + offset < mDirtyPageTable[complexEntry->mPageId]) { offset += bytesRead; continue; } @@ -203,9 +204,41 @@ inline void Recovery::Redo() { // TODO(jian.z): redo previous operations on the page auto& bf = ResolvePage(complexEntry->mPageId); DCHECK(bf.header.mPageId == complexEntry->mPageId); - DCHECK(bf.page.mBTreeId == complexEntry->mTreeId); + // DCHECK(bf.page.mBTreeId == complexEntry->mTreeId); SCOPED_DEFER(bf.header.mKeepInMemory = false); + auto walPayload = reinterpret_cast( + complexEntry->payload); + switch (walPayload->type) { + case leanstore::storage::btree::WALPayload::TYPE::WALInsert: { + auto node = reinterpret_cast( + bf.page.mPayload); + auto walInsert = + dynamic_cast(walPayload); + auto key = walInsert->GetKey(); + auto val = walInsert->GetVal(); + auto payloadSize = + val.size() + sizeof(leanstore::storage::btree::BTreeVI::ChainedTuple); + auto slotId = node->insertDoNotCopyPayload(key, payloadSize, -1); + auto payload = MutableSlice(node->ValData(slotId), node->ValSize(slotId)); + // TODO(jian.z): store worker id in wal + // TODO(jian.z): store transaction start ts in wal transaction begin + auto& primaryVersion = + *new (payload.data()) + leanstore::storage::btree::BTreeVI::ChainedTuple( + cr::Worker::my().mWorkerId, cr::activeTX().startTS()); + std::memcpy(primaryVersion.payload, val.data(), val.size()); + + break; + // insert on the btree + // for convenience, use BTreeExclusiveIterator directly + } + default: { + DCHECK(false) << "Unhandled WALPayload::TYPE: " + << std::to_string(static_cast(walPayload->type)); + } + } + offset += bytesRead; continue; } diff --git a/source/concurrency-recovery/WALEntry.hpp b/source/concurrency-recovery/WALEntry.hpp index 5fea2e39..19347943 100644 --- a/source/concurrency-recovery/WALEntry.hpp +++ b/source/concurrency-recovery/WALEntry.hpp @@ -95,10 +95,9 @@ class WALEntrySimple : public WALEntry { class WALEntryComplex : public WALEntry { public: - /// Global sequence number of the WALEntry, used for concurrency control and - /// transaction isolation. Also used to verify whether a page has been - /// modified since last checkpoint. - LID gsn; + /// Page sequence number of the WALEntry, indicate the page version this WAL + /// entry is based on. + LID mPSN; /// The btree ID of the WALEntry, used to identify the btree node together /// with page ID. @@ -115,8 +114,8 @@ class WALEntryComplex : public WALEntry { public: WALEntryComplex() = default; - WALEntryComplex(LID lsn, u64 size, LID gsn, TREEID treeId, PID pageId) - : WALEntry(lsn, size, TYPE::COMPLEX), gsn(gsn), mTreeId(treeId), + WALEntryComplex(LID lsn, u64 size, LID psn, TREEID treeId, PID pageId) + : WALEntry(lsn, size, TYPE::COMPLEX), mPSN(psn), mTreeId(treeId), mPageId(pageId) { } @@ -224,25 +223,25 @@ inline std::unique_ptr WALEntry::ToJSON() { inline std::unique_ptr WALEntryComplex::ToJSON() { auto doc = WALEntry::ToJSON(); - // gsn + // psn { rapidjson::Value member; - member.SetUint64(gsn); - doc->AddMember("GSN", member, doc->GetAllocator()); + member.SetUint64(mPSN); + doc->AddMember("mPSN", member, doc->GetAllocator()); } // treeId { rapidjson::Value member; member.SetInt64(mTreeId); - doc->AddMember("treeId", member, doc->GetAllocator()); + doc->AddMember("mTreeId", member, doc->GetAllocator()); } // pageId { rapidjson::Value member; member.SetUint64(mPageId); - doc->AddMember("pageId", member, doc->GetAllocator()); + doc->AddMember("mPageId", member, doc->GetAllocator()); } return doc; diff --git a/source/concurrency-recovery/Worker.hpp b/source/concurrency-recovery/Worker.hpp index a6f81812..0b087e3f 100644 --- a/source/concurrency-recovery/Worker.hpp +++ b/source/concurrency-recovery/Worker.hpp @@ -455,7 +455,7 @@ inline Logging& Logging::other(WORKERID otherWorkerId) { template inline WALPayloadHandler Logging::ReserveWALEntryComplex(u64 payloadSize, - PID pageId, LID gsn, + PID pageId, LID psn, TREEID treeId, Args&&... args) { SCOPED_DEFER(mPrevLSN = mActiveWALEntryComplex->lsn); @@ -466,7 +466,7 @@ inline WALPayloadHandler Logging::ReserveWALEntryComplex(u64 payloadSize, DCHECK(walContiguousFreeSpace() >= entrySize); mActiveWALEntryComplex = - new (entryPtr) WALEntryComplex(entryLSN, entrySize, gsn, treeId, pageId); + new (entryPtr) WALEntryComplex(entryLSN, entrySize, psn, treeId, pageId); mActiveWALEntryComplex->mPrevLSN = mPrevLSN; mActiveWALEntryComplex->mTxId = leanstore::cr::Worker::my().mActiveTx.mStartTs; diff --git a/source/storage/btree/BTreeVI.cpp b/source/storage/btree/BTreeVI.cpp index 66f1ce0b..f019b0b8 100644 --- a/source/storage/btree/BTreeVI.cpp +++ b/source/storage/btree/BTreeVI.cpp @@ -384,7 +384,7 @@ OP_RESULT BTreeVI::insert(Slice key, Slice val) { cr::activeTX().markAsWrite(); cr::Worker::my().mLogging.walEnsureEnoughSpace(PAGE_SIZE * 1); - u16 payload_length = val.size() + sizeof(ChainedTuple); + u16 payloadSize = val.size() + sizeof(ChainedTuple); while (true) { JUMPMU_TRY() { @@ -409,7 +409,7 @@ OP_RESULT BTreeVI::insert(Slice key, Slice val) { // Not implemented: maybe it has been removed but no GCed } - ret = iterator.enoughSpaceInCurrentNode(key, payload_length); + ret = iterator.enoughSpaceInCurrentNode(key, payloadSize); if (ret == OP_RESULT::NOT_ENOUGH_SPACE) { iterator.splitForKey(key); JUMPMU_CONTINUE; @@ -420,7 +420,8 @@ OP_RESULT BTreeVI::insert(Slice key, Slice val) { key.size() + val.size(), key, val); walHandler.SubmitWal(); - iterator.insertInCurrentNode(key, payload_length); + // insert + iterator.insertInCurrentNode(key, payloadSize); MutableSlice payload = iterator.mutableValue(); auto& primaryVersion = *new (payload.data()) ChainedTuple( cr::Worker::my().mWorkerId, cr::activeTX().startTS()); diff --git a/source/storage/btree/core/BTreeGeneric.cpp b/source/storage/btree/core/BTreeGeneric.cpp index 30e657dd..825b804d 100644 --- a/source/storage/btree/core/BTreeGeneric.cpp +++ b/source/storage/btree/core/BTreeGeneric.cpp @@ -27,115 +27,121 @@ void BTreeGeneric::create(TREEID btreeId, Config config) { mMetaNodeSwip.AsBufferFrame().page.mBTreeId = btreeId; guard.unlock(); - auto root_write_guard_h = GuardedBufferFrame(btreeId); - auto root_write_guard = - ExclusiveGuardedBufferFrame(std::move(root_write_guard_h)); - root_write_guard.InitPayload(true); - - GuardedBufferFrame meta_guard(mMetaNodeSwip); - ExclusiveGuardedBufferFrame meta_page(std::move(meta_guard)); - meta_page->mIsLeaf = false; + auto guardedRoot = GuardedBufferFrame(btreeId); + auto exclusiveGuardedRoot = + ExclusiveGuardedBufferFrame(std::move(guardedRoot)); + exclusiveGuardedRoot.InitPayload(true); + + GuardedBufferFrame guardedMeta(mMetaNodeSwip); + ExclusiveGuardedBufferFrame exclusiveGuardedMeta(std::move(guardedMeta)); + exclusiveGuardedMeta->mIsLeaf = false; // HACK: use upper of meta node as a swip to the storage root - meta_page->mRightMostChildSwip = root_write_guard.bf(); + exclusiveGuardedMeta->mRightMostChildSwip = exclusiveGuardedRoot.bf(); // TODO: write WALs - root_write_guard.IncPageGSN(); - meta_page.IncPageGSN(); + exclusiveGuardedRoot.IncPageGSN(); + exclusiveGuardedMeta.IncPageGSN(); } -void BTreeGeneric::trySplit(BufferFrame& to_split, s16 favored_split_pos) { +void BTreeGeneric::trySplit(BufferFrame& toSplit, s16 favoredSplitPos) { cr::Worker::my().mLogging.walEnsureEnoughSpace(PAGE_SIZE * 1); - auto parent_handler = findParentEager(*this, to_split); - GuardedBufferFrame p_guard = - parent_handler.getParentReadPageGuard(); - GuardedBufferFrame c_guard = GuardedBufferFrame( - p_guard, parent_handler.mChildSwip.CastTo()); - if (c_guard->mNumSeps <= 1) + auto parentHandler = findParentEager(*this, toSplit); + auto guardedParent = parentHandler.getParentReadPageGuard(); + auto guardedChild = GuardedBufferFrame( + guardedParent, parentHandler.mChildSwip.CastTo()); + if (guardedChild->mNumSeps <= 1) { + DLOG(WARNING) << "Split failed, not enough separators in node" + << ", toSplit.header.mPageId=" << toSplit.header.mPageId + << ", favoredSplitPos=" << favoredSplitPos + << ", guardedChild->mNumSeps=" << guardedChild->mNumSeps; return; + } - BTreeNode::SeparatorInfo sep_info; - if (favored_split_pos < 0 || favored_split_pos >= c_guard->mNumSeps - 1) { + BTreeNode::SeparatorInfo sepInfo; + if (favoredSplitPos < 0 || favoredSplitPos >= guardedChild->mNumSeps - 1) { if (config.mUseBulkInsert) { - favored_split_pos = c_guard->mNumSeps - 2; - sep_info = - BTreeNode::SeparatorInfo{c_guard->getFullKeyLen(favored_split_pos), - static_cast(favored_split_pos), false}; + favoredSplitPos = guardedChild->mNumSeps - 2; + sepInfo = + BTreeNode::SeparatorInfo{guardedChild->getFullKeyLen(favoredSplitPos), + static_cast(favoredSplitPos), false}; } else { - sep_info = c_guard->findSep(); + sepInfo = guardedChild->findSep(); } } else { // Split on a specified position, used by contention management - sep_info = - BTreeNode::SeparatorInfo{c_guard->getFullKeyLen(favored_split_pos), - static_cast(favored_split_pos), false}; + sepInfo = + BTreeNode::SeparatorInfo{guardedChild->getFullKeyLen(favoredSplitPos), + static_cast(favoredSplitPos), false}; } - // u8 sep_key[sep_info.length]; - ARRAY_ON_STACK(sep_key, u8, sep_info.length); - if (isMetaNode(p_guard)) { // root split - auto p_x_guard = ExclusiveGuardedBufferFrame(std::move(p_guard)); - auto c_x_guard = ExclusiveGuardedBufferFrame(std::move(c_guard)); - assert(mHeight == 1 || !c_x_guard->mIsLeaf); - // ------------------------------------------------------------------------------------- + // u8 sepKey[sepInfo.length]; + ARRAY_ON_STACK(sepKey, u8, sepInfo.length); + if (isMetaNode(guardedParent)) { + // split the root node + auto xGuardedParent = ExclusiveGuardedBufferFrame(std::move(guardedParent)); + auto xGuardedChild = ExclusiveGuardedBufferFrame(std::move(guardedChild)); + DCHECK(mHeight == 1 || !xGuardedChild->mIsLeaf); + // create new root - auto new_root_h = GuardedBufferFrame(mTreeId, false); - auto new_root = - ExclusiveGuardedBufferFrame(std::move(new_root_h)); - auto new_left_node_h = GuardedBufferFrame(mTreeId); - auto new_left_node = - ExclusiveGuardedBufferFrame(std::move(new_left_node_h)); - // ------------------------------------------------------------------------------------- + auto guardedNewRoot = GuardedBufferFrame(mTreeId, false); + auto xGuardedNewRoot = + ExclusiveGuardedBufferFrame(std::move(guardedNewRoot)); + auto guardedNewLeft = GuardedBufferFrame(mTreeId); + auto xGuardedNewLeft = + ExclusiveGuardedBufferFrame(std::move(guardedNewLeft)); + if (config.mEnableWal) { // TODO: System transactions - new_root.IncPageGSN(); - new_left_node.IncPageGSN(); - c_x_guard.IncPageGSN(); + xGuardedNewRoot.IncPageGSN(); + xGuardedNewLeft.IncPageGSN(); + xGuardedChild.IncPageGSN(); } else { - new_root.MarkAsDirty(); - new_left_node.MarkAsDirty(); - c_x_guard.MarkAsDirty(); + xGuardedNewRoot.MarkAsDirty(); + xGuardedNewLeft.MarkAsDirty(); + xGuardedChild.MarkAsDirty(); } - // ------------------------------------------------------------------------------------- + auto exec = [&]() { - new_root.keepAlive(); - new_root.InitPayload(false); - new_root->mRightMostChildSwip = c_x_guard.bf(); - p_x_guard->mRightMostChildSwip = new_root.bf(); - // ------------------------------------------------------------------------------------- - new_left_node.InitPayload(c_x_guard->mIsLeaf); - c_x_guard->getSep(sep_key, sep_info); - c_x_guard->split(new_root, new_left_node, sep_info.slot, sep_key, - sep_info.length); + xGuardedNewRoot.keepAlive(); + xGuardedNewRoot.InitPayload(false); + xGuardedNewRoot->mRightMostChildSwip = xGuardedChild.bf(); + xGuardedParent->mRightMostChildSwip = xGuardedNewRoot.bf(); + + xGuardedNewLeft.InitPayload(xGuardedChild->mIsLeaf); + xGuardedChild->getSep(sepKey, sepInfo); + xGuardedChild->split(xGuardedNewRoot, xGuardedNewLeft, sepInfo.slot, + sepKey, sepInfo.length); }; if (config.mEnableWal) { auto newRootWalHandler = - new_root.ReserveWALPayload(0, mTreeId); + xGuardedNewRoot.ReserveWALPayload(0, mTreeId); newRootWalHandler.SubmitWal(); auto newLeftWalHandler = - new_left_node.ReserveWALPayload(0, mTreeId); + xGuardedNewLeft.ReserveWALPayload(0, mTreeId); newLeftWalHandler.SubmitWal(); - auto parentPageId(new_root.bf()->header.mPageId); - auto lhsPageId(new_left_node.bf()->header.mPageId); - auto rhsPageId(c_x_guard.bf()->header.mPageId); + auto parentPageId(xGuardedNewRoot.bf()->header.mPageId); + auto lhsPageId(xGuardedNewLeft.bf()->header.mPageId); + auto rhsPageId(xGuardedChild.bf()->header.mPageId); - auto curRightWalHandler = c_x_guard.ReserveWALPayload( - 0, parentPageId, lhsPageId, rhsPageId); + auto curRightWalHandler = + xGuardedChild.ReserveWALPayload( + 0, parentPageId, lhsPageId, rhsPageId); curRightWalHandler.SubmitWal(); - // ------------------------------------------------------------------------------------- + exec(); - // ------------------------------------------------------------------------------------- - auto rootWalHandler = new_root.ReserveWALPayload( + + auto rootWalHandler = xGuardedNewRoot.ReserveWALPayload( 0, parentPageId, lhsPageId, rhsPageId); rootWalHandler.SubmitWal(); - // ------------------------------------------------------------------------------------- - auto leftWalHandler = new_left_node.ReserveWALPayload( + + auto leftWalHandler = xGuardedNewLeft.ReserveWALPayload( 0, parentPageId, lhsPageId, rhsPageId); leftWalHandler.SubmitWal(); } else { exec(); } - // ------------------------------------------------------------------------------------- + mHeight++; COUNTERS_BLOCK() { WorkerCounters::myCounters().dt_split[mTreeId]++; @@ -144,65 +150,68 @@ void BTreeGeneric::trySplit(BufferFrame& to_split, s16 favored_split_pos) { } else { // Parent is not root const u16 space_needed_for_separator = - p_guard->spaceNeeded(sep_info.length, sizeof(SwipType)); - if (p_guard->hasEnoughSpaceFor( - space_needed_for_separator)) { // Is there enough space in the - // parent for the separator? - auto p_x_guard = ExclusiveGuardedBufferFrame(std::move(p_guard)); - auto c_x_guard = ExclusiveGuardedBufferFrame(std::move(c_guard)); - // ------------------------------------------------------------------------------------- - p_x_guard->requestSpaceFor(space_needed_for_separator); - assert(&mMetaNodeSwip.AsBufferFrame() != p_x_guard.bf()); - assert(!p_x_guard->mIsLeaf); - // ------------------------------------------------------------------------------------- - auto new_left_node_h = GuardedBufferFrame(mTreeId); - auto new_left_node = - ExclusiveGuardedBufferFrame(std::move(new_left_node_h)); - // ------------------------------------------------------------------------------------- + guardedParent->spaceNeeded(sepInfo.length, sizeof(SwipType)); + if (guardedParent->hasEnoughSpaceFor(space_needed_for_separator)) { + // Is there enough space in the parent for the separator? + auto xGuardedParent = + ExclusiveGuardedBufferFrame(std::move(guardedParent)); + auto xGuardedChild = ExclusiveGuardedBufferFrame(std::move(guardedChild)); + + xGuardedParent->requestSpaceFor(space_needed_for_separator); + DCHECK(&mMetaNodeSwip.AsBufferFrame() != xGuardedParent.bf()); + DCHECK(!xGuardedParent->mIsLeaf); + + auto guardedNewLeft = GuardedBufferFrame(mTreeId); + auto xGuardedNewLeft = + ExclusiveGuardedBufferFrame(std::move(guardedNewLeft)); + // Increment GSNs before writing WAL to make sure that these pages marked // as dirty regardless of the FLAGS_wal if (config.mEnableWal) { - p_x_guard.IncPageGSN(); - new_left_node.IncPageGSN(); - c_x_guard.IncPageGSN(); + xGuardedParent.IncPageGSN(); + xGuardedNewLeft.IncPageGSN(); + xGuardedChild.IncPageGSN(); } else { - p_x_guard.MarkAsDirty(); - new_left_node.MarkAsDirty(); - c_x_guard.MarkAsDirty(); + xGuardedParent.MarkAsDirty(); + xGuardedNewLeft.MarkAsDirty(); + xGuardedChild.MarkAsDirty(); } - // ------------------------------------------------------------------------------------- + auto exec = [&]() { - new_left_node.InitPayload(c_x_guard->mIsLeaf); - c_x_guard->getSep(sep_key, sep_info); - c_x_guard->split(p_x_guard, new_left_node, sep_info.slot, sep_key, - sep_info.length); + xGuardedNewLeft.InitPayload(xGuardedChild->mIsLeaf); + xGuardedChild->getSep(sepKey, sepInfo); + xGuardedChild->split(xGuardedParent, xGuardedNewLeft, sepInfo.slot, + sepKey, sepInfo.length); }; - // ------------------------------------------------------------------------------------- + if (config.mEnableWal) { auto newLeftWalHandler = - new_left_node.ReserveWALPayload(0, mTreeId); + xGuardedNewLeft.ReserveWALPayload(0, mTreeId); newLeftWalHandler.SubmitWal(); - auto parentPageId = p_x_guard.bf()->header.mPageId; - auto lhsPageId = new_left_node.bf()->header.mPageId; - auto rhsPageId = c_x_guard.bf()->header.mPageId; + auto parentPageId = xGuardedParent.bf()->header.mPageId; + auto lhsPageId = xGuardedNewLeft.bf()->header.mPageId; + auto rhsPageId = xGuardedChild.bf()->header.mPageId; - auto curRightWalHandler = c_x_guard.ReserveWALPayload( - 0, parentPageId, lhsPageId, rhsPageId); + auto curRightWalHandler = + xGuardedChild.ReserveWALPayload( + 0, parentPageId, lhsPageId, rhsPageId); curRightWalHandler.SubmitWal(); exec(); - auto parentWalHandler = p_x_guard.ReserveWALPayload( - 0, parentPageId, lhsPageId, rhsPageId); + auto parentWalHandler = + xGuardedParent.ReserveWALPayload( + 0, parentPageId, lhsPageId, rhsPageId); parentWalHandler.SubmitWal(); newLeftWalHandler = - new_left_node.ReserveWALPayload(0, mTreeId); + xGuardedNewLeft.ReserveWALPayload(0, mTreeId); newLeftWalHandler.SubmitWal(); - auto leftWalHandler = new_left_node.ReserveWALPayload( - 0, parentPageId, lhsPageId, rhsPageId); + auto leftWalHandler = + xGuardedNewLeft.ReserveWALPayload( + 0, parentPageId, lhsPageId, rhsPageId); leftWalHandler.SubmitWal(); } else { exec(); @@ -211,125 +220,130 @@ void BTreeGeneric::trySplit(BufferFrame& to_split, s16 favored_split_pos) { WorkerCounters::myCounters().dt_split[mTreeId]++; } } else { - p_guard.unlock(); - c_guard.unlock(); + guardedParent.unlock(); + guardedChild.unlock(); // Must split parent head to make space for separator - trySplit(*p_guard.mBf); + trySplit(*guardedParent.mBf); } } } bool BTreeGeneric::tryMerge(BufferFrame& to_merge, bool swizzle_sibling) { - // pos == p_guard->mNumSeps means that the current node is the upper swip in - // parent - auto parent_handler = findParentEager(*this, to_merge); - GuardedBufferFrame p_guard = - parent_handler.getParentReadPageGuard(); - GuardedBufferFrame c_guard = GuardedBufferFrame( - p_guard, parent_handler.mChildSwip.CastTo()); - int pos_in_parent = parent_handler.mPosInParent; - if (isMetaNode(p_guard) || - c_guard->freeSpaceAfterCompaction() < BTreeNodeHeader::sUnderFullSize) { - p_guard.unlock(); - c_guard.unlock(); + // pos == guardedParent->mNumSeps means that the current node is the upper + // swip in parent + auto parentHandler = findParentEager(*this, to_merge); + GuardedBufferFrame guardedParent = + parentHandler.getParentReadPageGuard(); + GuardedBufferFrame guardedChild = GuardedBufferFrame( + guardedParent, parentHandler.mChildSwip.CastTo()); + int pos_in_parent = parentHandler.mPosInParent; + if (isMetaNode(guardedParent) || guardedChild->freeSpaceAfterCompaction() < + BTreeNodeHeader::sUnderFullSize) { + guardedParent.unlock(); + guardedChild.unlock(); return false; } // ------------------------------------------------------------------------------------- volatile bool merged_successfully = false; - if (p_guard->mNumSeps > 1) { - assert(pos_in_parent <= p_guard->mNumSeps); + if (guardedParent->mNumSeps > 1) { + assert(pos_in_parent <= guardedParent->mNumSeps); // ------------------------------------------------------------------------------------- - p_guard.JumpIfModifiedByOthers(); - c_guard.JumpIfModifiedByOthers(); + guardedParent.JumpIfModifiedByOthers(); + guardedChild.JumpIfModifiedByOthers(); // ------------------------------------------------------------------------------------- // TODO: write WALs auto merge_left = [&]() { - Swip& l_swip = p_guard->getChild(pos_in_parent - 1); + Swip& l_swip = guardedParent->getChild(pos_in_parent - 1); if (!swizzle_sibling && l_swip.isEVICTED()) { return false; } - auto l_guard = GuardedBufferFrame(p_guard, l_swip); - auto p_x_guard = ExclusiveGuardedBufferFrame(std::move(p_guard)); - auto c_x_guard = ExclusiveGuardedBufferFrame(std::move(c_guard)); + auto l_guard = GuardedBufferFrame(guardedParent, l_swip); + auto xGuardedParent = + ExclusiveGuardedBufferFrame(std::move(guardedParent)); + auto xGuardedChild = ExclusiveGuardedBufferFrame(std::move(guardedChild)); auto l_x_guard = ExclusiveGuardedBufferFrame(std::move(l_guard)); // ------------------------------------------------------------------------------------- - ENSURE(c_x_guard->mIsLeaf == l_x_guard->mIsLeaf); + ENSURE(xGuardedChild->mIsLeaf == l_x_guard->mIsLeaf); // ------------------------------------------------------------------------------------- - if (!l_x_guard->merge(pos_in_parent - 1, p_x_guard, c_x_guard)) { - p_guard = std::move(p_x_guard); - c_guard = std::move(c_x_guard); + if (!l_x_guard->merge(pos_in_parent - 1, xGuardedParent, xGuardedChild)) { + guardedParent = std::move(xGuardedParent); + guardedChild = std::move(xGuardedChild); l_guard = std::move(l_x_guard); return false; } // ------------------------------------------------------------------------------------- if (config.mEnableWal) { - p_guard.IncPageGSN(); - c_guard.IncPageGSN(); + guardedParent.IncPageGSN(); + guardedChild.IncPageGSN(); l_guard.IncPageGSN(); } else { - p_guard.MarkAsDirty(); - c_guard.MarkAsDirty(); + guardedParent.MarkAsDirty(); + guardedChild.MarkAsDirty(); l_guard.MarkAsDirty(); } // ------------------------------------------------------------------------------------- l_x_guard.reclaim(); // ------------------------------------------------------------------------------------- - p_guard = std::move(p_x_guard); - c_guard = std::move(c_x_guard); + guardedParent = std::move(xGuardedParent); + guardedChild = std::move(xGuardedChild); return true; }; auto merge_right = [&]() { - Swip& r_swip = ((pos_in_parent + 1) == p_guard->mNumSeps) - ? p_guard->mRightMostChildSwip - : p_guard->getChild(pos_in_parent + 1); + Swip& r_swip = + ((pos_in_parent + 1) == guardedParent->mNumSeps) + ? guardedParent->mRightMostChildSwip + : guardedParent->getChild(pos_in_parent + 1); if (!swizzle_sibling && r_swip.isEVICTED()) { return false; } - auto r_guard = GuardedBufferFrame(p_guard, r_swip); - auto p_x_guard = ExclusiveGuardedBufferFrame(std::move(p_guard)); - auto c_x_guard = ExclusiveGuardedBufferFrame(std::move(c_guard)); + auto r_guard = GuardedBufferFrame(guardedParent, r_swip); + auto xGuardedParent = + ExclusiveGuardedBufferFrame(std::move(guardedParent)); + auto xGuardedChild = ExclusiveGuardedBufferFrame(std::move(guardedChild)); auto r_x_guard = ExclusiveGuardedBufferFrame(std::move(r_guard)); // ------------------------------------------------------------------------------------- - ENSURE(c_x_guard->mIsLeaf == r_x_guard->mIsLeaf); + ENSURE(xGuardedChild->mIsLeaf == r_x_guard->mIsLeaf); // ------------------------------------------------------------------------------------- - if (!c_x_guard->merge(pos_in_parent, p_x_guard, r_x_guard)) { - p_guard = std::move(p_x_guard); - c_guard = std::move(c_x_guard); + if (!xGuardedChild->merge(pos_in_parent, xGuardedParent, r_x_guard)) { + guardedParent = std::move(xGuardedParent); + guardedChild = std::move(xGuardedChild); r_guard = std::move(r_x_guard); return false; } // ------------------------------------------------------------------------------------- if (config.mEnableWal) { - p_guard.IncPageGSN(); - c_guard.IncPageGSN(); + guardedParent.IncPageGSN(); + guardedChild.IncPageGSN(); r_guard.IncPageGSN(); } else { - p_guard.MarkAsDirty(); - c_guard.MarkAsDirty(); + guardedParent.MarkAsDirty(); + guardedChild.MarkAsDirty(); r_guard.MarkAsDirty(); } // ------------------------------------------------------------------------------------- - c_x_guard.reclaim(); + xGuardedChild.reclaim(); // ------------------------------------------------------------------------------------- - p_guard = std::move(p_x_guard); + guardedParent = std::move(xGuardedParent); r_guard = std::move(r_x_guard); return true; }; - // ATTENTION: don't use c_guard without making sure it was not reclaimed + // ATTENTION: don't use guardedChild without making sure it was not + // reclaimed // ------------------------------------------------------------------------------------- if (pos_in_parent > 0) { merged_successfully = merged_successfully | merge_left(); } - if (!merged_successfully && pos_in_parent < p_guard->mNumSeps) { + if (!merged_successfully && pos_in_parent < guardedParent->mNumSeps) { merged_successfully = merged_successfully | merge_right(); } } // ------------------------------------------------------------------------------------- JUMPMU_TRY() { - GuardedBufferFrame meta_guard(mMetaNodeSwip); - if (!isMetaNode(p_guard) && - p_guard->freeSpaceAfterCompaction() >= BTreeNode::sUnderFullSize) { - if (tryMerge(*p_guard.mBf, true)) { + GuardedBufferFrame guardedMeta(mMetaNodeSwip); + if (!isMetaNode(guardedParent) && + guardedParent->freeSpaceAfterCompaction() >= + BTreeNode::sUnderFullSize) { + if (tryMerge(*guardedParent.mBf, true)) { WorkerCounters::myCounters().dt_merge_parent_succ[mTreeId]++; } else { WorkerCounters::myCounters().dt_merge_parent_fail[mTreeId]++; @@ -439,39 +453,40 @@ s16 BTreeGeneric::mergeLeftIntoRight( // returns true if it has exclusively locked anything BTreeGeneric::XMergeReturnCode BTreeGeneric::XMerge( - GuardedBufferFrame& p_guard, - GuardedBufferFrame& c_guard, ParentSwipHandler& parent_handler) { + GuardedBufferFrame& guardedParent, + GuardedBufferFrame& guardedChild, + ParentSwipHandler& parentHandler) { WorkerCounters::myCounters().dt_researchy[0][1]++; - if (c_guard->fillFactorAfterCompaction() >= 0.9) { + if (guardedChild->fillFactorAfterCompaction() >= 0.9) { return XMergeReturnCode::NOTHING; } const u8 MAX_MERGE_PAGES = FLAGS_xmerge_k; - s16 pos = parent_handler.mPosInParent; + s16 pos = parentHandler.mPosInParent; u8 pages_count = 1; s16 max_right; ARRAY_ON_STACK(guards, GuardedBufferFrame, MAX_MERGE_PAGES); ARRAY_ON_STACK(fully_merged, bool, MAX_MERGE_PAGES); - guards[0] = std::move(c_guard); + guards[0] = std::move(guardedChild); fully_merged[0] = false; double total_fill_factor = guards[0]->fillFactorAfterCompaction(); - // Handle upper swip instead of avoiding p_guard->mNumSeps -1 swip - if (isMetaNode(p_guard) || !guards[0]->mIsLeaf) { - c_guard = std::move(guards[0]); + // Handle upper swip instead of avoiding guardedParent->mNumSeps -1 swip + if (isMetaNode(guardedParent) || !guards[0]->mIsLeaf) { + guardedChild = std::move(guards[0]); return XMergeReturnCode::NOTHING; } for (max_right = pos + 1; (max_right - pos) < MAX_MERGE_PAGES && - (max_right + 1) < p_guard->mNumSeps; + (max_right + 1) < guardedParent->mNumSeps; max_right++) { - if (!p_guard->getChild(max_right).isHOT()) { - c_guard = std::move(guards[0]); + if (!guardedParent->getChild(max_right).isHOT()) { + guardedChild = std::move(guards[0]); return XMergeReturnCode::NOTHING; } - guards[max_right - pos] = - GuardedBufferFrame(p_guard, p_guard->getChild(max_right)); + guards[max_right - pos] = GuardedBufferFrame( + guardedParent, guardedParent->getChild(max_right)); fully_merged[max_right - pos] = false; total_fill_factor += guards[max_right - pos]->fillFactorAfterCompaction(); pages_count++; @@ -482,12 +497,13 @@ BTreeGeneric::XMergeReturnCode BTreeGeneric::XMerge( } } if (((pages_count - std::ceil(total_fill_factor))) < (1)) { - c_guard = std::move(guards[0]); + guardedChild = std::move(guards[0]); return XMergeReturnCode::NOTHING; } - ExclusiveGuardedBufferFrame p_x_guard = std::move(p_guard); - p_x_guard.IncPageGSN(); + ExclusiveGuardedBufferFrame xGuardedParent = + std::move(guardedParent); + xGuardedParent.IncPageGSN(); XMergeReturnCode ret_code = XMergeReturnCode::PARTIAL_MERGE; s16 left_hand, right_hand, ret; @@ -512,7 +528,7 @@ BTreeGeneric::XMergeReturnCode BTreeGeneric::XMerge( right_x_guard.IncPageGSN(); left_x_guard.IncPageGSN(); max_right = left_hand; - ret = mergeLeftIntoRight(p_x_guard, left_hand, left_x_guard, + ret = mergeLeftIntoRight(xGuardedParent, left_hand, left_x_guard, right_x_guard, left_hand == pos); // we unlock only the left page, the right one should not be touched again if (ret == 1) { @@ -529,10 +545,10 @@ BTreeGeneric::XMergeReturnCode BTreeGeneric::XMerge( } } } - if (c_guard.mGuard.mState == GUARD_STATE::MOVED) { - c_guard = std::move(guards[0]); + if (guardedChild.mGuard.mState == GUARD_STATE::MOVED) { + guardedChild = std::move(guards[0]); } - p_guard = std::move(p_x_guard); + guardedParent = std::move(xGuardedParent); return ret_code; } @@ -548,15 +564,15 @@ s64 BTreeGeneric::iterateAllPagesRec(GuardedBufferFrame& node_guard, s64 res = inner(node_guard.ref()); for (u16 i = 0; i < node_guard->mNumSeps; i++) { Swip& c_swip = node_guard->getChild(i); - auto c_guard = GuardedBufferFrame(node_guard, c_swip); - c_guard.JumpIfModifiedByOthers(); - res += iterateAllPagesRec(c_guard, inner, leaf); + auto guardedChild = GuardedBufferFrame(node_guard, c_swip); + guardedChild.JumpIfModifiedByOthers(); + res += iterateAllPagesRec(guardedChild, inner, leaf); } Swip& c_swip = node_guard->mRightMostChildSwip; - auto c_guard = GuardedBufferFrame(node_guard, c_swip); - c_guard.JumpIfModifiedByOthers(); - res += iterateAllPagesRec(c_guard, inner, leaf); + auto guardedChild = GuardedBufferFrame(node_guard, c_swip); + guardedChild.JumpIfModifiedByOthers(); + res += iterateAllPagesRec(guardedChild, inner, leaf); return res; } @@ -565,10 +581,10 @@ s64 BTreeGeneric::iterateAllPages(BTreeNodeCallback inner, BTreeNodeCallback leaf) { while (true) { JUMPMU_TRY() { - GuardedBufferFrame p_guard(mMetaNodeSwip); - GuardedBufferFrame c_guard(p_guard, - p_guard->mRightMostChildSwip); - s64 result = iterateAllPagesRec(c_guard, inner, leaf); + GuardedBufferFrame guardedParent(mMetaNodeSwip); + GuardedBufferFrame guardedChild( + guardedParent, guardedParent->mRightMostChildSwip); + s64 result = iterateAllPagesRec(guardedChild, inner, leaf); JUMPMU_RETURN result; } JUMPMU_CATCH() { @@ -606,8 +622,8 @@ u32 BTreeGeneric::bytesFree() { } void BTreeGeneric::printInfos(uint64_t totalSize) { - GuardedBufferFrame p_guard(mMetaNodeSwip); - GuardedBufferFrame r_guard(p_guard, p_guard->mRightMostChildSwip); + GuardedBufferFrame guardedParent(mMetaNodeSwip); + GuardedBufferFrame r_guard(guardedParent, guardedParent->mRightMostChildSwip); uint64_t cnt = countPages(); cout << "nodes:" << cnt << " innerNodes:" << countInner() << " space:" << (cnt * EFFECTIVE_PAGE_SIZE) / (float)totalSize diff --git a/source/storage/btree/core/BTreeGeneric.hpp b/source/storage/btree/core/BTreeGeneric.hpp index 2b32db18..1672cb95 100644 --- a/source/storage/btree/core/BTreeGeneric.hpp +++ b/source/storage/btree/core/BTreeGeneric.hpp @@ -463,12 +463,13 @@ inline void BTreeGeneric::FindLeafCanJump( WorkerCounters::myCounters().dt_inner_page[mTreeId]++; } - Swip& c_swip = targetGuard->lookupInner(key); + auto& childSwip = targetGuard->lookupInner(key); + DCHECK(!childSwip.IsEmpty()); guardedParent = std::move(targetGuard); if (level == mHeight - 1) { - targetGuard = GuardedBufferFrame(guardedParent, c_swip, mode); + targetGuard = GuardedBufferFrame(guardedParent, childSwip, mode); } else { - targetGuard = GuardedBufferFrame(guardedParent, c_swip); + targetGuard = GuardedBufferFrame(guardedParent, childSwip); } level = level + 1; } diff --git a/source/storage/btree/core/BTreeWALPayload.hpp b/source/storage/btree/core/BTreeWALPayload.hpp index 4383fb9e..33ed27b1 100644 --- a/source/storage/btree/core/BTreeWALPayload.hpp +++ b/source/storage/btree/core/BTreeWALPayload.hpp @@ -149,6 +149,14 @@ struct WALInsert : WALPayload { std::memcpy(payload + key_length, val.data(), value_length); } + Slice GetKey() { + return Slice(payload, key_length); + } + + Slice GetVal() { + return Slice(payload + key_length, value_length); + } + virtual std::unique_ptr ToJSON() override { auto doc = WALPayload::ToJSON(); diff --git a/source/storage/buffer-manager/GuardedBufferFrame.hpp b/source/storage/buffer-manager/GuardedBufferFrame.hpp index e8f3ef8f..d70b62aa 100644 --- a/source/storage/buffer-manager/GuardedBufferFrame.hpp +++ b/source/storage/buffer-manager/GuardedBufferFrame.hpp @@ -191,8 +191,7 @@ template class GuardedBufferFrame { // TODO: verify auto handler = cr::Worker::my().mLogging.ReserveWALEntryComplex( - sizeof(WT) + payloadSize, pageId, - cr::Worker::my().mLogging.GetCurrentGsn(), treeId, + sizeof(WT) + payloadSize, pageId, mBf->page.mPSN, treeId, std::forward(args)...); return handler; } diff --git a/source/storage/buffer-manager/Swip.hpp b/source/storage/buffer-manager/Swip.hpp index 79f85e06..5c446c0c 100644 --- a/source/storage/buffer-manager/Swip.hpp +++ b/source/storage/buffer-manager/Swip.hpp @@ -21,21 +21,26 @@ struct BufferFrame; /// 3. EVICTED. The swip represents a page id. The most most significant bit is /// 1 which marks the swip as "EVICTED". template class Swip { - public: union { u64 mPageId; BufferFrame* bf; }; - Swip() = default; +public: + /// Create an empty swip. + Swip() : mPageId(0){}; + /// Create an swip pointing to the buffer frame. Swip(BufferFrame* bf) : bf(bf) { } + /// Copy construct from another swip. template Swip(Swip& other) : mPageId(other.mPageId) { } +public: + /// Whether two swip is equal. bool operator==(const Swip& other) const { return (raw() == other.raw()); } @@ -52,6 +57,12 @@ template class Swip { return mPageId & evicted_bit; } + /// Indicates whether this swip points to nothing: no evicted bit, no cool + /// bit, the memory pointer is nullptr + bool IsEmpty() { + return mPageId == 0; + } + u64 asPageID() { assert(isEVICTED()); return mPageId & evicted_mask; diff --git a/tests/BTreeVILoggingAndRecoveryTest.cpp b/tests/BTreeVILoggingAndRecoveryTest.cpp index d53d19e9..63e46572 100644 --- a/tests/BTreeVILoggingAndRecoveryTest.cpp +++ b/tests/BTreeVILoggingAndRecoveryTest.cpp @@ -31,15 +31,6 @@ class BTreeVILoggingAndRecoveryTest : public ::testing::Test { }; TEST_F(BTreeVILoggingAndRecoveryTest, SerializeAndDeserialize) { - auto bfCondition = [&](BufferFrame& bf) { return !bf.isFree(); }; - auto bfAction = [&](BufferFrame& bf) { - rapidjson::Document doc(rapidjson::kObjectType); - // doc.SetObject; - bf.ToJSON(&doc, doc.GetAllocator()); - auto jsonStr = leanstore::utils::JsonToStr(&doc); - LOG(INFO) << "Not-free BufferFrame(" << reinterpret_cast(&bf) - << "): " << jsonStr; - }; FLAGS_data_dir = "/tmp/BTreeVILoggingAndRecoveryTest/SerializeAndDeserialize"; std::filesystem::path dirPath = FLAGS_data_dir; std::filesystem::remove_all(dirPath); @@ -77,9 +68,6 @@ TEST_F(BTreeVILoggingAndRecoveryTest, SerializeAndDeserialize) { EXPECT_NE(btree, nullptr); }); - LOG(INFO) << "Buffer pool after registering tree"; - BufferManager::sInstance->DoWithBufferFrameIf(bfCondition, bfAction); - // insert some values cr::CRManager::sInstance->scheduleJobSync(0, [&]() { for (size_t i = 0; i < numKVs; ++i) { @@ -90,9 +78,6 @@ TEST_F(BTreeVILoggingAndRecoveryTest, SerializeAndDeserialize) { } }); - LOG(INFO) << "Buffer pool after inserting values on the tree"; - BufferManager::sInstance->DoWithBufferFrameIf(bfCondition, bfAction); - cr::CRManager::sInstance->scheduleJobSync(0, [&]() { rapidjson::Document doc(rapidjson::kObjectType); leanstore::storage::btree::BTreeGeneric::ToJSON(*btree, &doc); @@ -108,9 +93,6 @@ TEST_F(BTreeVILoggingAndRecoveryTest, SerializeAndDeserialize) { EXPECT_TRUE(mLeanStore->GetBTreeVI(btreeName, &btree)); EXPECT_NE(btree, nullptr); - LOG(INFO) << "Buffer pool after recovering"; - BufferManager::sInstance->DoWithBufferFrameIf(bfCondition, bfAction); - cr::CRManager::sInstance->scheduleJobSync(0, [&]() { rapidjson::Document doc(rapidjson::kObjectType); leanstore::storage::btree::BTreeGeneric::ToJSON(*btree, &doc); @@ -132,17 +114,10 @@ TEST_F(BTreeVILoggingAndRecoveryTest, SerializeAndDeserialize) { } }); - LOG(INFO) << "Buffer pool before unregistering"; - BufferManager::sInstance->DoWithBufferFrameIf(bfCondition, bfAction); - cr::CRManager::sInstance->scheduleJobSync( 1, [&]() { mLeanStore->UnRegisterBTreeVI(btreeName); }); - - LOG(INFO) << "Buffer pool after unregistering"; - BufferManager::sInstance->DoWithBufferFrameIf(bfCondition, bfAction); } -/* TEST_F(BTreeVILoggingAndRecoveryTest, RecoverAfterInsert) { FLAGS_data_dir = "/tmp/BTreeVILoggingAndRecoveryTest/RecoverAfterInsert"; std::filesystem::path dirPath = FLAGS_data_dir; @@ -190,12 +165,11 @@ TEST_F(BTreeVILoggingAndRecoveryTest, RecoverAfterInsert) { // skip dumpping buffer frames on exit LS_DEBUG_ENABLE("skip_CheckpointAllBufferFrames"); SCOPED_DEFER(LS_DEBUG_DISABLE("skip_CheckpointAllBufferFrames")); - mLeanStore.reset(nullptr); + FLAGS_recover = true; // recreate the store, it's expected that all the meta and pages are rebult // based on the WAL entries - FLAGS_recover = true; mLeanStore = std::make_unique(); EXPECT_TRUE(mLeanStore->GetBTreeVI(btreeName, &btree)); EXPECT_NE(btree, nullptr); @@ -215,6 +189,5 @@ TEST_F(BTreeVILoggingAndRecoveryTest, RecoverAfterInsert) { } }); } -*/ } // namespace leanstore \ No newline at end of file