From 5d10f2666869f323719942d341d9638294c7166b Mon Sep 17 00:00:00 2001 From: Jian Zhang Date: Sun, 25 Aug 2024 22:23:29 +0800 Subject: [PATCH] chore: tidy btree related code --- include/leanstore/btree/core/BTreeGeneric.hpp | 12 +- include/leanstore/btree/core/BTreeNode.hpp | 199 +++++++++------- .../core/PessimisticExclusiveIterator.hpp | 6 +- .../btree/core/PessimisticIterator.hpp | 51 ++--- include/leanstore/concurrency/Recovery.hpp | 56 +++-- .../leanstore/concurrency/WorkerThread.hpp | 22 +- src/btree/BasicKV.cpp | 18 +- src/btree/TransactionKV.cpp | 8 +- src/btree/Tuple.cpp | 4 +- src/btree/core/BTreeGeneric.cpp | 75 +++--- src/btree/core/BTreeNode.cpp | 214 +++++++++--------- src/concurrency/HistoryStorage.cpp | 14 +- src/utils/ToJson.hpp | 4 +- 13 files changed, 353 insertions(+), 330 deletions(-) diff --git a/include/leanstore/btree/core/BTreeGeneric.hpp b/include/leanstore/btree/core/BTreeGeneric.hpp index 50f78df4..ed44ac5b 100644 --- a/include/leanstore/btree/core/BTreeGeneric.hpp +++ b/include/leanstore/btree/core/BTreeGeneric.hpp @@ -212,7 +212,7 @@ class BTreeGeneric : public leanstore::storage::BufferManagedTree { inline void BTreeGeneric::freeBTreeNodesRecursive(BTreeGeneric& btree, GuardedBufferFrame& guardedNode) { if (!guardedNode->mIsLeaf) { - for (auto i = 0u; i <= guardedNode->mNumSeps; ++i) { + for (auto i = 0u; i <= guardedNode->mNumSlots; ++i) { auto* childSwip = guardedNode->ChildSwipIncludingRightMost(i); GuardedBufferFrame guardedChild(btree.mStore->mBufferManager.get(), guardedNode, *childSwip); @@ -230,7 +230,7 @@ inline void BTreeGeneric::IterateChildSwips(BufferFrame& bf, std::functionmIsLeaf) { // Replace all child swip to their page ID - for (uint64_t i = 0; i < destNode->mNumSeps; i++) { + for (uint64_t i = 0; i < destNode->mNumSlots; i++) { if (!destNode->ChildSwip(i)->IsEvicted()) { auto& childBf = destNode->ChildSwip(i)->AsBufferFrameMasked(); destNode->ChildSwip(i)->Evict(childBf.mHeader.mPageId); @@ -349,17 +349,17 @@ inline 
ParentSwipHandler BTreeGeneric::FindParent(BTreeGeneric& btree, BufferFra } auto& nodeToFind = *reinterpret_cast(bfToFind.mPage.mPayload); - const auto isInfinity = nodeToFind.mUpperFence.mOffset == 0; + const auto isInfinity = nodeToFind.mUpperFence.IsInfinity(); const auto keyToFind = nodeToFind.GetUpperFence(); auto posInParent = std::numeric_limits::max(); auto searchCondition = [&](GuardedBufferFrame& guardedNode) { if (isInfinity) { childSwip = &(guardedNode->mRightMostChildSwip); - posInParent = guardedNode->mNumSeps; + posInParent = guardedNode->mNumSlots; } else { posInParent = guardedNode->LowerBound(keyToFind); - if (posInParent == guardedNode->mNumSeps) { + if (posInParent == guardedNode->mNumSlots) { childSwip = &(guardedNode->mRightMostChildSwip); } else { childSwip = guardedNode->ChildSwip(posInParent); diff --git a/include/leanstore/btree/core/BTreeNode.hpp b/include/leanstore/btree/core/BTreeNode.hpp index 9417e599..8e563a84 100644 --- a/include/leanstore/btree/core/BTreeNode.hpp +++ b/include/leanstore/btree/core/BTreeNode.hpp @@ -5,6 +5,7 @@ #include "leanstore/utils/Log.hpp" #include "leanstore/utils/UserThread.hpp" +#include #include namespace leanstore::storage::btree { @@ -33,15 +34,21 @@ class BTreeNodeHeader { } }; + //! The fence key information of a BTreeNode. struct FenceKey { + //! The offset of the fence key in the BTreeNode. uint16_t mOffset; - uint16_t mLength; + //! The length of the fence key. + uint16_t mSize; + + //! Whether the fence key represents infinity. + bool IsInfinity() { + return mOffset == 0; + } }; -public: - //! The swip of the right-most child. - //! TODO(zz-jason): can it be moved to the slot array? + //! The swip of the right-most child, can be nullptr for leaf nodes. Swip mRightMostChildSwip = nullptr; //! The lower fence of the node. Exclusive. @@ -50,10 +57,8 @@ class BTreeNodeHeader { //! The upper fence of the node. Inclusive. FenceKey mUpperFence = {0, 0}; - //! The number of seperators. #slots = #seps + 1. 
- //! The first mNumSeps children are stored in the payload, while the last child are stored in - //! upper. - uint16_t mNumSeps = 0; + //! Size of the slot array. + uint16_t mNumSlots = 0; //! Indicates whether this node is leaf node without any child. bool mIsLeaf; @@ -77,88 +82,98 @@ class BTreeNodeHeader { //! Needed for GC bool mHasGarbage = false; -public: + //! Constructs a BTreeNodeHeader. BTreeNodeHeader(bool isLeaf, uint16_t size) : mIsLeaf(isLeaf), mDataOffset(size) { } - ~BTreeNodeHeader() { - } + //! Destructs a BTreeNodeHeader. + ~BTreeNodeHeader() = default; -public: - uint8_t* RawPtr() { + //! Returns the start address of the node. + uint8_t* NodeBegin() { return reinterpret_cast(this); } + //! Whether the node is an inner node. bool IsInner() { return !mIsLeaf; } + //! Get the lower fence key slice. Slice GetLowerFence() { - return Slice(GetLowerFenceKey(), mLowerFence.mLength); + return Slice(LowerFenceAddr(), mLowerFence.mSize); } - uint8_t* GetLowerFenceKey() { - return mLowerFence.mOffset ? RawPtr() + mLowerFence.mOffset : nullptr; + //! Get the address of lower fence key. nullptr if the lower fence is infinity. + uint8_t* LowerFenceAddr() { + return mLowerFence.IsInfinity() ? nullptr : NodeBegin() + mLowerFence.mOffset; } + //! Get the upper fence key slice. Slice GetUpperFence() { - return Slice(GetUpperFenceKey(), mUpperFence.mLength); + return Slice(UpperFenceAddr(), mUpperFence.mSize); } - uint8_t* GetUpperFenceKey() { - return mUpperFence.mOffset ? RawPtr() + mUpperFence.mOffset : nullptr; - } - - bool IsUpperFenceInfinity() { - return !mUpperFence.mOffset; - } - - bool IsLowerFenceInfinity() { - return !mLowerFence.mOffset; + //! Get the address of upper fence key. nullptr if the upper fence is infinity. + uint8_t* UpperFenceAddr() { + return mUpperFence.IsInfinity() ? nullptr : NodeBegin() + mUpperFence.mOffset; } }; -class BTreeNode : public BTreeNodeHeader { -public: - //! The slot inside a btree node. 
Slot records the metadata for the key-value position inside a - //! page. Common prefix among all keys are removed in a btree node. Slot key-value layout: - //! | key without prefix | value | - struct __attribute__((packed)) Slot { +//! The slot inside a btree node. Slot records the metadata for the key-value position inside a +//! page. Common prefix among all keys is removed in a btree node. Slot key-value layout: +//! | key without prefix | value | +struct __attribute__((packed)) BTreeNodeSlot { + //! Data offset of the slot, also the offset of the slot key + uint16_t mOffset; - //! Data offset of the slot, also the offset of the slot key - uint16_t mOffset; + //! Slot key size + uint16_t mKeySizeWithoutPrefix; - //! Slot key size - uint16_t mKeySizeWithoutPrefix; + //! Slot value size + uint16_t mValSize; - //! Slot value size - uint16_t mValSize; + //! The key header, used to improve key comparison performance + union { + HeadType mHead; - //! The key header, used for improve key comparation performance - union { - HeadType mHead; - uint8_t mHeadBytes[4]; - }; + uint8_t mHeadBytes[4]; }; +}; - //! The slot array, which stores all the key-value positions inside a page. - Slot mSlot[]; +class BTreeNode : public BTreeNodeHeader { +public: + //! The slot array, which stores all the key-value positions inside a BTreeNode. + BTreeNodeSlot mSlot[]; //! Creates a BTreeNode. Since BTreeNode creations and utilizations are critical, please use - //! ExclusiveGuardedBufferFrame::InitPayload() or BTreeNode::Init() to construct a BTreeNode on an + //! ExclusiveGuardedBufferFrame::InitPayload() or BTreeNode::New() to construct a BTreeNode on an //! existing buffer which has at least BTreeNode::Size() bytes: //! 1. ExclusiveGuardedBufferFrame::InitPayload() creates a BTreeNode on the holding BufferFrame. - //! 2. BTreeNode::Init(): creates a BTreeNode on the providing buffer. The size of the underlying + //! 2. BTreeNode::New(): creates a BTreeNode on the providing buffer. 
The size of the underlying //! buffer to store a BTreeNode can be obtained through BTreeNode::Size() BTreeNode(bool isLeaf) : BTreeNodeHeader(isLeaf, BTreeNode::Size()) { } + //! Creates a BTreeNode on the providing buffer. Callers should ensure the buffer has at least + //! BTreeNode::Size() bytes to store the BTreeNode. + //! @param buf: the buffer to store the BTreeNode. + //! @param isLeaf: whether the BTreeNode is a leaf node. + //! @param lowerFence: the lower fence of the BTreeNode. + //! @param upperFence: the upper fence of the BTreeNode. + //! @return the created BTreeNode. + static BTreeNode* New(void* buf, bool isLeaf, Slice lowerFence, Slice upperFence) { + auto* node = new (buf) BTreeNode(isLeaf); + node->setFences(lowerFence, upperFence); + return node; + } + uint16_t FreeSpace() { - return mDataOffset - (reinterpret_cast(mSlot + mNumSeps) - RawPtr()); + return mDataOffset - (reinterpret_cast(mSlot + mNumSlots) - NodeBegin()); } uint16_t FreeSpaceAfterCompaction() { - return BTreeNode::Size() - (reinterpret_cast(mSlot + mNumSeps) - RawPtr()) - + return BTreeNode::Size() - (reinterpret_cast(mSlot + mNumSlots) - NodeBegin()) - mSpaceUsed; } @@ -186,7 +201,7 @@ class BTreeNode : public BTreeNodeHeader { } uint8_t* KeyDataWithoutPrefix(uint16_t slotId) { - return RawPtr() + mSlot[slotId].mOffset; + return NodeBegin() + mSlot[slotId].mOffset; } uint16_t KeySizeWithoutPrefix(uint16_t slotId) { @@ -200,7 +215,8 @@ class BTreeNode : public BTreeNodeHeader { // Each slot is composed of: // key (mKeySizeWithoutPrefix), payload (mValSize) uint8_t* ValData(uint16_t slotId) { - return RawPtr() + mSlot[slotId].mOffset + mSlot[slotId].mKeySizeWithoutPrefix; + auto valOffset = mSlot[slotId].mOffset + mSlot[slotId].mKeySizeWithoutPrefix; + return NodeBegin() + valOffset; } uint16_t ValSize(uint16_t slotId) { @@ -208,7 +224,7 @@ class BTreeNode : public BTreeNodeHeader { } Swip* ChildSwipIncludingRightMost(uint16_t slotId) { - if (slotId == mNumSeps) { + if (slotId == 
mNumSlots) { return &mRightMostChildSwip; } @@ -216,12 +232,12 @@ class BTreeNode : public BTreeNodeHeader { } Swip* ChildSwip(uint16_t slotId) { - LS_DCHECK(slotId < mNumSeps); + LS_DCHECK(slotId < mNumSlots); return reinterpret_cast(ValData(slotId)); } uint16_t GetKVConsumedSpace(uint16_t slotId) { - return sizeof(Slot) + KeySizeWithoutPrefix(slotId) + ValSize(slotId); + return sizeof(BTreeNodeSlot) + KeySizeWithoutPrefix(slotId) + ValSize(slotId); } // Attention: the caller has to hold a copy of the existing payload @@ -257,8 +273,7 @@ class BTreeNode : public BTreeNodeHeader { std::memcpy(copiedKey, KeyDataWithoutPrefix(slotId), keySizeWithoutPrefix); // release the old space occupied by the payload (keyWithoutPrefix + value) - mSpaceUsed -= oldTotalSize; - mDataOffset += oldTotalSize; + retreatDataOffset(oldTotalSize); mSlot[slotId].mValSize = 0; mSlot[slotId].mKeySizeWithoutPrefix = 0; @@ -266,8 +281,7 @@ class BTreeNode : public BTreeNodeHeader { Compactify(); } LS_DCHECK(FreeSpace() >= newTotalSize); - mSpaceUsed += newTotalSize; - mDataOffset -= newTotalSize; + advanceDataOffset(newTotalSize); mSlot[slotId].mOffset = mDataOffset; mSlot[slotId].mKeySizeWithoutPrefix = keySizeWithoutPrefix; mSlot[slotId].mValSize = targetSize; @@ -275,15 +289,15 @@ class BTreeNode : public BTreeNodeHeader { } Slice KeyPrefix() { - return Slice(GetLowerFenceKey(), mPrefixSize); + return Slice(LowerFenceAddr(), mPrefixSize); } uint8_t* GetPrefix() { - return GetLowerFenceKey(); + return LowerFenceAddr(); } void CopyPrefix(uint8_t* out) { - memcpy(out, GetLowerFenceKey(), mPrefixSize); + memcpy(out, LowerFenceAddr(), mPrefixSize); } void CopyKeyWithoutPrefix(uint16_t slotId, uint8_t* dest) { @@ -302,7 +316,7 @@ class BTreeNode : public BTreeNodeHeader { } void MakeHint() { - uint16_t dist = mNumSeps / (sHintCount + 1); + uint16_t dist = mNumSlots / (sHintCount + 1); for (uint16_t i = 0; i < sHintCount; i++) mHint[i] = mSlot[dist * (i + 1)].mHead; } @@ -347,7 +361,7 @@ class 
BTreeNode : public BTreeNodeHeader { uint32_t MergeSpaceUpperBound(ExclusiveGuardedBufferFrame& xGuardedRight); uint32_t SpaceUsedBySlot(uint16_t slotId) { - return sizeof(BTreeNode::Slot) + KeySizeWithoutPrefix(slotId) + ValSize(slotId); + return sizeof(BTreeNodeSlot) + KeySizeWithoutPrefix(slotId) + ValSize(slotId); } // NOLINTNEXTLINE @@ -364,8 +378,6 @@ class BTreeNode : public BTreeNodeHeader { void InsertFence(FenceKey& fk, Slice key); - void SetFences(Slice lowerKey, Slice upperKey); - void Split(ExclusiveGuardedBufferFrame& xGuardedParent, ExclusiveGuardedBufferFrame& xGuardedNewLeft, const BTreeNode::SeparatorInfo& sepInfo); @@ -384,9 +396,11 @@ class BTreeNode : public BTreeNodeHeader { void Reset(); private: + void setFences(Slice lowerKey, Slice upperKey); + void generateSeparator(const SeparatorInfo& sepInfo, uint8_t* sepKey) { // prefix - memcpy(sepKey, GetLowerFenceKey(), mPrefixSize); + memcpy(sepKey, LowerFenceAddr(), mPrefixSize); if (sepInfo.mTrunc) { memcpy(sepKey + mPrefixSize, KeyDataWithoutPrefix(sepInfo.mSlotId + 1), @@ -447,12 +461,7 @@ class BTreeNode : public BTreeNodeHeader { static int32_t CmpKeys(Slice lhs, Slice rhs); static uint16_t SpaceNeeded(uint16_t keySize, uint16_t valSize, uint16_t prefixSize) { - return sizeof(Slot) + (keySize - prefixSize) + valSize; - } - - template - static BTreeNode* Init(void* addr, Args&&... args) { - return new (addr) BTreeNode(std::forward(args)...); + return sizeof(BTreeNodeSlot) + (keySize - prefixSize) + valSize; } static uint16_t Size() { @@ -462,22 +471,35 @@ class BTreeNode : public BTreeNodeHeader { static uint16_t UnderFullSize() { return BTreeNode::Size() * 0.6; } + +private: + //! Advance the data offset by size + void advanceDataOffset(uint16_t size) { + mDataOffset -= size; + mSpaceUsed += size; + } + + //! 
Opposite of advanceDataOffset + void retreatDataOffset(uint16_t size) { + mDataOffset += size; + mSpaceUsed -= size; + } }; template -int16_t BTreeNode::LinearSearchWithBias(Slice key, uint16_t startPos, bool higher) { - if (key.size() < mPrefixSize || (bcmp(key.data(), GetLowerFenceKey(), mPrefixSize) != 0)) { +inline int16_t BTreeNode::LinearSearchWithBias(Slice key, uint16_t startPos, bool higher) { + if (key.size() < mPrefixSize || (bcmp(key.data(), LowerFenceAddr(), mPrefixSize) != 0)) { return -1; } - LS_DCHECK(key.size() >= mPrefixSize && bcmp(key.data(), GetLowerFenceKey(), mPrefixSize) == 0); + LS_DCHECK(key.size() >= mPrefixSize && bcmp(key.data(), LowerFenceAddr(), mPrefixSize) == 0); // the compared key has the same prefix key.remove_prefix(mPrefixSize); if (higher) { auto cur = startPos + 1; - for (; cur < mNumSeps; cur++) { + for (; cur < mNumSlots; cur++) { if (CmpKeys(key, KeyWithoutPrefix(cur)) == 0) { return cur; } @@ -497,33 +519,33 @@ int16_t BTreeNode::LinearSearchWithBias(Slice key, uint16_t startPos, bool highe } template -int16_t BTreeNode::LowerBound(Slice key, bool* isEqual) { +inline int16_t BTreeNode::LowerBound(Slice key, bool* isEqual) { if (isEqual != nullptr && mIsLeaf) { *isEqual = false; } // compare prefix firstly if (equalityOnly) { - if ((key.size() < mPrefixSize) || (bcmp(key.data(), GetLowerFenceKey(), mPrefixSize) != 0)) { + if ((key.size() < mPrefixSize) || (bcmp(key.data(), LowerFenceAddr(), mPrefixSize) != 0)) { return -1; } } else if (mPrefixSize != 0) { Slice keyPrefix(key.data(), std::min(key.size(), mPrefixSize)); - Slice lowerFencePrefix(GetLowerFenceKey(), mPrefixSize); + Slice lowerFencePrefix(LowerFenceAddr(), mPrefixSize); int cmpPrefix = CmpKeys(keyPrefix, lowerFencePrefix); if (cmpPrefix < 0) { return 0; } if (cmpPrefix > 0) { - return mNumSeps; + return mNumSlots; } } // the compared key has the same prefix key.remove_prefix(mPrefixSize); uint16_t lower = 0; - uint16_t upper = mNumSeps; + uint16_t upper = 
mNumSlots; HeadType keyHead = Head(key); SearchHint(keyHead, lower, upper); while (lower < upper) { @@ -544,4 +566,17 @@ int16_t BTreeNode::LowerBound(Slice key, bool* isEqual) { return equalityOnly ? -1 : lower; } +inline void BTreeNode::setFences(Slice lowerKey, Slice upperKey) { + InsertFence(mLowerFence, lowerKey); + InsertFence(mUpperFence, upperKey); + LS_DCHECK(LowerFenceAddr() == nullptr || UpperFenceAddr() == nullptr || + *LowerFenceAddr() <= *UpperFenceAddr()); + + // prefix compression + for (mPrefixSize = 0; (mPrefixSize < std::min(lowerKey.size(), upperKey.size())) && + (lowerKey[mPrefixSize] == upperKey[mPrefixSize]); + mPrefixSize++) + ; +} + } // namespace leanstore::storage::btree diff --git a/include/leanstore/btree/core/PessimisticExclusiveIterator.hpp b/include/leanstore/btree/core/PessimisticExclusiveIterator.hpp index 018fc07d..5e7c000b 100644 --- a/include/leanstore/btree/core/PessimisticExclusiveIterator.hpp +++ b/include/leanstore/btree/core/PessimisticExclusiveIterator.hpp @@ -110,7 +110,7 @@ class PessimisticExclusiveIterator : public PessimisticIterator { } LS_DCHECK(mSlotId != -1 && targetSize > mGuardedLeaf->ValSize(mSlotId)); while (!mGuardedLeaf->CanExtendPayload(mSlotId, targetSize)) { - if (mGuardedLeaf->mNumSeps == 1) { + if (mGuardedLeaf->mNumSlots == 1) { return false; } AssembleKey(); @@ -156,7 +156,7 @@ class PessimisticExclusiveIterator : public PessimisticIterator { contentionStats.Reset(); if (lastUpdatedSlot != mSlotId && contentionPct >= utils::tlsStore->mStoreOption->mContentionSplitThresholdPct && - mGuardedLeaf->mNumSeps > 2) { + mGuardedLeaf->mNumSlots > 2) { int16_t splitSlot = std::min(lastUpdatedSlot, mSlotId); mGuardedLeaf.unlock(); @@ -186,7 +186,7 @@ class PessimisticExclusiveIterator : public PessimisticIterator { } virtual OpCode RemoveCurrent() { - if (!(mGuardedLeaf.mBf != nullptr && mSlotId >= 0 && mSlotId < mGuardedLeaf->mNumSeps)) { + if (!(mGuardedLeaf.mBf != nullptr && mSlotId >= 0 && mSlotId < 
mGuardedLeaf->mNumSlots)) { LS_DCHECK(false, "RemoveCurrent failed, pageId={}, slotId={}", mGuardedLeaf.mBf->mHeader.mPageId, mSlotId); return OpCode::kOther; diff --git a/include/leanstore/btree/core/PessimisticIterator.hpp b/include/leanstore/btree/core/PessimisticIterator.hpp index a6b06812..e55c8653 100644 --- a/include/leanstore/btree/core/PessimisticIterator.hpp +++ b/include/leanstore/btree/core/PessimisticIterator.hpp @@ -108,7 +108,7 @@ class PessimisticIterator : public Iterator { //! Seek to the position of the first key void SeekToFirst() override { seekToTargetPage([](GuardedBufferFrame&) { return 0; }); - if (mGuardedLeaf->mNumSeps == 0) { + if (mGuardedLeaf->mNumSlots == 0) { SetToInvalid(); return; } @@ -120,7 +120,7 @@ class PessimisticIterator : public Iterator { seekToTargetPageOnDemand(key); mSlotId = mGuardedLeaf->LowerBound(key); - if (mSlotId < mGuardedLeaf->mNumSeps) { + if (mSlotId < mGuardedLeaf->mNumSlots) { return; } @@ -136,12 +136,12 @@ class PessimisticIterator : public Iterator { } // If we are not at the end of the leaf, return true - if (mSlotId < mGuardedLeaf->mNumSeps - 1) { + if (mSlotId < mGuardedLeaf->mNumSlots - 1) { return true; } // No more keys in the BTree, return false - if (mGuardedLeaf->mUpperFence.mLength == 0) { + if (mGuardedLeaf->mUpperFence.IsInfinity()) { return false; } @@ -153,13 +153,13 @@ class PessimisticIterator : public Iterator { //! Seek to the position of the last key void SeekToLast() override { - seekToTargetPage([](GuardedBufferFrame& parent) { return parent->mNumSeps; }); - if (mGuardedLeaf->mNumSeps == 0) { + seekToTargetPage([](GuardedBufferFrame& parent) { return parent->mNumSlots; }); + if (mGuardedLeaf->mNumSlots == 0) { SetToInvalid(); return; } - mSlotId = mGuardedLeaf->mNumSeps - 1; + mSlotId = mGuardedLeaf->mNumSlots - 1; } //! 
Seek to the position of the last key which <= the given key @@ -194,7 +194,7 @@ class PessimisticIterator : public Iterator { } // No more keys in the BTree, return false - if (mGuardedLeaf->mLowerFence.mLength == 0) { + if (mGuardedLeaf->mLowerFence.IsInfinity()) { return false; } @@ -278,8 +278,8 @@ class PessimisticIterator : public Iterator { bool IsLastOne() { LS_DCHECK(mSlotId != -1); - LS_DCHECK(mSlotId != mGuardedLeaf->mNumSeps); - return (mSlotId + 1) == mGuardedLeaf->mNumSeps; + LS_DCHECK(mSlotId != mGuardedLeaf->mNumSlots); + return (mSlotId + 1) == mGuardedLeaf->mNumSlots; } void Reset() { @@ -304,13 +304,12 @@ class PessimisticIterator : public Iterator { void seekToTargetPage(std::function&)> childPosGetter); void assembleUpperFence() { - mFenceSize = mGuardedLeaf->mUpperFence.mLength + 1; + mFenceSize = mGuardedLeaf->mUpperFence.mSize + 1; mIsUsingUpperFence = true; if (mBuffer.size() < mFenceSize) { mBuffer.resize(mFenceSize, 0); } - std::memcpy(mBuffer.data(), mGuardedLeaf->GetUpperFenceKey(), - mGuardedLeaf->mUpperFence.mLength); + std::memcpy(mBuffer.data(), mGuardedLeaf->UpperFenceAddr(), mGuardedLeaf->mUpperFence.mSize); mBuffer[mFenceSize - 1] = 0; } @@ -331,13 +330,13 @@ inline void PessimisticIterator::Next() { ENSURE(mGuardedLeaf.mGuard.mState != GuardState::kOptimisticShared); // If we are not at the end of the leaf, return the next key in the leaf. 
- if ((mSlotId + 1) < mGuardedLeaf->mNumSeps) { + if ((mSlotId + 1) < mGuardedLeaf->mNumSlots) { mSlotId += 1; return; } // No more keys in the BTree, return false - if (mGuardedLeaf->mUpperFence.mLength == 0) { + if (mGuardedLeaf->mUpperFence.IsInfinity()) { SetToInvalid(); return; } @@ -358,7 +357,7 @@ inline void PessimisticIterator::Next() { if (utils::tlsStore->mStoreOption->mEnableOptimisticScan && mLeafPosInParent != -1) { JUMPMU_TRY() { - if ((mLeafPosInParent + 1) <= mGuardedParent->mNumSeps) { + if ((mLeafPosInParent + 1) <= mGuardedParent->mNumSlots) { int32_t nextLeafPos = mLeafPosInParent + 1; auto* nextLeafSwip = mGuardedParent->ChildSwipIncludingRightMost(nextLeafPos); GuardedBufferFrame guardedNextLeaf(mBTree.mStore->mBufferManager.get(), @@ -379,10 +378,10 @@ inline void PessimisticIterator::Next() { mFuncEnterLeaf(mGuardedLeaf); } - if (mGuardedLeaf->mNumSeps == 0) { + if (mGuardedLeaf->mNumSlots == 0) { JUMPMU_CONTINUE; } - ENSURE(mSlotId < mGuardedLeaf->mNumSeps); + ENSURE(mSlotId < mGuardedLeaf->mNumSlots); COUNTERS_BLOCK() { WorkerCounters::MyCounters().dt_next_tuple_opt[mBTree.mTreeId]++; } @@ -399,7 +398,7 @@ inline void PessimisticIterator::Next() { return guardedNode->LowerBound(fenceKey); }); - if (mGuardedLeaf->mNumSeps == 0) { + if (mGuardedLeaf->mNumSlots == 0) { SetCleanUpCallback([&, toMerge = mGuardedLeaf.mBf]() { JUMPMU_TRY() { mBTree.TryMergeMayJump(*toMerge, true); @@ -413,7 +412,7 @@ inline void PessimisticIterator::Next() { continue; } mSlotId = mGuardedLeaf->LowerBound(assembedFence()); - if (mSlotId == mGuardedLeaf->mNumSeps) { + if (mSlotId == mGuardedLeaf->mNumSlots) { continue; } return; @@ -435,18 +434,18 @@ inline void PessimisticIterator::Prev() { } // No more keys in the BTree, return false - if (mGuardedLeaf->mLowerFence.mLength == 0) { + if (mGuardedLeaf->mLowerFence.IsInfinity()) { SetToInvalid(); return; } // Construct the previous key (upper bound) - mFenceSize = mGuardedLeaf->mLowerFence.mLength; + mFenceSize = 
mGuardedLeaf->mLowerFence.mSize; mIsUsingUpperFence = false; if (mBuffer.size() < mFenceSize) { mBuffer.resize(mFenceSize, 0); } - std::memcpy(&mBuffer[0], mGuardedLeaf->GetLowerFenceKey(), mFenceSize); + std::memcpy(&mBuffer[0], mGuardedLeaf->LowerFenceAddr(), mFenceSize); // callback before exiting current leaf if (mFuncExitLeaf != nullptr) { @@ -479,14 +478,14 @@ inline void PessimisticIterator::Prev() { mGuardedLeaf.JumpIfModifiedByOthers(); mGuardedLeaf = std::move(guardedNextLeaf); mLeafPosInParent = nextLeafPos; - mSlotId = mGuardedLeaf->mNumSeps - 1; + mSlotId = mGuardedLeaf->mNumSlots - 1; mIsPrefixCopied = false; if (mFuncEnterLeaf != nullptr) { mFuncEnterLeaf(mGuardedLeaf); } - if (mGuardedLeaf->mNumSeps == 0) { + if (mGuardedLeaf->mNumSlots == 0) { JUMPMU_CONTINUE; } COUNTERS_BLOCK() { @@ -505,7 +504,7 @@ inline void PessimisticIterator::Prev() { return guardedNode->LowerBound(fenceKey); }); - if (mGuardedLeaf->mNumSeps == 0) { + if (mGuardedLeaf->mNumSlots == 0) { COUNTERS_BLOCK() { WorkerCounters::MyCounters().dt_empty_leaf[mBTree.mTreeId]++; } diff --git a/include/leanstore/concurrency/Recovery.hpp b/include/leanstore/concurrency/Recovery.hpp index f52bf8a2..e03974a2 100644 --- a/include/leanstore/concurrency/Recovery.hpp +++ b/include/leanstore/concurrency/Recovery.hpp @@ -31,8 +31,8 @@ class Recovery { //! Size of the written WAL file. uint64_t mWalSize; - //! Stores the dirty page ID and the offset to the first WalEntry that caused - //! that page to become dirty. + //! Stores the dirty page ID and the offset to the first WalEntry that caused that page to become + //! dirty. std::map mDirtyPageTable; //! Stores the active transaction and the offset to the last created WalEntry. @@ -55,36 +55,32 @@ class Recovery { Recovery(const Recovery&) = delete; public: - //! The ARIES algorithm relies on logging of all database operations with - //! ascending sequence numbers. The resulting logfile is stored on so-called - //! 
“stable storage”, which is a storage medium that is assumed to survive - //! crashes and hardware failures. To gather the necessary information for - //! the logs, two data structures have to be maintained: the dirty page table - //! (DPT) and the transaction table (TT). The dirty page table keeps record of - //! all the pages that have been modified, and not yet written to disk, and - //! the first sequence number that caused that page to become dirty. The - //! transaction table contains all currently running transactions and the - //! sequence number of the last log entry they created. + //! The ARIES algorithm relies on logging of all database operations with ascending sequence + //! numbers. The resulting logfile is stored on so-called “stable storage”, which is a storage + //! medium that is assumed to survive crashes and hardware failures. To gather the necessary + //! information for the logs, two data structures have to be maintained: the dirty page table + //! (DPT) and the transaction table (TT). The dirty page table keeps record of all the pages that + //! have been modified, and not yet written to disk, and the first sequence number that caused + //! that page to become dirty. The transaction table contains all currently running transactions + //! and the sequence number of the last log entry they created. /// - //! The recovery works in three phases: analysis, redo, and undo. During the - //! analysis phase, all the necessary information is computed from the - //! logfile. During the redo phase, ARIES retraces the actions of a database - //! before the crash and brings the system back to the exact state that it was - //! in before the crash. During the undo phase, ARIES undoes the transactions - //! still active at crash time. + //! The recovery works in three phases: analysis, redo, and undo. During the analysis phase, all + //! the necessary information is computed from the logfile. During the redo phase, ARIES retraces + //! 
the actions of a database before the crash and brings the system back to the exact state that + //! it was in before the crash. During the undo phase, ARIES undoes the transactions still active + //! at crash time. bool Run(); private: - //! During the analysis phase, the DPT and TT are restored to their state at - //! the time of the crash. The logfile is scanned from the beginning or the - //! last checkpoint, and all transactions for which we encounter Begin - //! Transaction entries are added to the TT. Whenever an End Log entry is - //! found, the corresponding transaction is removed. + //! During the analysis phase, the DPT and TT are restored to their state at the time of the + //! crash. The logfile is scanned from the beginning or the last checkpoint, and all transactions + //! for which we encounter begin transaction entries are added to the TT. Whenever an End Log + //! entry is found, the corresponding transaction is removed. Result analysis(); - //! During the redo phase, the DPT is used to find the set of pages in the - //! buffer pool that were dirty at the time of the crash. All these pages are - //! read from disk and redone from the first log record that makes them dirty. + //! During the redo phase, the DPT is used to find the set of pages in the buffer pool that were + //! dirty at the time of the crash. All these pages are read from disk and redone from the first + //! log record that makes them dirty. Result redo(); Result nextWalComplexToRedo(uint64_t& offset, WalEntryComplex* walEntryPtr); @@ -107,10 +103,10 @@ class Recovery { void redoSplitNonRoot(storage::BufferFrame& bf, WalEntryComplex* complexEntry); - //! During the undo phase, the TT is used to undo the transactions still - //! active at crash time. In the case of an aborted transaction, it’s possible - //! to traverse the log file in reverse order using the previous sequence - //! numbers, undoing all actions taken within the specific transaction. + //! 
During the undo phase, the TT is used to undo the transactions still active at crash time. In + //! the case of an aborted transaction, it’s possible to traverse the log file in reverse order + //! using the previous sequence numbers, undoing all actions taken within the specific + //! transaction. void undo() { } diff --git a/include/leanstore/concurrency/WorkerThread.hpp b/include/leanstore/concurrency/WorkerThread.hpp index f37f9710..34ed6f59 100644 --- a/include/leanstore/concurrency/WorkerThread.hpp +++ b/include/leanstore/concurrency/WorkerThread.hpp @@ -8,6 +8,7 @@ #include "leanstore/utils/UserThread.hpp" #include +#include #include #include @@ -24,6 +25,12 @@ namespace leanstore::cr { //! job senders. class WorkerThread : public utils::UserThread { public: + enum JobStatus : uint8_t { + kJobIsEmpty = 0, + kJobIsSet, + kJobIsFinished, + }; + //! The id of the worker thread. const WORKERID mWorkerId; @@ -37,7 +44,7 @@ class WorkerThread : public utils::UserThread { std::function mJob; //! Whether the current job is done. - bool mJobDone; + JobStatus mJobStatus; public: //! Constructor. @@ -45,7 +52,7 @@ class WorkerThread : public utils::UserThread { : utils::UserThread(store, "Worker" + std::to_string(workerId), cpu), mWorkerId(workerId), mJob(nullptr), - mJobDone(false) { + mJobStatus(kJobIsEmpty) { } //! Destructor. 
@@ -81,7 +88,7 @@ inline void WorkerThread::runImpl() { while (mKeepRunning) { // wait until there is a job std::unique_lock guard(mMutex); - mCv.wait(guard, [&]() { return !mKeepRunning || (mJob != nullptr && !mJobDone); }); + mCv.wait(guard, [&]() { return !mKeepRunning || (mJobStatus == kJobIsSet); }); // check thread status if (!mKeepRunning) { @@ -92,7 +99,7 @@ inline void WorkerThread::runImpl() { mJob(); // Set job done, change the worker state to (jobSet, jobDone), notify the job sender - mJobDone = true; + mJobStatus = kJobIsFinished; guard.unlock(); mCv.notify_all(); @@ -115,10 +122,11 @@ inline void WorkerThread::Stop() { inline void WorkerThread::SetJob(std::function job) { // wait the previous job to finish std::unique_lock guard(mMutex); - mCv.wait(guard, [&]() { return mJob == nullptr && !mJobDone; }); + mCv.wait(guard, [&]() { return mJobStatus == kJobIsEmpty; }); // set a new job, change the worker state to (jobSet, jobNotDone), notify the worker thread mJob = std::move(job); + mJobStatus = kJobIsSet; guard.unlock(); mCv.notify_all(); @@ -126,11 +134,11 @@ inline void WorkerThread::SetJob(std::function job) { inline void WorkerThread::Wait() { std::unique_lock guard(mMutex); - mCv.wait(guard, [&]() { return mJob != nullptr && mJobDone; }); + mCv.wait(guard, [&]() { return mJobStatus == kJobIsFinished; }); // reset the job, change the worker state to (jobNotSet, jobDone), notify other job senders mJob = nullptr; - mJobDone = false; + mJobStatus = kJobIsEmpty; guard.unlock(); mCv.notify_all(); diff --git a/src/btree/BasicKV.cpp b/src/btree/BasicKV.cpp index c4cdb462..dd57dd49 100644 --- a/src/btree/BasicKV.cpp +++ b/src/btree/BasicKV.cpp @@ -85,10 +85,10 @@ bool BasicKV::IsRangeEmpty(Slice startKey, Slice endKey) { Slice upperFence = guardedLeaf->GetUpperFence(); LS_DCHECK(startKey >= guardedLeaf->GetLowerFence()); - if ((guardedLeaf->mUpperFence.mOffset == 0 || endKey <= upperFence) && - guardedLeaf->mNumSeps == 0) { + if 
((guardedLeaf->mUpperFence.IsInfinity() || endKey <= upperFence) && + guardedLeaf->mNumSlots == 0) { int32_t pos = guardedLeaf->LowerBound(startKey); - if (pos == guardedLeaf->mNumSeps) { + if (pos == guardedLeaf->mNumSlots) { guardedLeaf.JumpIfModifiedByOthers(); JUMPMU_RETURN true; } @@ -201,7 +201,7 @@ OpCode BasicKV::PrefixLookup(Slice key, PrefixLookupCallback callback) { JUMPMU_RETURN OpCode::kOK; } - if (cur < guardedLeaf->mNumSeps) { + if (cur < guardedLeaf->mNumSlots) { auto fullKeySize = guardedLeaf->GetFullKeyLen(cur); auto fullKeyBuf = utils::JumpScopedArray(fullKeySize); guardedLeaf->CopyFullKey(cur, fullKeyBuf->get()); @@ -368,7 +368,7 @@ OpCode BasicKV::RangeRemove(Slice startKey, Slice endKey, bool pageWise) { } auto ret = xIter.RemoveCurrent(); ENSURE(ret == OpCode::kOK); - if (xIter.mSlotId == xIter.mGuardedLeaf->mNumSeps) { + if (xIter.mSlotId == xIter.mGuardedLeaf->mNumSlots) { xIter.Next(); ret = xIter.Valid() ? OpCode::kOK : OpCode::kNotFound; } @@ -381,7 +381,7 @@ OpCode BasicKV::RangeRemove(Slice startKey, Slice endKey, bool pageWise) { bool didPurgeFullPage = false; xIter.SetEnterLeafCallback([&](GuardedBufferFrame& guardedLeaf) { - if (guardedLeaf->mNumSeps == 0) { + if (guardedLeaf->mNumSlots == 0) { return; } @@ -392,15 +392,15 @@ OpCode BasicKV::RangeRemove(Slice startKey, Slice endKey, bool pageWise) { Slice pageStartKey(firstKey->get(), firstKeySize); // page end key - auto lastKeySize = guardedLeaf->GetFullKeyLen(guardedLeaf->mNumSeps - 1); + auto lastKeySize = guardedLeaf->GetFullKeyLen(guardedLeaf->mNumSlots - 1); auto lastKey = utils::JumpScopedArray(lastKeySize); - guardedLeaf->CopyFullKey(guardedLeaf->mNumSeps - 1, lastKey->get()); + guardedLeaf->CopyFullKey(guardedLeaf->mNumSlots - 1, lastKey->get()); Slice pageEndKey(lastKey->get(), lastKeySize); if (pageStartKey >= startKey && pageEndKey <= endKey) { // Purge the whole page COUNTERS_BLOCK() { - WorkerCounters::MyCounters().dt_range_removed[mTreeId] += guardedLeaf->mNumSeps; + 
WorkerCounters::MyCounters().dt_range_removed[mTreeId] += guardedLeaf->mNumSlots; } guardedLeaf->Reset(); didPurgeFullPage = true; diff --git a/src/btree/TransactionKV.cpp b/src/btree/TransactionKV.cpp index 7cac13fb..8bff9c8c 100644 --- a/src/btree/TransactionKV.cpp +++ b/src/btree/TransactionKV.cpp @@ -681,7 +681,7 @@ SpaceCheckResult TransactionKV::CheckSpaceUtilization(BufferFrame& bf) { guardedNode.ToExclusiveMayJump(); guardedNode.SyncGSNBeforeWrite(); - for (uint16_t i = 0; i < guardedNode->mNumSeps; i++) { + for (uint16_t i = 0; i < guardedNode->mNumSlots; i++) { auto& tuple = *Tuple::From(guardedNode->ValData(i)); if (tuple.mFormat == TupleFormat::kFat) { auto& fatTuple = *FatTuple::From(guardedNode->ValData(i)); @@ -968,8 +968,7 @@ OpCode TransactionKV::scan4LongRunningTx(Slice key, ScanCallback callback) { iter.AssembleKey(); // Now it begins - graveyardUpperBound = - Slice(iter.mGuardedLeaf->GetUpperFenceKey(), iter.mGuardedLeaf->mUpperFence.mLength); + graveyardUpperBound = iter.mGuardedLeaf->GetUpperFence(); auto gRange = [&]() { gIter.Reset(); if (mGraveyard->IsRangeEmpty(graveyardLowerBound, graveyardUpperBound)) { @@ -1015,8 +1014,7 @@ OpCode TransactionKV::scan4LongRunningTx(Slice key, ScanCallback callback) { iter.mBuffer = std::move(newBuffer); } graveyardLowerBound = Slice(&iter.mBuffer[0], iter.mFenceSize + 1); - graveyardUpperBound = - Slice(iter.mGuardedLeaf->GetUpperFenceKey(), iter.mGuardedLeaf->mUpperFence.mLength); + graveyardUpperBound = iter.mGuardedLeaf->GetUpperFence(); gRange(); } return true; diff --git a/src/btree/Tuple.cpp b/src/btree/Tuple.cpp index 776c8d37..78a19193 100644 --- a/src/btree/Tuple.cpp +++ b/src/btree/Tuple.cpp @@ -325,9 +325,9 @@ void FatTuple::GarbageCollection() { // NOLINTEND bool FatTuple::HasSpaceFor(const UpdateDesc& updateDesc) { - const uint32_t SpaceNeeded = + const uint32_t spaceNeeded = updateDesc.SizeWithDelta() + sizeof(uint16_t) + sizeof(FatTupleDelta); - return (mDataOffset - (mValSize + 
(mNumDeltas * sizeof(uint16_t)))) >= SpaceNeeded; + return (mDataOffset - (mValSize + (mNumDeltas * sizeof(uint16_t)))) >= spaceNeeded; } template diff --git a/src/btree/core/BTreeGeneric.cpp b/src/btree/core/BTreeGeneric.cpp index 93c9d1f3..75b31177 100644 --- a/src/btree/core/BTreeGeneric.cpp +++ b/src/btree/core/BTreeGeneric.cpp @@ -66,19 +66,19 @@ void BTreeGeneric::TrySplitMayJump(BufferFrame& toSplit, int16_t favoredSplitPos mStore->mBufferManager.get(), std::move(parentHandler.mParentGuard), parentHandler.mParentBf); auto guardedChild = GuardedBufferFrame(mStore->mBufferManager.get(), guardedParent, parentHandler.mChildSwip); - if (guardedChild->mNumSeps <= 1) { + if (guardedChild->mNumSlots <= 1) { Log::Warn("Split failed, not enough separators in node, " "toSplit.mHeader.mPageId={}, favoredSplitPos={}, " - "guardedChild->mNumSeps={}", - toSplit.mHeader.mPageId, favoredSplitPos, guardedChild->mNumSeps); + "guardedChild->mNumSlots={}", + toSplit.mHeader.mPageId, favoredSplitPos, guardedChild->mNumSlots); return; } // init the separator info BTreeNode::SeparatorInfo sepInfo; - if (favoredSplitPos < 0 || favoredSplitPos >= guardedChild->mNumSeps - 1) { + if (favoredSplitPos < 0 || favoredSplitPos >= guardedChild->mNumSlots - 1) { if (mConfig.mUseBulkInsert) { - favoredSplitPos = guardedChild->mNumSeps - 2; + favoredSplitPos = guardedChild->mNumSlots - 2; sepInfo = BTreeNode::SeparatorInfo{guardedChild->GetFullKeyLen(favoredSplitPos), static_cast(favoredSplitPos), false}; } else { @@ -231,13 +231,13 @@ bool BTreeGeneric::TryMergeMayJump(BufferFrame& toMerge, bool swizzleSibling) { return false; } - if (guardedParent->mNumSeps <= 1) { + if (guardedParent->mNumSlots <= 1) { return false; } - LS_DCHECK(posInParent <= guardedParent->mNumSeps, + LS_DCHECK(posInParent <= guardedParent->mNumSlots, "Invalid position in parent, posInParent={}, childSizeOfParent={}", posInParent, - guardedParent->mNumSeps); + guardedParent->mNumSlots); 
guardedParent.JumpIfModifiedByOthers(); guardedChild.JumpIfModifiedByOthers(); @@ -274,7 +274,7 @@ bool BTreeGeneric::TryMergeMayJump(BufferFrame& toMerge, bool swizzleSibling) { return true; }; auto mergeAndReclaimRight = [&]() { - auto& rightSwip = ((posInParent + 1) == guardedParent->mNumSeps) + auto& rightSwip = ((posInParent + 1) == guardedParent->mNumSlots) ? guardedParent->mRightMostChildSwip : *guardedParent->ChildSwip(posInParent + 1); if (!swizzleSibling && rightSwip.IsEvicted()) { @@ -325,7 +325,7 @@ bool BTreeGeneric::TryMergeMayJump(BufferFrame& toMerge, bool swizzleSibling) { if (posInParent > 0) { succeed = mergeAndReclaimLeft(); } - if (!succeed && posInParent < guardedParent->mNumSeps) { + if (!succeed && posInParent < guardedParent->mNumSlots) { succeed = mergeAndReclaimRight(); } @@ -363,24 +363,24 @@ int16_t BTreeGeneric::mergeLeftIntoRight(ExclusiveGuardedBufferFrame& // Do a partial merge // Remove a key at a time from the merge and check if now it fits int16_t tillSlotId = -1; - for (int16_t i = 0; i < xGuardedLeft->mNumSeps; i++) { + for (int16_t i = 0; i < xGuardedLeft->mNumSlots; i++) { spaceUpperBound -= - sizeof(BTreeNode::Slot) + xGuardedLeft->KeySizeWithoutPrefix(i) + xGuardedLeft->ValSize(i); - if (spaceUpperBound + (xGuardedLeft->GetFullKeyLen(i) - xGuardedRight->mLowerFence.mLength) < + sizeof(BTreeNodeSlot) + xGuardedLeft->KeySizeWithoutPrefix(i) + xGuardedLeft->ValSize(i); + if (spaceUpperBound + (xGuardedLeft->GetFullKeyLen(i) - xGuardedRight->mLowerFence.mSize) < BTreeNode::Size() * 1.0) { tillSlotId = i + 1; break; } } - if (!(tillSlotId != -1 && tillSlotId < (xGuardedLeft->mNumSeps - 1))) { + if (!(tillSlotId != -1 && tillSlotId < (xGuardedLeft->mNumSlots - 1))) { return 0; // false } assert((spaceUpperBound + (xGuardedLeft->GetFullKeyLen(tillSlotId - 1) - - xGuardedRight->mLowerFence.mLength)) < BTreeNode::Size() * 1.0); + xGuardedRight->mLowerFence.mSize)) < BTreeNode::Size() * 1.0); assert(tillSlotId > 0); - uint16_t 
copyFromCount = xGuardedLeft->mNumSeps - tillSlotId; + uint16_t copyFromCount = xGuardedLeft->mNumSlots - tillSlotId; uint16_t newLeftUpperFenceSize = xGuardedLeft->GetFullKeyLen(tillSlotId - 1); ENSURE(newLeftUpperFenceSize > 0); @@ -394,35 +394,34 @@ int16_t BTreeGeneric::mergeLeftIntoRight(ExclusiveGuardedBufferFrame& auto nodeBuf = utils::JumpScopedArray(BTreeNode::Size()); { - auto* tmp = BTreeNode::Init(nodeBuf->get(), true); - - tmp->SetFences(Slice(newLeftUpperFence, newLeftUpperFenceSize), xGuardedRight->GetUpperFence()); + Slice newLowerFence{newLeftUpperFence, newLeftUpperFenceSize}; + Slice newUpperFence{xGuardedRight->GetUpperFence()}; + auto* tmp = BTreeNode::New(nodeBuf->get(), true, newLowerFence, newUpperFence); xGuardedLeft->CopyKeyValueRange(tmp, 0, tillSlotId, copyFromCount); - xGuardedRight->CopyKeyValueRange(tmp, copyFromCount, 0, xGuardedRight->mNumSeps); + xGuardedRight->CopyKeyValueRange(tmp, copyFromCount, 0, xGuardedRight->mNumSlots); memcpy(xGuardedRight.GetPagePayloadPtr(), tmp, BTreeNode::Size()); xGuardedRight->MakeHint(); // Nothing to do for the right node's separator - assert(xGuardedRight->CompareKeyWithBoundaries( - Slice(newLeftUpperFence, newLeftUpperFenceSize)) == 1); + assert(xGuardedRight->CompareKeyWithBoundaries(newLowerFence) == 1); } + { - auto* tmp = BTreeNode::Init(nodeBuf->get(), true); + Slice newLowerFence{xGuardedLeft->GetLowerFence()}; + Slice newUpperFence{newLeftUpperFence, newLeftUpperFenceSize}; + auto* tmp = BTreeNode::New(nodeBuf->get(), true, newLowerFence, newUpperFence); - tmp->SetFences(xGuardedLeft->GetLowerFence(), Slice(newLeftUpperFence, newLeftUpperFenceSize)); - // ------------------------------------------------------------------------------------- - xGuardedLeft->CopyKeyValueRange(tmp, 0, 0, xGuardedLeft->mNumSeps - copyFromCount); + xGuardedLeft->CopyKeyValueRange(tmp, 0, 0, xGuardedLeft->mNumSlots - copyFromCount); memcpy(xGuardedLeft.GetPagePayloadPtr(), tmp, BTreeNode::Size()); 
xGuardedLeft->MakeHint(); - // ------------------------------------------------------------------------------------- - assert(xGuardedLeft->CompareKeyWithBoundaries( - Slice(newLeftUpperFence, newLeftUpperFenceSize)) == 0); - // ------------------------------------------------------------------------------------- + + assert(xGuardedLeft->CompareKeyWithBoundaries(newUpperFence) == 0); + xGuardedParent->RemoveSlot(lhsSlotId); - ENSURE(xGuardedParent->PrepareInsert(xGuardedLeft->mUpperFence.mLength, sizeof(Swip))); + ENSURE(xGuardedParent->PrepareInsert(xGuardedLeft->mUpperFence.mSize, sizeof(Swip))); auto swip = xGuardedLeft.swip(); - Slice key(xGuardedLeft->GetUpperFenceKey(), xGuardedLeft->mUpperFence.mLength); + Slice key = xGuardedLeft->GetUpperFence(); Slice val(reinterpret_cast(&swip), sizeof(Swip)); xGuardedParent->Insert(key, val); } @@ -450,13 +449,13 @@ BTreeGeneric::XMergeReturnCode BTreeGeneric::XMerge(GuardedBufferFrameFillFactorAfterCompaction(); - // Handle upper swip instead of avoiding guardedParent->mNumSeps -1 swip + // Handle upper swip instead of avoiding guardedParent->mNumSlots -1 swip if (isMetaNode(guardedParent) || !guardedNodes[0]->mIsLeaf) { guardedChild = std::move(guardedNodes[0]); return XMergeReturnCode::kNothing; } for (maxRight = pos + 1; - (maxRight - pos) < maxMergePages && (maxRight + 1) < guardedParent->mNumSeps; maxRight++) { + (maxRight - pos) < maxMergePages && (maxRight + 1) < guardedParent->mNumSlots; maxRight++) { if (!guardedParent->ChildSwip(maxRight)->IsHot()) { guardedChild = std::move(guardedNodes[0]); return XMergeReturnCode::kNothing; @@ -549,7 +548,7 @@ int64_t BTreeGeneric::iterateAllPagesRecursive(GuardedBufferFrame& gu return leaf(guardedNode.ref()); } int64_t res = inner(guardedNode.ref()); - for (uint16_t i = 0; i < guardedNode->mNumSeps; i++) { + for (uint16_t i = 0; i < guardedNode->mNumSlots; i++) { auto* childSwip = guardedNode->ChildSwip(i); auto guardedChild = 
GuardedBufferFrame(mStore->mBufferManager.get(), guardedNode, *childSwip); @@ -572,7 +571,7 @@ uint64_t BTreeGeneric::GetHeight() { uint64_t BTreeGeneric::CountEntries() { return iterateAllPages([](BTreeNode&) { return 0; }, - [](BTreeNode& node) { return node.mNumSeps; }); + [](BTreeNode& node) { return node.mNumSlots; }); } uint64_t BTreeGeneric::CountAllPages() { @@ -595,7 +594,7 @@ void BTreeGeneric::PrintInfo(uint64_t totalSize) { uint64_t numAllPages = CountAllPages(); std::cout << "nodes:" << numAllPages << ", innerNodes:" << CountInnerPages() << ", space:" << (numAllPages * BTreeNode::Size()) / (float)totalSize - << ", height:" << mHeight << ", rootCnt:" << guardedRoot->mNumSeps + << ", height:" << mHeight << ", rootCnt:" << guardedRoot->mNumSlots << ", freeSpaceAfterCompaction:" << FreeSpaceAfterCompaction() << std::endl; } @@ -677,7 +676,7 @@ void BTreeGeneric::Deserialize(StringMap map) { // } // // rapidjson::Value childrenJson(rapidjson::kArrayType); -// for (auto i = 0u; i < guardedNode->mNumSeps; ++i) { +// for (auto i = 0u; i < guardedNode->mNumSlots; ++i) { // auto* childSwip = guardedNode->ChildSwip(i); // GuardedBufferFrame guardedChild(btree.mStore->mBufferManager.get(), guardedNode, // *childSwip); diff --git a/src/btree/core/BTreeNode.cpp b/src/btree/core/BTreeNode.cpp index df84bd62..843ea7c0 100644 --- a/src/btree/core/BTreeNode.cpp +++ b/src/btree/core/BTreeNode.cpp @@ -1,17 +1,21 @@ #include "leanstore/btree/core/BTreeNode.hpp" +#include "leanstore/Exceptions.hpp" +#include "leanstore/Slice.hpp" #include "leanstore/buffer-manager/GuardedBufferFrame.hpp" #include "leanstore/profiling/counters/WorkerCounters.hpp" +#include "leanstore/utils/Defer.hpp" #include "leanstore/utils/Log.hpp" #include +#include namespace leanstore::storage::btree { void BTreeNode::UpdateHint(uint16_t slotId) { - uint16_t dist = mNumSeps / (sHintCount + 1); + uint16_t dist = mNumSlots / (sHintCount + 1); uint16_t begin = 0; - if ((mNumSeps > sHintCount * 2 + 1) && 
(((mNumSeps - 1) / (sHintCount + 1)) == dist) && + if ((mNumSlots > sHintCount * 2 + 1) && (((mNumSlots - 1) / (sHintCount + 1)) == dist) && ((slotId / dist) > 1)) begin = (slotId / dist) - 1; for (uint16_t i = begin; i < sHintCount; i++) @@ -21,10 +25,10 @@ void BTreeNode::UpdateHint(uint16_t slotId) { } void BTreeNode::SearchHint(HeadType keyHead, uint16_t& lowerOut, uint16_t& upperOut) { - if (mNumSeps > sHintCount * 2) { + if (mNumSlots > sHintCount * 2) { if (utils::tlsStore->mStoreOption->mBTreeHints == 2) { #ifdef __AVX512F__ - const uint16_t dist = mNumSeps / (sHintCount + 1); + const uint16_t dist = mNumSlots / (sHintCount + 1); uint16_t pos, pos2; __m512i key_head_reg = _mm512_set1_epi32(keyHead); __m512i chunk = _mm512_loadu_si512(hint); @@ -46,7 +50,7 @@ void BTreeNode::SearchHint(HeadType keyHead, uint16_t& lowerOut, uint16_t& upper Log::Error("Search hint with AVX512 failed: __AVX512F__ not found"); #endif } else if (utils::tlsStore->mStoreOption->mBTreeHints == 1) { - const uint16_t dist = mNumSeps / (sHintCount + 1); + const uint16_t dist = mNumSlots / (sHintCount + 1); uint16_t pos, pos2; for (pos = 0; pos < sHintCount; pos++) { @@ -81,22 +85,25 @@ int16_t BTreeNode::InsertDoNotCopyPayload(Slice key, uint16_t valSize, int32_t p LS_DCHECK(CanInsert(key.size(), valSize)); PrepareInsert(key.size(), valSize); + // calculate target slotId for insertion int32_t slotId = (pos == -1) ? LowerBound(key) : pos; - memmove(mSlot + slotId + 1, mSlot + slotId, sizeof(Slot) * (mNumSeps - slotId)); - // StoreKeyValue + // 1. 
move mSlot[slotId..mNumSlots] to mSlot[slotId+1..mNumSlots+1] + memmove(mSlot + slotId + 1, mSlot + slotId, sizeof(BTreeNodeSlot) * (mNumSlots - slotId)); + + // remove common key prefix key.remove_prefix(mPrefixSize); + // mSlot[slotId].mHead = Head(key); mSlot[slotId].mKeySizeWithoutPrefix = key.size(); mSlot[slotId].mValSize = valSize; - const uint16_t space = key.size() + valSize; - mDataOffset -= space; - mSpaceUsed += space; + auto totalKeyValSize = key.size() + valSize; + advanceDataOffset(totalKeyValSize); mSlot[slotId].mOffset = mDataOffset; memcpy(KeyDataWithoutPrefix(slotId), key.data(), key.size()); - mNumSeps++; + mNumSlots++; UpdateHint(slotId); return slotId; } @@ -111,9 +118,9 @@ int32_t BTreeNode::Insert(Slice key, Slice val) { PrepareInsert(key.size(), val.size()); int32_t slotId = LowerBound(key); - memmove(mSlot + slotId + 1, mSlot + slotId, sizeof(Slot) * (mNumSeps - slotId)); + memmove(mSlot + slotId + 1, mSlot + slotId, sizeof(BTreeNodeSlot) * (mNumSlots - slotId)); StoreKeyValue(slotId, key, val); - mNumSeps++; + mNumSlots++; UpdateHint(slotId); return slotId; @@ -126,32 +133,39 @@ int32_t BTreeNode::Insert(Slice key, Slice val) { } void BTreeNode::Compactify() { - uint16_t should = FreeSpaceAfterCompaction(); - static_cast(should); + uint16_t spaceAfterCompaction [[maybe_unused]] = 0; + DEBUG_BLOCK() { + spaceAfterCompaction = FreeSpaceAfterCompaction(); + } + SCOPED_DEFER(DEBUG_BLOCK() { LS_DCHECK(spaceAfterCompaction == FreeSpace()); }); + // generate a temp node to store the compacted data auto tmpNodeBuf = utils::JumpScopedArray(BTreeNode::Size()); - auto* tmp = BTreeNode::Init(tmpNodeBuf->get(), mIsLeaf); + auto* tmp = BTreeNode::New(tmpNodeBuf->get(), mIsLeaf, GetLowerFence(), GetUpperFence()); + + // copy the keys and values + CopyKeyValueRange(tmp, 0, 0, mNumSlots); - tmp->SetFences(GetLowerFence(), GetUpperFence()); - CopyKeyValueRange(tmp, 0, 0, mNumSeps); + // copy the right most child tmp->mRightMostChildSwip = 
mRightMostChildSwip; + + // copy back memcpy(reinterpret_cast(this), tmp, BTreeNode::Size()); MakeHint(); - assert(FreeSpace() == should); } uint32_t BTreeNode::MergeSpaceUpperBound(ExclusiveGuardedBufferFrame& xGuardedRight) { LS_DCHECK(xGuardedRight->mIsLeaf); auto tmpNodeBuf = utils::JumpScopedArray(BTreeNode::Size()); - auto* tmp = BTreeNode::Init(tmpNodeBuf->get(), true); + auto* tmp = + BTreeNode::New(tmpNodeBuf->get(), true, GetLowerFence(), xGuardedRight->GetUpperFence()); - tmp->SetFences(GetLowerFence(), xGuardedRight->GetUpperFence()); - uint32_t leftGrow = (mPrefixSize - tmp->mPrefixSize) * mNumSeps; - uint32_t rightGrow = (xGuardedRight->mPrefixSize - tmp->mPrefixSize) * xGuardedRight->mNumSeps; + uint32_t leftGrow = (mPrefixSize - tmp->mPrefixSize) * mNumSlots; + uint32_t rightGrow = (xGuardedRight->mPrefixSize - tmp->mPrefixSize) * xGuardedRight->mNumSlots; uint32_t spaceUpperBound = mSpaceUsed + xGuardedRight->mSpaceUsed + - (reinterpret_cast(mSlot + mNumSeps + xGuardedRight->mNumSeps) - RawPtr()) + + (reinterpret_cast(mSlot + mNumSlots + xGuardedRight->mNumSlots) - NodeBegin()) + leftGrow + rightGrow; return spaceUpperBound; } @@ -164,20 +178,19 @@ bool BTreeNode::merge(uint16_t slotId, ExclusiveGuardedBufferFrame& x assert(xGuardedParent->IsInner()); auto tmpNodeBuf = utils::JumpScopedArray(BTreeNode::Size()); - auto* tmp = BTreeNode::Init(tmpNodeBuf->get(), true); - - tmp->SetFences(GetLowerFence(), xGuardedRight->GetUpperFence()); - uint16_t leftGrow = (mPrefixSize - tmp->mPrefixSize) * mNumSeps; - uint16_t rightGrow = (xGuardedRight->mPrefixSize - tmp->mPrefixSize) * xGuardedRight->mNumSeps; + auto* tmp = + BTreeNode::New(tmpNodeBuf->get(), true, GetLowerFence(), xGuardedRight->GetUpperFence()); + uint16_t leftGrow = (mPrefixSize - tmp->mPrefixSize) * mNumSlots; + uint16_t rightGrow = (xGuardedRight->mPrefixSize - tmp->mPrefixSize) * xGuardedRight->mNumSlots; uint16_t spaceUpperBound = mSpaceUsed + xGuardedRight->mSpaceUsed + - 
(reinterpret_cast(mSlot + mNumSeps + xGuardedRight->mNumSeps) - RawPtr()) + + (reinterpret_cast(mSlot + mNumSlots + xGuardedRight->mNumSlots) - NodeBegin()) + leftGrow + rightGrow; if (spaceUpperBound > BTreeNode::Size()) { return false; } - CopyKeyValueRange(tmp, 0, 0, mNumSeps); - xGuardedRight->CopyKeyValueRange(tmp, mNumSeps, 0, xGuardedRight->mNumSeps); + CopyKeyValueRange(tmp, 0, 0, mNumSlots); + xGuardedRight->CopyKeyValueRange(tmp, mNumSlots, 0, xGuardedRight->mNumSlots); xGuardedParent->RemoveSlot(slotId); xGuardedRight->mHasGarbage |= mHasGarbage; @@ -192,26 +205,25 @@ bool BTreeNode::merge(uint16_t slotId, ExclusiveGuardedBufferFrame& x LS_DCHECK(xGuardedParent->IsInner()); auto tmpNodeBuf = utils::JumpScopedArray(BTreeNode::Size()); - auto* tmp = BTreeNode::Init(tmpNodeBuf->get(), mIsLeaf); - - tmp->SetFences(GetLowerFence(), xGuardedRight->GetUpperFence()); - uint16_t leftGrow = (mPrefixSize - tmp->mPrefixSize) * mNumSeps; - uint16_t rightGrow = (xGuardedRight->mPrefixSize - tmp->mPrefixSize) * xGuardedRight->mNumSeps; + auto* tmp = + BTreeNode::New(tmpNodeBuf->get(), mIsLeaf, GetLowerFence(), xGuardedRight->GetUpperFence()); + uint16_t leftGrow = (mPrefixSize - tmp->mPrefixSize) * mNumSlots; + uint16_t rightGrow = (xGuardedRight->mPrefixSize - tmp->mPrefixSize) * xGuardedRight->mNumSlots; uint16_t extraKeyLength = xGuardedParent->GetFullKeyLen(slotId); uint16_t spaceUpperBound = mSpaceUsed + xGuardedRight->mSpaceUsed + - (reinterpret_cast(mSlot + mNumSeps + xGuardedRight->mNumSeps) - RawPtr()) + + (reinterpret_cast(mSlot + mNumSlots + xGuardedRight->mNumSlots) - NodeBegin()) + leftGrow + rightGrow + SpaceNeeded(extraKeyLength, sizeof(Swip), tmp->mPrefixSize); if (spaceUpperBound > BTreeNode::Size()) return false; - CopyKeyValueRange(tmp, 0, 0, mNumSeps); + CopyKeyValueRange(tmp, 0, 0, mNumSlots); // Allocate in the stack, freed when the calling function exits. 
auto extraKey = utils::JumpScopedArray(extraKeyLength); xGuardedParent->CopyFullKey(slotId, extraKey->get()); - tmp->StoreKeyValue(mNumSeps, Slice(extraKey->get(), extraKeyLength), + tmp->StoreKeyValue(mNumSlots, Slice(extraKey->get(), extraKeyLength), Slice(reinterpret_cast(&mRightMostChildSwip), sizeof(Swip))); - tmp->mNumSeps++; - xGuardedRight->CopyKeyValueRange(tmp, tmp->mNumSeps, 0, xGuardedRight->mNumSeps); + tmp->mNumSlots++; + xGuardedRight->CopyKeyValueRange(tmp, tmp->mNumSlots, 0, xGuardedRight->mNumSlots); xGuardedParent->RemoveSlot(slotId); tmp->mRightMostChildSwip = xGuardedRight->mRightMostChildSwip; tmp->MakeHint(); @@ -227,47 +239,32 @@ void BTreeNode::StoreKeyValue(uint16_t slotId, Slice key, Slice val) { mSlot[slotId].mValSize = val.size(); // Value - const uint16_t space = key.size() + val.size(); - mDataOffset -= space; - mSpaceUsed += space; + advanceDataOffset(key.size() + val.size()); mSlot[slotId].mOffset = mDataOffset; memcpy(KeyDataWithoutPrefix(slotId), key.data(), key.size()); memcpy(ValData(slotId), val.data(), val.size()); - assert(RawPtr() + mDataOffset >= reinterpret_cast(mSlot + mNumSeps)); } -// ATTENTION: dstSlot then srcSlot !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
void BTreeNode::CopyKeyValueRange(BTreeNode* dst, uint16_t dstSlot, uint16_t srcSlot, uint16_t count) { if (mPrefixSize == dst->mPrefixSize) { - // Fast path - memcpy(dst->mSlot + dstSlot, mSlot + srcSlot, sizeof(Slot) * count); - DEBUG_BLOCK() { - uint32_t totalSpaceUsed [[maybe_unused]] = mUpperFence.mLength + mLowerFence.mLength; - for (uint16_t i = 0; i < this->mNumSeps; i++) { - totalSpaceUsed += KeySizeWithoutPrefix(i) + ValSize(i); - } - assert(totalSpaceUsed == this->mSpaceUsed); - } - for (uint16_t i = 0; i < count; i++) { + // copy slot array + memcpy(dst->mSlot + dstSlot, mSlot + srcSlot, sizeof(BTreeNodeSlot) * count); + + for (auto i = 0u; i < count; i++) { + // consolidate the offset of each slot uint32_t kvSize = KeySizeWithoutPrefix(srcSlot + i) + ValSize(srcSlot + i); - dst->mDataOffset -= kvSize; - dst->mSpaceUsed += kvSize; + dst->advanceDataOffset(kvSize); dst->mSlot[dstSlot + i].mOffset = dst->mDataOffset; - DEBUG_BLOCK() { - [[maybe_unused]] int64_t offBy = reinterpret_cast(dst->mSlot + dstSlot + count) - - (dst->RawPtr() + dst->mDataOffset); - assert(offBy <= 0); - } - memcpy(dst->RawPtr() + dst->mDataOffset, RawPtr() + mSlot[srcSlot + i].mOffset, kvSize); + + // copy the key value pair + memcpy(dst->NodeBegin() + dst->mDataOffset, NodeBegin() + mSlot[srcSlot + i].mOffset, kvSize); } } else { for (uint16_t i = 0; i < count; i++) CopyKeyValue(srcSlot + i, dst, dstSlot + i); } - dst->mNumSeps += count; - assert((dst->RawPtr() + dst->mDataOffset) >= - reinterpret_cast(dst->mSlot + dst->mNumSeps)); + dst->mNumSlots += count; } void BTreeNode::CopyKeyValue(uint16_t srcSlot, BTreeNode* dst, uint16_t dstSlot) { @@ -284,36 +281,22 @@ void BTreeNode::InsertFence(BTreeNodeHeader::FenceKey& fk, Slice key) { } assert(FreeSpace() >= key.size()); - mDataOffset -= key.size(); - mSpaceUsed += key.size(); + advanceDataOffset(key.size()); fk.mOffset = mDataOffset; - fk.mLength = key.size(); - memcpy(RawPtr() + mDataOffset, key.data(), key.size()); -} - -void 
BTreeNode::SetFences(Slice lowerKey, Slice upperKey) { - InsertFence(mLowerFence, lowerKey); - InsertFence(mUpperFence, upperKey); - LS_DCHECK(GetLowerFenceKey() == nullptr || GetUpperFenceKey() == nullptr || - *GetLowerFenceKey() <= *GetUpperFenceKey()); - - // prefix compression - for (mPrefixSize = 0; (mPrefixSize < std::min(lowerKey.size(), upperKey.size())) && - (lowerKey[mPrefixSize] == upperKey[mPrefixSize]); - mPrefixSize++) - ; + fk.mSize = key.size(); + memcpy(NodeBegin() + mDataOffset, key.data(), key.size()); } uint16_t BTreeNode::CommonPrefix(uint16_t slotA, uint16_t slotB) { - if (mNumSeps == 0) { + if (mNumSlots == 0) { // Do not prefix compress if only one tuple is in to // avoid corner cases (e.g., SI Version) return 0; } // TODO: the following two checks work only in single threaded - // assert(aPos < mNumSeps); - // assert(bPos < mNumSeps); + // assert(aPos < mNumSlots); + // assert(bPos < mNumSlots); uint32_t limit = std::min(mSlot[slotA].mKeySizeWithoutPrefix, mSlot[slotB].mKeySizeWithoutPrefix); uint8_t *a = KeyDataWithoutPrefix(slotA), *b = KeyDataWithoutPrefix(slotB); uint32_t i; @@ -324,19 +307,19 @@ uint16_t BTreeNode::CommonPrefix(uint16_t slotA, uint16_t slotB) { } BTreeNode::SeparatorInfo BTreeNode::FindSep() { - LS_DCHECK(mNumSeps > 1); + LS_DCHECK(mNumSlots > 1); // Inner nodes are split in the middle if (IsInner()) { - uint16_t slotId = mNumSeps / 2; + uint16_t slotId = mNumSlots / 2; return SeparatorInfo{GetFullKeyLen(slotId), slotId, false}; } // Find good separator slot uint16_t bestPrefixLength, bestSlot; - if (mNumSeps > 16) { - uint16_t lower = (mNumSeps / 2) - (mNumSeps / 16); - uint16_t upper = (mNumSeps / 2); + if (mNumSlots > 16) { + uint16_t lower = (mNumSlots / 2) - (mNumSlots / 16); + uint16_t upper = (mNumSlots / 2); bestPrefixLength = CommonPrefix(lower, 0); bestSlot = lower; @@ -346,13 +329,13 @@ BTreeNode::SeparatorInfo BTreeNode::FindSep() { (bestSlot < upper) && (CommonPrefix(bestSlot, 0) == bestPrefixLength); 
bestSlot++) ; } else { - bestSlot = (mNumSeps - 1) / 2; + bestSlot = (mNumSlots - 1) / 2; // bestPrefixLength = CommonPrefix(bestSlot, 0); } // Try to truncate separator uint16_t common = CommonPrefix(bestSlot, bestSlot + 1); - if ((bestSlot + 1 < mNumSeps) && (mSlot[bestSlot].mKeySizeWithoutPrefix > common) && + if ((bestSlot + 1 < mNumSlots) && (mSlot[bestSlot].mKeySizeWithoutPrefix > common) && (mSlot[bestSlot + 1].mKeySizeWithoutPrefix > (common + 1))) return SeparatorInfo{static_cast(mPrefixSize + common + 1), bestSlot, true}; @@ -377,7 +360,7 @@ int32_t BTreeNode::CompareKeyWithBoundaries(Slice key) { Swip& BTreeNode::LookupInner(Slice key) { int32_t slotId = LowerBound(key); - if (slotId == mNumSeps) { + if (slotId == mNumSlots) { LS_DCHECK(!mRightMostChildSwip.IsEmpty()); return mRightMostChildSwip; } @@ -386,10 +369,10 @@ Swip& BTreeNode::LookupInner(Slice key) { return *childSwip; } -//! This = right -//! PRE: current, xGuardedParent and xGuardedLeft are x locked -//! assert(sepSlot > 0); -//! TODO: really ? +//! xGuardedParent xGuardedParent +//! | | | +//! this xGuardedNewLeft this +//! 
void BTreeNode::Split(ExclusiveGuardedBufferFrame& xGuardedParent, ExclusiveGuardedBufferFrame& xGuardedNewLeft, const BTreeNode::SeparatorInfo& sepInfo) { @@ -398,26 +381,31 @@ void BTreeNode::Split(ExclusiveGuardedBufferFrame& xGuardedParent, // generate separator key uint8_t sepKey[sepInfo.mSize]; generateSeparator(sepInfo, sepKey); + Slice seperator{sepKey, sepInfo.mSize}; - xGuardedNewLeft->SetFences(GetLowerFence(), Slice(sepKey, sepInfo.mSize)); + xGuardedNewLeft->setFences(GetLowerFence(), seperator); uint8_t tmpRightBuf[BTreeNode::Size()]; - auto* tmpRight = BTreeNode::Init(tmpRightBuf, mIsLeaf); + auto* tmpRight = BTreeNode::New(tmpRightBuf, mIsLeaf, seperator, GetUpperFence()); - tmpRight->SetFences(Slice(sepKey, sepInfo.mSize), GetUpperFence()); + // insert (seperator, xGuardedNewLeft) into xGuardedParent auto swip = xGuardedNewLeft.swip(); - xGuardedParent->Insert(Slice(sepKey, sepInfo.mSize), - Slice(reinterpret_cast(&swip), sizeof(Swip))); + xGuardedParent->Insert(seperator, Slice(reinterpret_cast(&swip), sizeof(Swip))); + if (mIsLeaf) { + // move slot 0..sepInfo.mSlotId to xGuardedNewLeft CopyKeyValueRange(xGuardedNewLeft.GetPagePayload(), 0, 0, sepInfo.mSlotId + 1); - CopyKeyValueRange(tmpRight, 0, xGuardedNewLeft->mNumSeps, mNumSeps - xGuardedNewLeft->mNumSeps); + + // move slot sepInfo.mSlotId+1..mNumSlots to tmpRight + CopyKeyValueRange(tmpRight, 0, xGuardedNewLeft->mNumSlots, + mNumSlots - xGuardedNewLeft->mNumSlots); tmpRight->mHasGarbage = mHasGarbage; xGuardedNewLeft->mHasGarbage = mHasGarbage; } else { CopyKeyValueRange(xGuardedNewLeft.GetPagePayload(), 0, 0, sepInfo.mSlotId); - CopyKeyValueRange(tmpRight, 0, xGuardedNewLeft->mNumSeps + 1, - mNumSeps - xGuardedNewLeft->mNumSeps - 1); - xGuardedNewLeft->mRightMostChildSwip = *ChildSwip(xGuardedNewLeft->mNumSeps); + CopyKeyValueRange(tmpRight, 0, xGuardedNewLeft->mNumSlots + 1, + mNumSlots - xGuardedNewLeft->mNumSlots - 1); + xGuardedNewLeft->mRightMostChildSwip = 
*ChildSwip(xGuardedNewLeft->mNumSlots); tmpRight->mRightMostChildSwip = mRightMostChildSwip; } xGuardedNewLeft->MakeHint(); @@ -427,8 +415,8 @@ void BTreeNode::Split(ExclusiveGuardedBufferFrame& xGuardedParent, bool BTreeNode::RemoveSlot(uint16_t slotId) { mSpaceUsed -= KeySizeWithoutPrefix(slotId) + ValSize(slotId); - memmove(mSlot + slotId, mSlot + slotId + 1, sizeof(Slot) * (mNumSeps - slotId - 1)); - mNumSeps--; + memmove(mSlot + slotId, mSlot + slotId + 1, sizeof(BTreeNodeSlot) * (mNumSlots - slotId - 1)); + mNumSlots--; MakeHint(); return true; } @@ -443,9 +431,9 @@ bool BTreeNode::Remove(Slice key) { } void BTreeNode::Reset() { - mSpaceUsed = mUpperFence.mLength + mLowerFence.mLength; + mSpaceUsed = mUpperFence.mSize + mLowerFence.mSize; mDataOffset = BTreeNode::Size() - mSpaceUsed; - mNumSeps = 0; + mNumSlots = 0; } int32_t BTreeNode::CmpKeys(Slice lhs, Slice rhs) { diff --git a/src/concurrency/HistoryStorage.cpp b/src/concurrency/HistoryStorage.cpp index cd9fe8c7..8ab42511 100644 --- a/src/concurrency/HistoryStorage.cpp +++ b/src/concurrency/HistoryStorage.cpp @@ -210,10 +210,10 @@ void HistoryStorage::PurgeVersions(TXID fromTxId, TXID toTxId, // lock successfull, check whether the page can be purged auto* leafNode = reinterpret_cast(bf->mPage.mPayload); - if (leafNode->mLowerFence.mLength == 0 && leafNode->mNumSeps > 0) { - auto lastKeySize = leafNode->GetFullKeyLen(leafNode->mNumSeps - 1); + if (leafNode->mLowerFence.IsInfinity() && leafNode->mNumSlots > 0) { + auto lastKeySize = leafNode->GetFullKeyLen(leafNode->mNumSlots - 1); uint8_t lastKey[lastKeySize]; - leafNode->CopyFullKey(leafNode->mNumSeps - 1, lastKey); + leafNode->CopyFullKey(leafNode->mNumSlots - 1, lastKey); // optimistic unlock, jump if invalid bfGuard.Unlock(); @@ -251,7 +251,7 @@ void HistoryStorage::PurgeVersions(TXID fromTxId, TXID toTxId, // check whether the whole page can be purged when enter a leaf xIter.SetEnterLeafCallback( [&](leanstore::storage::GuardedBufferFrame& 
guardedLeaf) { - if (guardedLeaf->mNumSeps == 0) { + if (guardedLeaf->mNumSlots == 0) { return; } @@ -263,15 +263,15 @@ void HistoryStorage::PurgeVersions(TXID fromTxId, TXID toTxId, utils::Unfold(firstKey, txIdInFirstKey); // get the transaction id in the last key - auto lastKeySize = guardedLeaf->GetFullKeyLen(guardedLeaf->mNumSeps - 1); + auto lastKeySize = guardedLeaf->GetFullKeyLen(guardedLeaf->mNumSlots - 1); uint8_t lastKey[lastKeySize]; - guardedLeaf->CopyFullKey(guardedLeaf->mNumSeps - 1, lastKey); + guardedLeaf->CopyFullKey(guardedLeaf->mNumSlots - 1, lastKey); TXID txIdInLastKey; utils::Unfold(lastKey, txIdInLastKey); // purge the whole page if it is in the range if (fromTxId <= txIdInFirstKey && txIdInLastKey <= toTxId) { - versionsRemoved += guardedLeaf->mNumSeps; + versionsRemoved += guardedLeaf->mNumSlots; guardedLeaf->Reset(); isFullPagePurged = true; } diff --git a/src/utils/ToJson.hpp b/src/utils/ToJson.hpp index c4c49923..65da0dbd 100644 --- a/src/utils/ToJson.hpp +++ b/src/utils/ToJson.hpp @@ -248,7 +248,7 @@ inline void ToJson(leanstore::storage::btree::BTreeNode* obj, rapidjson::Value* AddMemberToJson(doc, *allocator, "mUpperFence", upperFence); } - AddMemberToJson(doc, *allocator, "mNumSeps", obj->mNumSeps); + AddMemberToJson(doc, *allocator, "mNumSlots", obj->mNumSlots); AddMemberToJson(doc, *allocator, "mIsLeaf", obj->mIsLeaf); AddMemberToJson(doc, *allocator, "mSpaceUsed", obj->mSpaceUsed); AddMemberToJson(doc, *allocator, "mDataOffset", obj->mDataOffset); @@ -270,7 +270,7 @@ inline void ToJson(leanstore::storage::btree::BTreeNode* obj, rapidjson::Value* // slots { rapidjson::Value memberArray(rapidjson::kArrayType); - for (auto i = 0; i < obj->mNumSeps; ++i) { + for (auto i = 0; i < obj->mNumSlots; ++i) { rapidjson::Value arrayElement(rapidjson::kObjectType); AddMemberToJson(&arrayElement, *allocator, "mOffset", static_cast(obj->mSlot[i].mOffset));