diff --git a/engines/ep/src/paging_visitor.cc b/engines/ep/src/paging_visitor.cc index 615272bdeb..89103c87e8 100644 --- a/engines/ep/src/paging_visitor.cc +++ b/engines/ep/src/paging_visitor.cc @@ -182,10 +182,11 @@ void PagingVisitor::visitBucket(const VBucketPtr& vb) { // fast path for expiry item pager if (owner == EXPIRY_PAGER) { if (vBucketFilter(vb->getId())) { - currentBucket = vb; + currentBucket = vb.get(); // EvictionPolicy is not required when running expiry item // pager vb->ht.visit(*this); + currentBucket = nullptr; } return; } @@ -204,8 +205,7 @@ void PagingVisitor::visitBucket(const VBucketPtr& vb) { return; } - currentBucket = vb; - maxCas = currentBucket->getMaxCas(); + maxCas = vb->getMaxCas(); itemEviction.reset(); freqCounterThreshold = 0; @@ -220,7 +220,9 @@ void PagingVisitor::visitBucket(const VBucketPtr& vb) { : ItemEviction::learningPopulation; itemEviction.setUpdateInterval(interval); + currentBucket = vb.get(); vb->ht.visit(*this); + currentBucket = nullptr; /** * Note: We are not taking a reader lock on the vbucket state. * Therefore it is possible that the stats could be slightly @@ -228,9 +230,8 @@ void PagingVisitor::visitBucket(const VBucketPtr& vb) { * to incur any performance cost associated with taking the * lock. */ - const bool isActiveOrPending = - ((currentBucket->getState() == vbucket_state_active) || - (currentBucket->getState() == vbucket_state_pending)); + const bool isActiveOrPending = ((vb->getState() == vbucket_state_active) || + (vb->getState() == vbucket_state_pending)); // Take a snapshot of the latest frequency histogram if (isActiveOrPending) { diff --git a/engines/ep/src/paging_visitor.h b/engines/ep/src/paging_visitor.h index ecef20f494..3e1a2054c1 100644 --- a/engines/ep/src/paging_visitor.h +++ b/engines/ep/src/paging_visitor.h @@ -122,7 +122,8 @@ class PagingVisitor : public CappedDurationVBucketVisitor, size_t ejected; // The current vbucket that the eviction algorithm is operating on. - VBucketPtr currentBucket; + // Only valid while inside visitBucket(). + VBucket* currentBucket{nullptr}; // The frequency counter threshold that is used to determine whether we // should evict items from the hash table. diff --git a/engines/ep/src/warmup.cc b/engines/ep/src/warmup.cc index d67eb1b44b..b98377f4f2 100644 --- a/engines/ep/src/warmup.cc +++ b/engines/ep/src/warmup.cc @@ -1092,8 +1092,9 @@ void LoadStorageKVPairCallback::purge() { void visitBucket(const VBucketPtr& vb) override { if (vBucketFilter(vb->getId())) { - currentBucket = vb; + currentBucket = vb.get(); vb->ht.visit(*this); + currentBucket = nullptr; } } @@ -1107,7 +1108,9 @@ void LoadStorageKVPairCallback::purge() { private: EPBucket& epstore; - VBucketPtr currentBucket; + // The current vbucket that the visitor is operating on. Only valid + // while inside visitBucket(). + VBucket* currentBucket{nullptr}; }; auto vbucketIds(vbuckets.getBuckets()); diff --git a/engines/ep/tests/mock/mock_paging_visitor.h b/engines/ep/tests/mock/mock_paging_visitor.h index 1ea839b71e..05f1c7b9ca 100644 --- a/engines/ep/tests/mock/mock_paging_visitor.h +++ b/engines/ep/tests/mock/mock_paging_visitor.h @@ -60,7 +60,7 @@ class MockPagingVisitor : public PagingVisitor { } void setCurrentBucket(VBucketPtr _currentBucket) { - currentBucket = _currentBucket; + currentBucket = _currentBucket.get(); } MOCK_METHOD1(visitBucket, void(const VBucketPtr&)); diff --git a/engines/ep/tests/module_tests/evp_engine_test.cc b/engines/ep/tests/module_tests/evp_engine_test.cc index e98de311ab..c8c2583333 100644 --- a/engines/ep/tests/module_tests/evp_engine_test.cc +++ b/engines/ep/tests/module_tests/evp_engine_test.cc @@ -22,11 +22,13 @@ #include "programs/engine_testapp/mock_cookie.h" #include "programs/engine_testapp/mock_server.h" #include "tests/module_tests/test_helpers.h" +#include "vb_visitors.h" #include #include #include #include +#include #include #include #include @@ -108,10 +110,16 @@ void EventuallyPersistentEngineTest::TearDown() { } void EventuallyPersistentEngineTest::shutdownEngine() { - destroy_mock_cookie(cookie); - // Need to force the destroy (i.e. pass true) because - // NonIO threads may have been disabled (see DCPTest subclass). - engine->destroy(true); + if (cookie) { + destroy_mock_cookie(cookie); + cookie = nullptr; + } + if (engine) { + // Need to force the destroy (i.e. pass true) because + // NonIO threads may have been disabled (see DCPTest subclass). + engine->destroy(true); + engine = nullptr; + } } queued_item EventuallyPersistentEngineTest::store_item( @@ -495,3 +503,82 @@ INSTANTIATE_TEST_SUITE_P(EphemeralOrPersistent, [](const ::testing::TestParamInfo& info) { return info.param; }); + +/** + * Regression test for MB-48925 - if a Task is scheduled against a Taskable + * (Bucket) which has already been unregistered, then the ExecutorPool throws + * and crashes the process. + * Note: This test as it stands will *not* crash kv-engine if the fix for the + * issue (see rest of this commit) is reverted. This is because the fix is + * to change the currentVb Task member variable from an (owning) + * shared_ptr to a (non-owning) VBucket* - the same thing TestVisior + * below does. However it is included here for reference as to the original + * problematic scenario. + */ +TEST_F(EventuallyPersistentEngineTest, MB48925_ScheduleTaskAfterUnregistered) { + class TestVisitor : public InterruptableVBucketVisitor { + public: + TestVisitor(int& visitCount, + folly::Baton<>& waitForVisit, + folly::Baton<>& waitForDeinitialise) + : visitCount(visitCount), + waitForVisit(waitForVisit), + waitForDeinitialise(waitForDeinitialise) { + } + + void visitBucket(const VBucketPtr& vb) override { + if (visitCount++ == 0) { + currentVb = vb.get(); + // On first call to visitBucket() perform the necessary + // interleaved baton wait / sleeping. + // Suspend execution of this thread; and allow main thread to + // continue, delete Bucket and unregisterTaskable. + waitForVisit.post(); + + // Keep task running until unregisterTaskable() has been called + // and starts to cancel tasks - this ensures that the Task + // object is still alive (ExecutorPool has a reference to it) + // and hence is passed out from unregisterTaskable(), hence kept + // alive past when KVBucket is deleted. + waitForDeinitialise.wait(); + } + } + InterruptableVBucketVisitor::ExecutionState shouldInterrupt() override { + return ExecutionState::Continue; + } + + int& visitCount; + folly::Baton<>& waitForVisit; + folly::Baton<>& waitForDeinitialise; + + // Model the behaviour of PagingVisitor prior to the bugfix. Note that + // _if_ this is changed to a shared_ptr then we crash. + VBucket* currentVb; + }; + + int visitCount{0}; + folly::Baton waitForVisit; + folly::Baton waitForUnregister; + engine->getKVBucket()->visitAsync( + std::make_unique( + visitCount, waitForVisit, waitForUnregister), + "MB48925_ScheduleTaskAfterUnregistered", + TaskId::ExpiredItemPagerVisitor, + std::chrono::seconds{1}); + waitForVisit.wait(); + + // Setup testing hook so we allow our TestVisitor's Task above to + // continue once we are inside unregisterTaskable. + ExecutorPool::get()->unregisterTaskablePostCancelHook = + [&waitForUnregister]() { waitForUnregister.post(); }; + + // Delete the vbucket; so the file deletion will be performed by + // VBucket::DeferredDeleter when the last reference goes out of scope + // (expected to be the ExpiryPager. + engine->getKVBucket()->deleteVBucket(vbid); + + // Destroy the engine. This does happen implicitly in TearDown, but call + // it earlier because we need to call destroy() before our various Baton + // local variables etc go out of scope. + shutdownEngine(); +} diff --git a/executor/executorpool.h b/executor/executorpool.h index 9f288f3993..c5a069a43c 100644 --- a/executor/executorpool.h +++ b/executor/executorpool.h @@ -11,6 +11,7 @@ #include #include +#include #include "task_type.h" #include @@ -222,6 +223,12 @@ class ExecutorPool { */ static int getThreadPriority(task_type_t taskType); + /************** Testing *************************************************/ + + // Testing hook for MB-48925 - called inside unregisterTaskable after + // tasks have been cancelled. + TestingHook<> unregisterTaskablePostCancelHook; + protected: ExecutorPool(size_t maxThreads); diff --git a/executor/folly_executorpool.cc b/executor/folly_executorpool.cc index 2fa93eaab7..f915808991 100644 --- a/executor/folly_executorpool.cc +++ b/executor/folly_executorpool.cc @@ -879,6 +879,8 @@ std::vector FollyExecutorPool::unregisterTaskable(Taskable& taskable, removedTasks = state->cancelTasksOwnedBy(taskable, force); }); + unregisterTaskablePostCancelHook(); + // Step 2 - poll for taskOwners to become empty. This will only // occur once all outstanding, running tasks have been cancelled. auto isTaskOwnersEmpty = [eventBase, &state = this->state, &taskable] {