From ce650adadd33d5c72d78a854f5b032cc4cdef5b8 Mon Sep 17 00:00:00 2001 From: Ed Hennis Date: Wed, 7 Aug 2024 18:08:03 -0400 Subject: [PATCH 01/18] refactor: Change recursive_mutex to mutex in DatabaseRotatingImp - Follow-up to #4989, which stated "Ideally, the code should be rewritten so it doesn't hold the mutex during the callback and the mutex should be changed back to a regular mutex." --- src/xrpld/nodestore/DatabaseRotating.h | 4 ++++ .../nodestore/detail/DatabaseRotatingImp.cpp | 16 +++++++++++++++- src/xrpld/nodestore/detail/DatabaseRotatingImp.h | 10 +++------- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/xrpld/nodestore/DatabaseRotating.h b/src/xrpld/nodestore/DatabaseRotating.h index 10f575c4662..d923def5f3e 100644 --- a/src/xrpld/nodestore/DatabaseRotating.h +++ b/src/xrpld/nodestore/DatabaseRotating.h @@ -45,6 +45,10 @@ class DatabaseRotating : public Database /** Rotates the backends. @param f A function executed before the rotation and under the same lock + + This function only has one caller in SHAMapStoreImp::run. The locking + for this function will need to be rethought another caller is ever + added. */ virtual void rotateWithLock(std::function( diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp index 58cc3599dc6..8b9a311d3ac 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp @@ -45,9 +45,23 @@ DatabaseRotatingImp::rotateWithLock( std::function( std::string const& writableBackendName)> const& f) { + auto const writableBackend = [&] { + std::lock_guard lock(mutex_); + return writableBackend_; + }(); + + auto newBackend = f(writableBackend->getName()); + std::lock_guard lock(mutex_); - auto newBackend = f(writableBackend_->getName()); + // This function only has one caller, which is syncronous, and this is the + // only place other than the ctor where writableBackend_ can change. Even + // without a lock, it should be impossible for writableBackend_ to have + // changed. + assert(writableBackend_ == writableBackend); + if (writableBackend_ != writableBackend) + LogicError("Backend changed in the middle of a rotation"); + archiveBackend_->setDeletePath(); archiveBackend_ = std::move(writableBackend_); writableBackend_ = std::move(newBackend); diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h index 5183aa1e2e4..d857e3dc2d6 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h @@ -48,6 +48,8 @@ class DatabaseRotatingImp : public DatabaseRotating stop(); } + // This function only has one caller in SHAMapStoreImp::run. The locking for + // this function will need to be rethought another caller is ever added. void rotateWithLock( std::function( @@ -82,13 +84,7 @@ class DatabaseRotatingImp : public DatabaseRotating private: std::shared_ptr writableBackend_; std::shared_ptr archiveBackend_; - // This needs to be a recursive mutex because callbacks in `rotateWithLock` - // can call function that also lock the mutex. A current example of this is - // a callback from SHAMapStoreImp, which calls `clearCaches`. This - // `clearCaches` call eventually calls `fetchNodeObject` which tries to - // relock the mutex. It would be desirable to rewrite the code so the lock - // was not held during a callback. - mutable std::recursive_mutex mutex_; + mutable std::mutex mutex_; std::shared_ptr fetchNodeObject( From b8413ae1e1cac318e77989828ffbfe7205dd2261 Mon Sep 17 00:00:00 2001 From: Ed Hennis Date: Wed, 5 Feb 2025 18:32:28 -0500 Subject: [PATCH 02/18] Review feedback from @Bronek: * Use a second mutex to protect the backends from modification * Remove a bunch of warning comments --- src/xrpld/nodestore/DatabaseRotating.h | 6 +----- src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp | 12 ++++-------- src/xrpld/nodestore/detail/DatabaseRotatingImp.h | 6 ++++-- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/src/xrpld/nodestore/DatabaseRotating.h b/src/xrpld/nodestore/DatabaseRotating.h index d923def5f3e..259dae4fe65 100644 --- a/src/xrpld/nodestore/DatabaseRotating.h +++ b/src/xrpld/nodestore/DatabaseRotating.h @@ -44,11 +44,7 @@ class DatabaseRotating : public Database /** Rotates the backends. - @param f A function executed before the rotation and under the same lock - - This function only has one caller in SHAMapStoreImp::run. The locking - for this function will need to be rethought another caller is ever - added. + @param f A function executed before the rotation */ virtual void rotateWithLock(std::function( diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp index 8b9a311d3ac..d2a4e2fab2c 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp @@ -45,6 +45,10 @@ DatabaseRotatingImp::rotateWithLock( std::function( std::string const& writableBackendName)> const& f) { + // backendMutex_ prevents writableBackend_ and archiveBackend_ from changing + // while the main mutex_ is released during the callback. + std::lock_guard backendLock(backendMutex_); + auto const writableBackend = [&] { std::lock_guard lock(mutex_); return writableBackend_; @@ -54,14 +58,6 @@ DatabaseRotatingImp::rotateWithLock( std::lock_guard lock(mutex_); - // This function only has one caller, which is syncronous, and this is the - // only place other than the ctor where writableBackend_ can change. Even - // without a lock, it should be impossible for writableBackend_ to have - // changed. - assert(writableBackend_ == writableBackend); - if (writableBackend_ != writableBackend) - LogicError("Backend changed in the middle of a rotation"); - archiveBackend_->setDeletePath(); archiveBackend_ = std::move(writableBackend_); writableBackend_ = std::move(newBackend); diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h index d857e3dc2d6..0ab9ca6d211 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h @@ -48,8 +48,6 @@ class DatabaseRotatingImp : public DatabaseRotating stop(); } - // This function only has one caller in SHAMapStoreImp::run. The locking for - // this function will need to be rethought another caller is ever added. void rotateWithLock( std::function( @@ -82,8 +80,12 @@ class DatabaseRotatingImp : public DatabaseRotating sweep() override; private: + // backendMutex_ is only needed when the *Backend_ members are modified. + // Reads are protected by the general mutex_. + std::mutex backendMutex_; std::shared_ptr writableBackend_; std::shared_ptr archiveBackend_; + mutable std::mutex mutex_; std::shared_ptr From d912b50cb91c7e82d82718ef7e0df7dd3050ffe4 Mon Sep 17 00:00:00 2001 From: Ed Hennis Date: Fri, 7 Feb 2025 17:07:24 -0500 Subject: [PATCH 03/18] Review feedback from @bthomee and @vvysokikh1: - Rewrite the locking in DatabaseRotatingImp::rotateWithLock to use a shared_lock, and write a unit test to show (as much as possible) that it won't deadlock. --- src/test/app/SHAMapStore_test.cpp | 120 ++++++++++++++++++ src/xrpld/app/misc/SHAMapStoreImp.cpp | 31 +++-- src/xrpld/nodestore/DatabaseRotating.h | 3 +- .../nodestore/detail/DatabaseRotatingImp.cpp | 61 +++++++-- .../nodestore/detail/DatabaseRotatingImp.h | 18 ++- 5 files changed, 200 insertions(+), 33 deletions(-) diff --git a/src/test/app/SHAMapStore_test.cpp b/src/test/app/SHAMapStore_test.cpp index 376cb4eb7ba..d521baa3085 100644 --- a/src/test/app/SHAMapStore_test.cpp +++ b/src/test/app/SHAMapStore_test.cpp @@ -20,10 +20,14 @@ #include #include #include +#include #include #include #include +#include #include +#include +#include namespace ripple { namespace test { @@ -518,12 +522,128 @@ class SHAMapStore_test : public beast::unit_test::suite lastRotated = ledgerSeq - 1; } + std::unique_ptr + makeBackendRotating( + jtx::Env& env, + NodeStoreScheduler& scheduler, + std::string path) + { + Section section{ + env.app().config().section(ConfigSection::nodeDatabase())}; + boost::filesystem::path newPath; + + if (!BEAST_EXPECT(path.size())) + return {}; + newPath = path; + section.set("path", newPath.string()); + + auto backend{NodeStore::Manager::instance().make_Backend( + section, + megabytes(env.app().config().getValueFor( + SizedItem::burstSize, std::nullopt)), + scheduler, + env.app().logs().journal("NodeStoreTest"))}; + backend->open(); + return backend; + } + void + testRotateWithLockContention() + { + // The only purpose of this test is to ensure that if something that + // should never happen happens, we don't get a deadlock. + testcase("rotate with lock contention"); + + using namespace jtx; + Env env(*this, envconfig(onlineDelete)); + + ///////////////////////////////////////////////////////////// + // Create the backend. Normally, SHAMapStoreImp handles all these + // details + auto nscfg = env.app().config().section(ConfigSection::nodeDatabase()); + nscfg.set( + NodeStore::DatabaseRotatingImp::unitTestFlag, std::to_string(true)); + + // Provide default values: + if (!nscfg.exists("cache_size")) + nscfg.set( + "cache_size", + std::to_string(env.app().config().getValueFor( + SizedItem::treeCacheSize, std::nullopt))); + + if (!nscfg.exists("cache_age")) + nscfg.set( + "cache_age", + std::to_string(env.app().config().getValueFor( + SizedItem::treeCacheAge, std::nullopt))); + + NodeStoreScheduler scheduler(env.app().getJobQueue()); + + std::string const writableDb = "write"; + std::string const archiveDb = "archive"; + auto writableBackend = makeBackendRotating(env, scheduler, writableDb); + auto archiveBackend = makeBackendRotating(env, scheduler, archiveDb); + + // Create NodeStore with two backends to allow online deletion of + // data + constexpr int readThreads = 4; + auto dbr = std::make_unique( + scheduler, + readThreads, + std::move(writableBackend), + std::move(archiveBackend), + nscfg, + env.app().logs().journal("NodeStoreTest")); + + ///////////////////////////////////////////////////////////// + // Create the impossible situation. Get several calls to rotateWithLock + // going in parallel using a callback that just delays + using namespace std::chrono_literals; + std::atomic threadNum = 0; + auto const cb = [&](std::string const& writableBackendName) { + using namespace std::chrono_literals; + BEAST_EXPECT(writableBackendName == "write"); + auto newBackend = makeBackendRotating( + env, scheduler, std::to_string(++threadNum)); + std::this_thread::sleep_for(5s); + return newBackend; + }; + + std::atomic successes = 0; + std::atomic failures = 0; + std::vector threads; + threads.reserve(5); + for (int i = 0; i < 5; ++i) + { + threads.emplace_back([&]() { + auto const result = dbr->rotateWithLock(cb); + if (result) + ++successes; + else + ++failures; + }); + // There's no point in trying to time the threads to line up at + // exact points, but introduce a little bit of staggering to be more + // "realistic". + std::this_thread::sleep_for(10ms * i); + } + for (auto& t : threads) + { + t.join(); + } + BEAST_EXPECT(successes == 1); + BEAST_EXPECT(failures == 4); + // Only one thread will invoke the callback to increment threadNum + BEAST_EXPECT(threadNum == 1); + BEAST_EXPECT(dbr->getName() == "1"); + } + void run() override { testClear(); testAutomatic(); testCanDelete(); + testRotateWithLockContention(); } }; diff --git a/src/xrpld/app/misc/SHAMapStoreImp.cpp b/src/xrpld/app/misc/SHAMapStoreImp.cpp index 3a530e0e410..c69ffc7c7a2 100644 --- a/src/xrpld/app/misc/SHAMapStoreImp.cpp +++ b/src/xrpld/app/misc/SHAMapStoreImp.cpp @@ -366,18 +366,25 @@ SHAMapStoreImp::run() lastRotated = validatedSeq; - dbRotating_->rotateWithLock( - [&](std::string const& writableBackendName) { - SavedState savedState; - savedState.writableDb = newBackend->getName(); - savedState.archiveDb = writableBackendName; - savedState.lastRotated = lastRotated; - state_db_.setState(savedState); - - clearCaches(validatedSeq); - - return std::move(newBackend); - }); + if (!dbRotating_->rotateWithLock( + [&](std::string const& writableBackendName) { + SavedState savedState; + savedState.writableDb = newBackend->getName(); + savedState.archiveDb = writableBackendName; + savedState.lastRotated = lastRotated; + state_db_.setState(savedState); + + clearCaches(validatedSeq); + + return std::move(newBackend); + })) + { + JLOG(journal_.error()) + << validatedSeq + << " rotation failed. Discard unused new backend."; + newBackend->setDeletePath(); + return; + } JLOG(journal_.warn()) << "finished rotation " << validatedSeq; } diff --git a/src/xrpld/nodestore/DatabaseRotating.h b/src/xrpld/nodestore/DatabaseRotating.h index 259dae4fe65..af7fe6674fc 100644 --- a/src/xrpld/nodestore/DatabaseRotating.h +++ b/src/xrpld/nodestore/DatabaseRotating.h @@ -46,7 +46,8 @@ class DatabaseRotating : public Database @param f A function executed before the rotation */ - virtual void + [[nodiscard]] + virtual bool rotateWithLock(std::function( std::string const& writableBackendName)> const& f) = 0; }; diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp index d2a4e2fab2c..c44dd0cf333 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp @@ -33,6 +33,7 @@ DatabaseRotatingImp::DatabaseRotatingImp( : DatabaseRotating(scheduler, readThreads, config, j) , writableBackend_(std::move(writableBackend)) , archiveBackend_(std::move(archiveBackend)) + , unitTest_(get(config, unitTestFlag, false)) { if (writableBackend_) fdRequired_ += writableBackend_->fdRequired(); @@ -40,40 +41,72 @@ DatabaseRotatingImp::DatabaseRotatingImp( fdRequired_ += archiveBackend_->fdRequired(); } -void +[[nodiscard]] bool DatabaseRotatingImp::rotateWithLock( std::function( std::string const& writableBackendName)> const& f) { - // backendMutex_ prevents writableBackend_ and archiveBackend_ from changing - // while the main mutex_ is released during the callback. - std::lock_guard backendLock(backendMutex_); + // This function should be the only one taking any kind of unique/write + // lock, and should only be called once at a time by its syncronous caller. + // The extra checking involving the "rotating" flag, are to ensure that if + // that ever changes, we still avoid deadlocks and incorrect behavior. + { + std::unique_lock writeLock(mutex_); + if (!rotating) + { + // Once this flag is set, we're committed to doing the work and + // returning true. + rotating = true; + } + else + { + // This should only be reachable through unit tests. + XRPL_ASSERT( + unitTest_, + "ripple::NodeStore::DatabaseRotatingImp::rotateWithLock " + "unit testing"); + return false; + } + } auto const writableBackend = [&] { - std::lock_guard lock(mutex_); + std::shared_lock readLock(mutex_); + XRPL_ASSERT( + rotating, + "ripple::NodeStore::DatabaseRotatingImp::rotateWithLock rotating " + "flag set"); + return writableBackend_; }(); auto newBackend = f(writableBackend->getName()); - std::lock_guard lock(mutex_); + // Because of the "rotating" flag, there should be no way any other thread + // is waiting at this point. As long as they all release the shared_lock + // before taking the unique_lock (which they have to, because upgrading is + // unsupported), there can be no deadlock. + std::unique_lock writeLock(mutex_); archiveBackend_->setDeletePath(); archiveBackend_ = std::move(writableBackend_); writableBackend_ = std::move(newBackend); + + rotating = false; + + return true; } std::string DatabaseRotatingImp::getName() const { - std::lock_guard lock(mutex_); + std::shared_lock lock(mutex_); return writableBackend_->getName(); } std::int32_t DatabaseRotatingImp::getWriteLoad() const { - std::lock_guard lock(mutex_); + std::shared_lock lock(mutex_); return writableBackend_->getWriteLoad(); } @@ -81,7 +114,7 @@ void DatabaseRotatingImp::importDatabase(Database& source) { auto const backend = [&] { - std::lock_guard lock(mutex_); + std::shared_lock lock(mutex_); return writableBackend_; }(); @@ -91,7 +124,7 @@ DatabaseRotatingImp::importDatabase(Database& source) void DatabaseRotatingImp::sync() { - std::lock_guard lock(mutex_); + std::shared_lock lock(mutex_); writableBackend_->sync(); } @@ -105,7 +138,7 @@ DatabaseRotatingImp::store( auto nObj = NodeObject::createObject(type, std::move(data), hash); auto const backend = [&] { - std::lock_guard lock(mutex_); + std::shared_lock lock(mutex_); return writableBackend_; }(); @@ -159,7 +192,7 @@ DatabaseRotatingImp::fetchNodeObject( std::shared_ptr nodeObject; auto [writable, archive] = [&] { - std::lock_guard lock(mutex_); + std::shared_lock lock(mutex_); return std::make_pair(writableBackend_, archiveBackend_); }(); @@ -173,7 +206,7 @@ DatabaseRotatingImp::fetchNodeObject( { { // Refresh the writable backend pointer - std::lock_guard lock(mutex_); + std::shared_lock lock(mutex_); writable = writableBackend_; } @@ -194,7 +227,7 @@ DatabaseRotatingImp::for_each( std::function)> f) { auto [writable, archive] = [&] { - std::lock_guard lock(mutex_); + std::shared_lock lock(mutex_); return std::make_pair(writableBackend_, archiveBackend_); }(); diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h index 0ab9ca6d211..3ae7ce8dfc9 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h @@ -22,7 +22,7 @@ #include -#include +#include namespace ripple { namespace NodeStore { @@ -48,7 +48,7 @@ class DatabaseRotatingImp : public DatabaseRotating stop(); } - void + [[nodiscard]] bool rotateWithLock( std::function( std::string const& writableBackendName)> const& f) override; @@ -79,14 +79,20 @@ class DatabaseRotatingImp : public DatabaseRotating void sweep() override; + // Include the space in the name to ensure it can't be set in a file + static constexpr auto unitTestFlag = " unit_test"; + private: - // backendMutex_ is only needed when the *Backend_ members are modified. - // Reads are protected by the general mutex_. - std::mutex backendMutex_; + bool const unitTest_; + bool rotating = false; std::shared_ptr writableBackend_; std::shared_ptr archiveBackend_; - mutable std::mutex mutex_; + // https://en.cppreference.com/w/cpp/thread/shared_timed_mutex/lock + // "Shared mutexes do not support direct transition from shared to unique + // ownership mode: the shared lock has to be relinquished with + // unlock_shared() before exclusive ownership may be obtained with lock()." + mutable std::shared_timed_mutex mutex_; std::shared_ptr fetchNodeObject( From 3f7fb662e574d0e4e683795753b90b9a418dd87a Mon Sep 17 00:00:00 2001 From: Ed Hennis Date: Fri, 7 Feb 2025 17:26:03 -0500 Subject: [PATCH 04/18] Update levelization tracking --- Builds/levelization/results/ordering.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Builds/levelization/results/ordering.txt b/Builds/levelization/results/ordering.txt index acf8daafb79..681f76dd5db 100644 --- a/Builds/levelization/results/ordering.txt +++ b/Builds/levelization/results/ordering.txt @@ -19,6 +19,7 @@ test.app > xrpl.basics test.app > xrpld.app test.app > xrpld.core test.app > xrpld.ledger +test.app > xrpld.nodestore test.app > xrpld.overlay test.app > xrpld.rpc test.app > xrpl.json From 3b6984c4dc49d42773e10dc39b714caf3c0b911b Mon Sep 17 00:00:00 2001 From: Ed Hennis Date: Sat, 8 Feb 2025 11:10:11 -0500 Subject: [PATCH 05/18] fixup! Review feedback from @bthomee and @vvysokikh1: --- src/xrpld/nodestore/detail/DatabaseRotatingImp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h index 3ae7ce8dfc9..5280746d7c7 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h @@ -83,10 +83,10 @@ class DatabaseRotatingImp : public DatabaseRotating static constexpr auto unitTestFlag = " unit_test"; private: - bool const unitTest_; bool rotating = false; std::shared_ptr writableBackend_; std::shared_ptr archiveBackend_; + bool const unitTest_; // https://en.cppreference.com/w/cpp/thread/shared_timed_mutex/lock // "Shared mutexes do not support direct transition from shared to unique From b90f65f24d451c80748fe2ade7f454c64164b45e Mon Sep 17 00:00:00 2001 From: Ed Hennis Date: Wed, 12 Feb 2025 10:59:56 -0500 Subject: [PATCH 06/18] Rewrite DatabaseRotatingImp again: - Use a boost::upgrade_mutex, which implements a clean read -> write lock upgrade. --- src/xrpld/nodestore/DatabaseRotating.h | 3 ++ .../nodestore/detail/DatabaseRotatingImp.cpp | 50 ++++++------------- .../nodestore/detail/DatabaseRotatingImp.h | 15 +++--- 3 files changed, 25 insertions(+), 43 deletions(-) diff --git a/src/xrpld/nodestore/DatabaseRotating.h b/src/xrpld/nodestore/DatabaseRotating.h index af7fe6674fc..e534b77b9da 100644 --- a/src/xrpld/nodestore/DatabaseRotating.h +++ b/src/xrpld/nodestore/DatabaseRotating.h @@ -45,6 +45,9 @@ class DatabaseRotating : public Database /** Rotates the backends. @param f A function executed before the rotation + + @return bool indicating whether the callback "f" was called, and the new + Backend it returned is stored */ [[nodiscard]] virtual bool diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp index c44dd0cf333..e721c32d672 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp @@ -19,6 +19,7 @@ #include #include +#include namespace ripple { namespace NodeStore { @@ -48,51 +49,28 @@ DatabaseRotatingImp::rotateWithLock( { // This function should be the only one taking any kind of unique/write // lock, and should only be called once at a time by its syncronous caller. - // The extra checking involving the "rotating" flag, are to ensure that if - // that ever changes, we still avoid deadlocks and incorrect behavior. + boost::upgrade_lock readLock(mutex_, boost::defer_lock); + if (!readLock.try_lock()) { - std::unique_lock writeLock(mutex_); - if (!rotating) - { - // Once this flag is set, we're committed to doing the work and - // returning true. - rotating = true; - } - else - { - // This should only be reachable through unit tests. - XRPL_ASSERT( - unitTest_, - "ripple::NodeStore::DatabaseRotatingImp::rotateWithLock " - "unit testing"); - return false; - } - } - auto const writableBackend = [&] { - std::shared_lock readLock(mutex_); + // This should only be reachable through unit tests. XRPL_ASSERT( - rotating, - "ripple::NodeStore::DatabaseRotatingImp::rotateWithLock rotating " - "flag set"); - - return writableBackend_; - }(); - - auto newBackend = f(writableBackend->getName()); + unitTest_, + "ripple::NodeStore::DatabaseRotatingImp::rotateWithLock " + "unit testing"); + return false; + } + auto newBackend = f(writableBackend_->getName()); - // Because of the "rotating" flag, there should be no way any other thread - // is waiting at this point. As long as they all release the shared_lock - // before taking the unique_lock (which they have to, because upgrading is - // unsupported), there can be no deadlock. - std::unique_lock writeLock(mutex_); + // boost::upgrade_mutex guarantees that only only thread can have "upgrade + // ownership" at a time, so this is 100% safe, and guaranteed to avoid + // deadlock. + boost::upgrade_to_unique_lock writeLock(readLock); archiveBackend_->setDeletePath(); archiveBackend_ = std::move(writableBackend_); writableBackend_ = std::move(newBackend); - rotating = false; - return true; } diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h index 5280746d7c7..cf45be82f29 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h @@ -22,7 +22,7 @@ #include -#include +#include namespace ripple { namespace NodeStore { @@ -83,16 +83,17 @@ class DatabaseRotatingImp : public DatabaseRotating static constexpr auto unitTestFlag = " unit_test"; private: - bool rotating = false; std::shared_ptr writableBackend_; std::shared_ptr archiveBackend_; bool const unitTest_; - // https://en.cppreference.com/w/cpp/thread/shared_timed_mutex/lock - // "Shared mutexes do not support direct transition from shared to unique - // ownership mode: the shared lock has to be relinquished with - // unlock_shared() before exclusive ownership may be obtained with lock()." - mutable std::shared_timed_mutex mutex_; + // Implements the "UpgradeLockable" concept + // https://www.boost.org/doc/libs/1_86_0/doc/html/thread/synchronization.html#thread.synchronization.mutex_concepts.upgrade_lockable + // In short: Many threads can have shared ownership. One thread can have + // upgradable ownership at the same time as others have shared ownership. + // The upgradeable ownership can be upgraded to exclusive ownership, + // blocking if necessary until no other threads have shared ownership. + mutable boost::upgrade_mutex mutex_; std::shared_ptr fetchNodeObject( From 925c31119fade6c678a24d981c76561752ce927c Mon Sep 17 00:00:00 2001 From: Ed Hennis Date: Wed, 12 Feb 2025 11:30:34 -0500 Subject: [PATCH 07/18] Add another test case, use boost locks, update comments --- src/test/app/SHAMapStore_test.cpp | 20 ++++++++++++++++-- src/xrpld/app/misc/SHAMapStoreImp.cpp | 2 ++ .../nodestore/detail/DatabaseRotatingImp.cpp | 21 ++++++++++--------- 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/src/test/app/SHAMapStore_test.cpp b/src/test/app/SHAMapStore_test.cpp index d521baa3085..3ba77fce9a8 100644 --- a/src/test/app/SHAMapStore_test.cpp +++ b/src/test/app/SHAMapStore_test.cpp @@ -615,8 +615,7 @@ class SHAMapStore_test : public beast::unit_test::suite for (int i = 0; i < 5; ++i) { threads.emplace_back([&]() { - auto const result = dbr->rotateWithLock(cb); - if (result) + if (dbr->rotateWithLock(cb)) ++successes; else ++failures; @@ -635,6 +634,23 @@ class SHAMapStore_test : public beast::unit_test::suite // Only one thread will invoke the callback to increment threadNum BEAST_EXPECT(threadNum == 1); BEAST_EXPECT(dbr->getName() == "1"); + + ///////////////////////////////////////////////////////////// + // Create another impossible situation. Try to re-enter rotateWithLock + // inside the callback. + auto const cbReentrant = [&](std::string const& writableBackendName) { + BEAST_EXPECT(writableBackendName == "1"); + auto newBackend = makeBackendRotating( + env, scheduler, std::to_string(++threadNum)); + BEAST_EXPECT(!dbr->rotateWithLock(cb)); + return newBackend; + }; + BEAST_EXPECT(dbr->rotateWithLock(cbReentrant)); + + BEAST_EXPECT(successes == 1); + BEAST_EXPECT(failures == 4); + BEAST_EXPECT(threadNum == 2); + BEAST_EXPECT(dbr->getName() == "2"); } void diff --git a/src/xrpld/app/misc/SHAMapStoreImp.cpp b/src/xrpld/app/misc/SHAMapStoreImp.cpp index c69ffc7c7a2..dd9dc562b21 100644 --- a/src/xrpld/app/misc/SHAMapStoreImp.cpp +++ b/src/xrpld/app/misc/SHAMapStoreImp.cpp @@ -379,6 +379,8 @@ SHAMapStoreImp::run() return std::move(newBackend); })) { + UNREACHABLE( + "ripple::SHAMapStoreImp::run rotateWithLock failed"); JLOG(journal_.error()) << validatedSeq << " rotation failed. Discard unused new backend."; diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp index e721c32d672..52bb4cd6701 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp @@ -53,7 +53,8 @@ DatabaseRotatingImp::rotateWithLock( boost::upgrade_lock readLock(mutex_, boost::defer_lock); if (!readLock.try_lock()) { - // This should only be reachable through unit tests. + // If anything other than a unit test gets here, something has gone very + // wrong XRPL_ASSERT( unitTest_, "ripple::NodeStore::DatabaseRotatingImp::rotateWithLock " @@ -62,7 +63,7 @@ DatabaseRotatingImp::rotateWithLock( } auto newBackend = f(writableBackend_->getName()); - // boost::upgrade_mutex guarantees that only only thread can have "upgrade + // boost::upgrade_mutex guarantees that only one thread can have "upgrade // ownership" at a time, so this is 100% safe, and guaranteed to avoid // deadlock. boost::upgrade_to_unique_lock writeLock(readLock); @@ -77,14 +78,14 @@ DatabaseRotatingImp::rotateWithLock( std::string DatabaseRotatingImp::getName() const { - std::shared_lock lock(mutex_); + boost::shared_lock lock(mutex_); return writableBackend_->getName(); } std::int32_t DatabaseRotatingImp::getWriteLoad() const { - std::shared_lock lock(mutex_); + boost::shared_lock lock(mutex_); return writableBackend_->getWriteLoad(); } @@ -92,7 +93,7 @@ void DatabaseRotatingImp::importDatabase(Database& source) { auto const backend = [&] { - std::shared_lock lock(mutex_); + boost::shared_lock lock(mutex_); return writableBackend_; }(); @@ -102,7 +103,7 @@ DatabaseRotatingImp::importDatabase(Database& source) void DatabaseRotatingImp::sync() { - std::shared_lock lock(mutex_); + boost::shared_lock lock(mutex_); writableBackend_->sync(); } @@ -116,7 +117,7 @@ DatabaseRotatingImp::store( auto nObj = NodeObject::createObject(type, std::move(data), hash); auto const backend = [&] { - std::shared_lock lock(mutex_); + boost::shared_lock lock(mutex_); return writableBackend_; }(); @@ -170,7 +171,7 @@ DatabaseRotatingImp::fetchNodeObject( std::shared_ptr nodeObject; auto [writable, archive] = [&] { - std::shared_lock lock(mutex_); + boost::shared_lock lock(mutex_); return std::make_pair(writableBackend_, archiveBackend_); }(); @@ -184,7 +185,7 @@ DatabaseRotatingImp::fetchNodeObject( { { // Refresh the writable backend pointer - std::shared_lock lock(mutex_); + boost::shared_lock lock(mutex_); writable = writableBackend_; } @@ -205,7 +206,7 @@ DatabaseRotatingImp::for_each( std::function)> f) { auto [writable, archive] = [&] { - std::shared_lock lock(mutex_); + boost::shared_lock lock(mutex_); return std::make_pair(writableBackend_, archiveBackend_); }(); From 217bc1da75c3530aa2a9b1ba6d36e0a89511b920 Mon Sep 17 00:00:00 2001 From: Ed Hennis Date: Wed, 12 Feb 2025 12:56:46 -0500 Subject: [PATCH 08/18] Review feedback: rename readlock, add and update comments --- src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp | 8 +++++--- src/xrpld/nodestore/detail/DatabaseRotatingImp.h | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp index 52bb4cd6701..86c6486dd9f 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp @@ -50,8 +50,10 @@ DatabaseRotatingImp::rotateWithLock( // This function should be the only one taking any kind of unique/write // lock, and should only be called once at a time by its syncronous caller. - boost::upgrade_lock readLock(mutex_, boost::defer_lock); - if (!readLock.try_lock()) + // The upgradable lock will NOT block shared locks, but will block other + // upgrade locks and unique/exclusive locks. + boost::upgrade_lock upgradeableLock(mutex_, boost::defer_lock); + if (!upgradeableLock.try_lock()) { // If anything other than a unit test gets here, something has gone very // wrong @@ -66,7 +68,7 @@ DatabaseRotatingImp::rotateWithLock( // boost::upgrade_mutex guarantees that only one thread can have "upgrade // ownership" at a time, so this is 100% safe, and guaranteed to avoid // deadlock. - boost::upgrade_to_unique_lock writeLock(readLock); + boost::upgrade_to_unique_lock writeLock(upgradeableLock); archiveBackend_->setDeletePath(); archiveBackend_ = std::move(writableBackend_); diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h index cf45be82f29..0feb894eb9b 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h @@ -88,7 +88,7 @@ class DatabaseRotatingImp : public DatabaseRotating bool const unitTest_; // Implements the "UpgradeLockable" concept - // https://www.boost.org/doc/libs/1_86_0/doc/html/thread/synchronization.html#thread.synchronization.mutex_concepts.upgrade_lockable + // https://www.boost.org/doc/libs/release/doc/html/thread/synchronization.html#thread.synchronization.mutex_concepts.upgrade_lockable // In short: Many threads can have shared ownership. One thread can have // upgradable ownership at the same time as others have shared ownership. // The upgradeable ownership can be upgraded to exclusive ownership, From a2fa56e93e1371c43413f4c409677011ad2cd2be Mon Sep 17 00:00:00 2001 From: Ed Hennis Date: Thu, 13 Feb 2025 09:20:11 -0500 Subject: [PATCH 09/18] Revert back to upstream/develop, except unit tests --- Builds/levelization/results/ordering.txt | 1 - src/xrpld/app/misc/SHAMapStoreImp.cpp | 33 +++++--------- src/xrpld/nodestore/DatabaseRotating.h | 8 +--- .../nodestore/detail/DatabaseRotatingImp.cpp | 44 +++++-------------- .../nodestore/detail/DatabaseRotatingImp.h | 23 ++++------ 5 files changed, 33 insertions(+), 76 deletions(-) diff --git a/Builds/levelization/results/ordering.txt b/Builds/levelization/results/ordering.txt index 681f76dd5db..acf8daafb79 100644 --- a/Builds/levelization/results/ordering.txt +++ b/Builds/levelization/results/ordering.txt @@ -19,7 +19,6 @@ test.app > xrpl.basics test.app > xrpld.app test.app > xrpld.core test.app > xrpld.ledger -test.app > xrpld.nodestore test.app > xrpld.overlay test.app > xrpld.rpc test.app > xrpl.json diff --git a/src/xrpld/app/misc/SHAMapStoreImp.cpp b/src/xrpld/app/misc/SHAMapStoreImp.cpp index dd9dc562b21..3a530e0e410 100644 --- a/src/xrpld/app/misc/SHAMapStoreImp.cpp +++ b/src/xrpld/app/misc/SHAMapStoreImp.cpp @@ -366,27 +366,18 @@ SHAMapStoreImp::run() lastRotated = validatedSeq; - if (!dbRotating_->rotateWithLock( - [&](std::string const& writableBackendName) { - SavedState savedState; - savedState.writableDb = newBackend->getName(); - savedState.archiveDb = writableBackendName; - savedState.lastRotated = lastRotated; - state_db_.setState(savedState); - - clearCaches(validatedSeq); - - return std::move(newBackend); - })) - { - UNREACHABLE( - "ripple::SHAMapStoreImp::run rotateWithLock failed"); - JLOG(journal_.error()) - << validatedSeq - << " rotation failed. Discard unused new backend."; - newBackend->setDeletePath(); - return; - } + dbRotating_->rotateWithLock( + [&](std::string const& writableBackendName) { + SavedState savedState; + savedState.writableDb = newBackend->getName(); + savedState.archiveDb = writableBackendName; + savedState.lastRotated = lastRotated; + state_db_.setState(savedState); + + clearCaches(validatedSeq); + + return std::move(newBackend); + }); JLOG(journal_.warn()) << "finished rotation " << validatedSeq; } diff --git a/src/xrpld/nodestore/DatabaseRotating.h b/src/xrpld/nodestore/DatabaseRotating.h index e534b77b9da..10f575c4662 100644 --- a/src/xrpld/nodestore/DatabaseRotating.h +++ b/src/xrpld/nodestore/DatabaseRotating.h @@ -44,13 +44,9 @@ class DatabaseRotating : public Database /** Rotates the backends. - @param f A function executed before the rotation - - @return bool indicating whether the callback "f" was called, and the new - Backend it returned is stored + @param f A function executed before the rotation and under the same lock */ - [[nodiscard]] - virtual bool + virtual void rotateWithLock(std::function( std::string const& writableBackendName)> const& f) = 0; }; diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp index 86c6486dd9f..58cc3599dc6 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp @@ -19,7 +19,6 @@ #include #include -#include namespace ripple { namespace NodeStore { @@ -34,7 +33,6 @@ DatabaseRotatingImp::DatabaseRotatingImp( : DatabaseRotating(scheduler, readThreads, config, j) , writableBackend_(std::move(writableBackend)) , archiveBackend_(std::move(archiveBackend)) - , unitTest_(get(config, unitTestFlag, false)) { if (writableBackend_) fdRequired_ += writableBackend_->fdRequired(); @@ -42,52 +40,30 @@ DatabaseRotatingImp::DatabaseRotatingImp( fdRequired_ += archiveBackend_->fdRequired(); } -[[nodiscard]] bool +void DatabaseRotatingImp::rotateWithLock( std::function( std::string const& writableBackendName)> const& f) { - // This function should be the only one taking any kind of unique/write - // lock, and should only be called once at a time by its syncronous caller. + std::lock_guard lock(mutex_); - // The upgradable lock will NOT block shared locks, but will block other - // upgrade locks and unique/exclusive locks. - boost::upgrade_lock upgradeableLock(mutex_, boost::defer_lock); - if (!upgradeableLock.try_lock()) - { - // If anything other than a unit test gets here, something has gone very - // wrong - XRPL_ASSERT( - unitTest_, - "ripple::NodeStore::DatabaseRotatingImp::rotateWithLock " - "unit testing"); - return false; - } auto newBackend = f(writableBackend_->getName()); - - // boost::upgrade_mutex guarantees that only one thread can have "upgrade - // ownership" at a time, so this is 100% safe, and guaranteed to avoid - // deadlock. - boost::upgrade_to_unique_lock writeLock(upgradeableLock); - archiveBackend_->setDeletePath(); archiveBackend_ = std::move(writableBackend_); writableBackend_ = std::move(newBackend); - - return true; } std::string DatabaseRotatingImp::getName() const { - boost::shared_lock lock(mutex_); + std::lock_guard lock(mutex_); return writableBackend_->getName(); } std::int32_t DatabaseRotatingImp::getWriteLoad() const { - boost::shared_lock lock(mutex_); + std::lock_guard lock(mutex_); return writableBackend_->getWriteLoad(); } @@ -95,7 +71,7 @@ void DatabaseRotatingImp::importDatabase(Database& source) { auto const backend = [&] { - boost::shared_lock lock(mutex_); + std::lock_guard lock(mutex_); return writableBackend_; }(); @@ -105,7 +81,7 @@ DatabaseRotatingImp::importDatabase(Database& source) void DatabaseRotatingImp::sync() { - boost::shared_lock lock(mutex_); + std::lock_guard lock(mutex_); writableBackend_->sync(); } @@ -119,7 +95,7 @@ DatabaseRotatingImp::store( auto nObj = NodeObject::createObject(type, std::move(data), hash); auto const backend = [&] { - boost::shared_lock lock(mutex_); + std::lock_guard lock(mutex_); return writableBackend_; }(); @@ -173,7 +149,7 @@ DatabaseRotatingImp::fetchNodeObject( std::shared_ptr nodeObject; auto [writable, archive] = [&] { - boost::shared_lock lock(mutex_); + std::lock_guard lock(mutex_); return std::make_pair(writableBackend_, archiveBackend_); }(); @@ -187,7 +163,7 @@ DatabaseRotatingImp::fetchNodeObject( { { // Refresh the writable backend pointer - boost::shared_lock lock(mutex_); + std::lock_guard lock(mutex_); writable = writableBackend_; } @@ -208,7 +184,7 @@ DatabaseRotatingImp::for_each( std::function)> f) { auto [writable, archive] = [&] { - boost::shared_lock lock(mutex_); + std::lock_guard lock(mutex_); return std::make_pair(writableBackend_, archiveBackend_); }(); diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h index 0feb894eb9b..5183aa1e2e4 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h @@ -22,7 +22,7 @@ #include -#include +#include namespace ripple { namespace NodeStore { @@ -48,7 +48,7 @@ class DatabaseRotatingImp : public DatabaseRotating stop(); } - [[nodiscard]] bool + void rotateWithLock( std::function( std::string const& writableBackendName)> const& f) override; @@ -79,21 +79,16 @@ class DatabaseRotatingImp : public DatabaseRotating void sweep() override; - // Include the space in the name to ensure it can't be set in a file - static constexpr auto unitTestFlag = " unit_test"; - private: std::shared_ptr writableBackend_; std::shared_ptr archiveBackend_; - bool const unitTest_; - - // Implements the "UpgradeLockable" concept - // https://www.boost.org/doc/libs/release/doc/html/thread/synchronization.html#thread.synchronization.mutex_concepts.upgrade_lockable - // In short: Many threads can have shared ownership. One thread can have - // upgradable ownership at the same time as others have shared ownership. - // The upgradeable ownership can be upgraded to exclusive ownership, - // blocking if necessary until no other threads have shared ownership. - mutable boost::upgrade_mutex mutex_; + // This needs to be a recursive mutex because callbacks in `rotateWithLock` + // can call function that also lock the mutex. A current example of this is + // a callback from SHAMapStoreImp, which calls `clearCaches`. This + // `clearCaches` call eventually calls `fetchNodeObject` which tries to + // relock the mutex. It would be desirable to rewrite the code so the lock + // was not held during a callback. + mutable std::recursive_mutex mutex_; std::shared_ptr fetchNodeObject( From a983cc974c234d942074a17acbeec77b89077fc6 Mon Sep 17 00:00:00 2001 From: Ed Hennis Date: Thu, 13 Feb 2025 09:42:33 -0500 Subject: [PATCH 10/18] Fix the unit test for original logic --- src/test/app/SHAMapStore_test.cpp | 48 +++++++++---------------------- 1 file changed, 14 insertions(+), 34 deletions(-) diff --git a/src/test/app/SHAMapStore_test.cpp b/src/test/app/SHAMapStore_test.cpp index 3ba77fce9a8..b3cf9df9d90 100644 --- a/src/test/app/SHAMapStore_test.cpp +++ b/src/test/app/SHAMapStore_test.cpp @@ -560,8 +560,6 @@ class SHAMapStore_test : public beast::unit_test::suite // Create the backend. Normally, SHAMapStoreImp handles all these // details auto nscfg = env.app().config().section(ConfigSection::nodeDatabase()); - nscfg.set( - NodeStore::DatabaseRotatingImp::unitTestFlag, std::to_string(true)); // Provide default values: if (!nscfg.exists("cache_size")) @@ -595,8 +593,7 @@ class SHAMapStore_test : public beast::unit_test::suite env.app().logs().journal("NodeStoreTest")); ///////////////////////////////////////////////////////////// - // Create the impossible situation. Get several calls to rotateWithLock - // going in parallel using a callback that just delays + // Check basic functionality using namespace std::chrono_literals; std::atomic threadNum = 0; auto const cb = [&](std::string const& writableBackendName) { @@ -604,52 +601,35 @@ class SHAMapStore_test : public beast::unit_test::suite BEAST_EXPECT(writableBackendName == "write"); auto newBackend = makeBackendRotating( env, scheduler, std::to_string(++threadNum)); - std::this_thread::sleep_for(5s); + // Ensure that dbr functions can be called from within the callback + BEAST_EXPECT(dbr->getName() == "write"); + return newBackend; }; - std::atomic successes = 0; - std::atomic failures = 0; - std::vector threads; - threads.reserve(5); - for (int i = 0; i < 5; ++i) - { - threads.emplace_back([&]() { - if (dbr->rotateWithLock(cb)) - ++successes; - else - ++failures; - }); - // There's no point in trying to time the threads to line up at - // exact points, but introduce a little bit of staggering to be more - // "realistic". - std::this_thread::sleep_for(10ms * i); - } - for (auto& t : threads) - { - t.join(); - } - BEAST_EXPECT(successes == 1); - BEAST_EXPECT(failures == 4); - // Only one thread will invoke the callback to increment threadNum + dbr->rotateWithLock(cb); BEAST_EXPECT(threadNum == 1); BEAST_EXPECT(dbr->getName() == "1"); ///////////////////////////////////////////////////////////// // Create another impossible situation. Try to re-enter rotateWithLock // inside the callback. + auto const cbBasic = [&](std::string const& writableBackendName) { + auto newBackend = makeBackendRotating( + env, scheduler, std::to_string(++threadNum)); + return newBackend; + }; auto const cbReentrant = [&](std::string const& writableBackendName) { BEAST_EXPECT(writableBackendName == "1"); auto newBackend = makeBackendRotating( env, scheduler, std::to_string(++threadNum)); - BEAST_EXPECT(!dbr->rotateWithLock(cb)); + // Note: doing this is stupid and should never happen + dbr->rotateWithLock(cbBasic); return newBackend; }; - BEAST_EXPECT(dbr->rotateWithLock(cbReentrant)); + dbr->rotateWithLock(cbReentrant); - BEAST_EXPECT(successes == 1); - BEAST_EXPECT(failures == 4); - BEAST_EXPECT(threadNum == 2); + BEAST_EXPECT(threadNum == 3); BEAST_EXPECT(dbr->getName() == "2"); } From 542b9587b772bc8fe8e16ed0f682b582fd288c2f Mon Sep 17 00:00:00 2001 From: Ed Hennis Date: Wed, 7 Aug 2024 18:08:03 -0400 Subject: [PATCH 11/18] refactor: Change recursive_mutex to mutex in DatabaseRotatingImp - Follow-up to #4989, which stated "Ideally, the code should be rewritten so it doesn't hold the mutex during the callback and the mutex should be changed back to a regular mutex." - Change the order of the rotation operation. Before: Write the state table, then update the backend pointers, all under lock. If rippled crashes between these two steps, the old archive DB folder would be orphaned and need to be cleaned up manually. After: Update the backend pointers under lock, then write the state table outside of lock. This opens a small window where data can be written to the new DB before the state is updated. If rippled crashes between these two steps, the new writable DB folder would be orphaned, and any data written will be lost. That data will need to be downloaded from peers on startup along with any other data missed while rippled is offline. --- src/test/app/SHAMapStore_test.cpp | 61 +++++++++++-------- src/xrpld/app/misc/SHAMapStoreImp.cpp | 12 ++-- src/xrpld/nodestore/DatabaseRotating.h | 12 +++- .../nodestore/detail/DatabaseRotatingImp.cpp | 28 ++++++--- .../nodestore/detail/DatabaseRotatingImp.h | 16 ++--- 5 files changed, 77 insertions(+), 52 deletions(-) diff --git a/src/test/app/SHAMapStore_test.cpp b/src/test/app/SHAMapStore_test.cpp index b3cf9df9d90..ef70944cd0b 100644 --- a/src/test/app/SHAMapStore_test.cpp +++ b/src/test/app/SHAMapStore_test.cpp @@ -547,7 +547,7 @@ class SHAMapStore_test : public beast::unit_test::suite return backend; } void - testRotateWithLockContention() + testRotate() { // The only purpose of this test is to ensure that if something that // should never happen happens, we don't get a deadlock. @@ -596,41 +596,52 @@ class SHAMapStore_test : public beast::unit_test::suite // Check basic functionality using namespace std::chrono_literals; std::atomic threadNum = 0; - auto const cb = [&](std::string const& writableBackendName) { - using namespace std::chrono_literals; - BEAST_EXPECT(writableBackendName == "write"); + + { auto newBackend = makeBackendRotating( env, scheduler, std::to_string(++threadNum)); - // Ensure that dbr functions can be called from within the callback - BEAST_EXPECT(dbr->getName() == "write"); - return newBackend; - }; + auto const cb = [&](std::string const& writableName, + std::string const& archiveName) { + BEAST_EXPECT(writableName == "1"); + BEAST_EXPECT(archiveName == "write"); + // Ensure that dbr functions can be called from within the + // callback + BEAST_EXPECT(dbr->getName() == "1"); + }; - dbr->rotateWithLock(cb); + dbr->rotate(std::move(newBackend), cb); + } BEAST_EXPECT(threadNum == 1); BEAST_EXPECT(dbr->getName() == "1"); ///////////////////////////////////////////////////////////// - // Create another impossible situation. Try to re-enter rotateWithLock - // inside the callback. - auto const cbBasic = [&](std::string const& writableBackendName) { - auto newBackend = makeBackendRotating( - env, scheduler, std::to_string(++threadNum)); - return newBackend; - }; - auto const cbReentrant = [&](std::string const& writableBackendName) { - BEAST_EXPECT(writableBackendName == "1"); + // Do something stupid. Try to re-enter rotate from inside the callback. + { + auto const cb = [&](std::string const& writableName, + std::string const& archiveName) { + BEAST_EXPECT(writableName == "3"); + BEAST_EXPECT(archiveName == "2"); + // Ensure that dbr functions can be called from within the + // callback + BEAST_EXPECT(dbr->getName() == "3"); + }; + auto const cbReentrant = [&](std::string const& writableName, + std::string const& archiveName) { + BEAST_EXPECT(writableName == "2"); + BEAST_EXPECT(archiveName == "1"); + auto newBackend = makeBackendRotating( + env, scheduler, std::to_string(++threadNum)); + // Reminder: doing this is stupid and should never happen + dbr->rotate(std::move(newBackend), cb); + }; auto newBackend = makeBackendRotating( env, scheduler, std::to_string(++threadNum)); - // Note: doing this is stupid and should never happen - dbr->rotateWithLock(cbBasic); - return newBackend; - }; - dbr->rotateWithLock(cbReentrant); + dbr->rotate(std::move(newBackend), cbReentrant); + } BEAST_EXPECT(threadNum == 3); - BEAST_EXPECT(dbr->getName() == "2"); + BEAST_EXPECT(dbr->getName() == "3"); } void @@ -639,7 +650,7 @@ class SHAMapStore_test : public beast::unit_test::suite testClear(); testAutomatic(); testCanDelete(); - testRotateWithLockContention(); + testRotate(); } }; diff --git a/src/xrpld/app/misc/SHAMapStoreImp.cpp b/src/xrpld/app/misc/SHAMapStoreImp.cpp index 3a530e0e410..e2e0e3b9c46 100644 --- a/src/xrpld/app/misc/SHAMapStoreImp.cpp +++ b/src/xrpld/app/misc/SHAMapStoreImp.cpp @@ -366,17 +366,17 @@ SHAMapStoreImp::run() lastRotated = validatedSeq; - dbRotating_->rotateWithLock( - [&](std::string const& writableBackendName) { + dbRotating_->rotate( + std::move(newBackend), + [&](std::string const& writableName, + std::string const& archiveName) { SavedState savedState; - savedState.writableDb = newBackend->getName(); - savedState.archiveDb = writableBackendName; + savedState.writableDb = writableName; + savedState.archiveDb = archiveName; savedState.lastRotated = lastRotated; state_db_.setState(savedState); clearCaches(validatedSeq); - - return std::move(newBackend); }); JLOG(journal_.warn()) << "finished rotation " << validatedSeq; diff --git a/src/xrpld/nodestore/DatabaseRotating.h b/src/xrpld/nodestore/DatabaseRotating.h index 10f575c4662..3e8c6a7d5f0 100644 --- a/src/xrpld/nodestore/DatabaseRotating.h +++ b/src/xrpld/nodestore/DatabaseRotating.h @@ -44,11 +44,17 @@ class DatabaseRotating : public Database /** Rotates the backends. - @param f A function executed before the rotation and under the same lock + @param newBackend New writable backend + @param f A function executed after the rotation outside of lock. The + values passed to f will be the new backend database names _after_ + rotation. */ virtual void - rotateWithLock(std::function( - std::string const& writableBackendName)> const& f) = 0; + rotate( + std::unique_ptr&& newBackend, + std::function const& f) = 0; }; } // namespace NodeStore diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp index 58cc3599dc6..c10b733bcf4 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp @@ -41,16 +41,28 @@ DatabaseRotatingImp::DatabaseRotatingImp( } void -DatabaseRotatingImp::rotateWithLock( - std::function( - std::string const& writableBackendName)> const& f) +DatabaseRotatingImp::rotate( + std::unique_ptr&& newBackend, + std::function const& f) { - std::lock_guard lock(mutex_); + std::string newWritableBackendName; + std::string newArchiveBackendName; + std::shared_ptr oldArchiveBackend; + { + std::lock_guard lock(mutex_); + + archiveBackend_->setDeletePath(); + oldArchiveBackend = std::move(archiveBackend_); + + archiveBackend_ = std::move(writableBackend_); + newArchiveBackendName = archiveBackend_->getName(); + writableBackend_ = std::move(newBackend); + newWritableBackendName = writableBackend_->getName(); + } - auto newBackend = f(writableBackend_->getName()); - archiveBackend_->setDeletePath(); - archiveBackend_ = std::move(writableBackend_); - writableBackend_ = std::move(newBackend); + f(newWritableBackendName, newArchiveBackendName); } std::string diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h index 5183aa1e2e4..d9f114f5039 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h @@ -49,9 +49,11 @@ class DatabaseRotatingImp : public DatabaseRotating } void - rotateWithLock( - std::function( - std::string const& writableBackendName)> const& f) override; + rotate( + std::unique_ptr&& newBackend, + std::function const& f) override; std::string getName() const override; @@ -82,13 +84,7 @@ class DatabaseRotatingImp : public DatabaseRotating private: std::shared_ptr writableBackend_; std::shared_ptr archiveBackend_; - // This needs to be a recursive mutex because callbacks in `rotateWithLock` - // can call function that also lock the mutex. A current example of this is - // a callback from SHAMapStoreImp, which calls `clearCaches`. This - // `clearCaches` call eventually calls `fetchNodeObject` which tries to - // relock the mutex. It would be desirable to rewrite the code so the lock - // was not held during a callback. - mutable std::recursive_mutex mutex_; + mutable std::mutex mutex_; std::shared_ptr fetchNodeObject( From 27cef470499a658aca1fd2275c5f0d4da24b416a Mon Sep 17 00:00:00 2001 From: Ed Hennis Date: Thu, 13 Feb 2025 11:10:52 -0500 Subject: [PATCH 12/18] [FOLD] Clean up #include's --- src/test/app/SHAMapStore_test.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/test/app/SHAMapStore_test.cpp b/src/test/app/SHAMapStore_test.cpp index ef70944cd0b..97e5814b0f9 100644 --- a/src/test/app/SHAMapStore_test.cpp +++ b/src/test/app/SHAMapStore_test.cpp @@ -26,8 +26,6 @@ #include #include #include -#include -#include namespace ripple { namespace test { From 4986662b6507d9916fb20710a75015134ad947a7 Mon Sep 17 00:00:00 2001 From: Ed Hennis Date: Thu, 13 Feb 2025 11:26:10 -0500 Subject: [PATCH 13/18] Make the `rotate()` internals const --- .../nodestore/detail/DatabaseRotatingImp.cpp | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp index c10b733bcf4..f739c1b2742 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp @@ -47,20 +47,23 @@ DatabaseRotatingImp::rotate( std::string const& writableName, std::string const& archiveName)> const& f) { - std::string newWritableBackendName; - std::string newArchiveBackendName; - std::shared_ptr oldArchiveBackend; - { - std::lock_guard lock(mutex_); - - archiveBackend_->setDeletePath(); - oldArchiveBackend = std::move(archiveBackend_); + auto const + [newWritableBackendName, newArchiveBackendName, oldArchiveBackend] = + [&]() { + std::lock_guard lock(mutex_); - archiveBackend_ = std::move(writableBackend_); - newArchiveBackendName = archiveBackend_->getName(); - writableBackend_ = std::move(newBackend); - newWritableBackendName = writableBackend_->getName(); - } + archiveBackend_->setDeletePath(); + auto const oldArchiveBackend = std::move(archiveBackend_); + + archiveBackend_ = std::move(writableBackend_); + auto const newArchiveBackendName = archiveBackend_->getName(); + writableBackend_ = std::move(newBackend); + auto const newWritableBackendName = writableBackend_->getName(); + return std::tuple{ + newWritableBackendName, + newArchiveBackendName, + oldArchiveBackend}; + }(); f(newWritableBackendName, newArchiveBackendName); } From 01cf3c4ce4ef3f6fd5604ea51c2522f3118c1809 Mon Sep 17 00:00:00 2001 From: Ed Hennis Date: Thu, 13 Feb 2025 13:13:59 -0500 Subject: [PATCH 14/18] Update src/test/app/SHAMapStore_test.cpp Co-authored-by: Bart --- src/test/app/SHAMapStore_test.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/test/app/SHAMapStore_test.cpp b/src/test/app/SHAMapStore_test.cpp index 97e5814b0f9..5fd3f79c9f5 100644 --- a/src/test/app/SHAMapStore_test.cpp +++ b/src/test/app/SHAMapStore_test.cpp @@ -544,6 +544,7 @@ class SHAMapStore_test : public beast::unit_test::suite backend->open(); return backend; } + void testRotate() { From 3994ae321a38fe2bf0d0096c7bfebd4b74d3bd1d Mon Sep 17 00:00:00 2001 From: Ed Hennis Date: Thu, 13 Feb 2025 13:16:41 -0500 Subject: [PATCH 15/18] Revert "Make the `rotate()` internals const" This reverts commit 4986662b6507d9916fb20710a75015134ad947a7. --- .../nodestore/detail/DatabaseRotatingImp.cpp | 29 +++++++++---------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp index f739c1b2742..c10b733bcf4 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp @@ -47,23 +47,20 @@ DatabaseRotatingImp::rotate( std::string const& writableName, std::string const& archiveName)> const& f) { - auto const - [newWritableBackendName, newArchiveBackendName, oldArchiveBackend] = - [&]() { - std::lock_guard lock(mutex_); + std::string newWritableBackendName; + std::string newArchiveBackendName; + std::shared_ptr oldArchiveBackend; + { + std::lock_guard lock(mutex_); - archiveBackend_->setDeletePath(); - auto const oldArchiveBackend = std::move(archiveBackend_); - - archiveBackend_ = std::move(writableBackend_); - auto const newArchiveBackendName = archiveBackend_->getName(); - writableBackend_ = std::move(newBackend); - auto const newWritableBackendName = writableBackend_->getName(); - return std::tuple{ - newWritableBackendName, - newArchiveBackendName, - oldArchiveBackend}; - }(); + archiveBackend_->setDeletePath(); + oldArchiveBackend = std::move(archiveBackend_); + + archiveBackend_ = std::move(writableBackend_); + newArchiveBackendName = archiveBackend_->getName(); + writableBackend_ = std::move(newBackend); + newWritableBackendName = writableBackend_->getName(); + } f(newWritableBackendName, newArchiveBackendName); } From c88668ec3f632cc56127ae11bc2296c7733a92c5 Mon Sep 17 00:00:00 2001 From: Ed Hennis Date: Thu, 13 Feb 2025 13:22:12 -0500 Subject: [PATCH 16/18] Add comments, some const --- src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp index c10b733bcf4..79aadd0cfed 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp @@ -47,8 +47,12 @@ DatabaseRotatingImp::rotate( std::string const& writableName, std::string const& archiveName)> const& f) { - std::string newWritableBackendName; + // Pass these two names to the callback function + std::string const newWritableBackendName = newBackEnd->getName(); std::string newArchiveBackendName; + // Hold on to current archive backend pointer until after the + // callback finishes. Only then will the archive directory be + // deleted. std::shared_ptr oldArchiveBackend; { std::lock_guard lock(mutex_); @@ -58,8 +62,8 @@ DatabaseRotatingImp::rotate( archiveBackend_ = std::move(writableBackend_); newArchiveBackendName = archiveBackend_->getName(); + writableBackend_ = std::move(newBackend); - newWritableBackendName = writableBackend_->getName(); } f(newWritableBackendName, newArchiveBackendName); From 0f032cc3ffe08d06aade6c574ea4df4c39ff410e Mon Sep 17 00:00:00 2001 From: Ed Hennis Date: Thu, 13 Feb 2025 13:25:50 -0500 Subject: [PATCH 17/18] Fix levelization (again?) --- Builds/levelization/results/ordering.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Builds/levelization/results/ordering.txt b/Builds/levelization/results/ordering.txt index acf8daafb79..681f76dd5db 100644 --- a/Builds/levelization/results/ordering.txt +++ b/Builds/levelization/results/ordering.txt @@ -19,6 +19,7 @@ test.app > xrpl.basics test.app > xrpld.app test.app > xrpld.core test.app > xrpld.ledger +test.app > xrpld.nodestore test.app > xrpld.overlay test.app > xrpld.rpc test.app > xrpl.json From ba94c0e4c9fb43d54c3d6c177cf5dbe07d99cb48 Mon Sep 17 00:00:00 2001 From: Ed Hennis Date: Thu, 13 Feb 2025 15:06:27 -0500 Subject: [PATCH 18/18] fixup! Add comments, some const --- src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp index 79aadd0cfed..c7e6c8c349f 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp @@ -48,7 +48,7 @@ DatabaseRotatingImp::rotate( std::string const& archiveName)> const& f) { // Pass these two names to the callback function - std::string const newWritableBackendName = newBackEnd->getName(); + std::string const newWritableBackendName = newBackend->getName(); std::string newArchiveBackendName; // Hold on to current archive backend pointer until after the // callback finishes. Only then will the archive directory be