From caf0ed3ae9ad811e3d92f8a737633e53286a2e48 Mon Sep 17 00:00:00 2001 From: Florent De Neve Date: Mon, 9 Dec 2024 16:32:31 -0400 Subject: [PATCH 1/4] Allow the checkpointer to zero the old *-shm pages instead of the writer who does so in the COMMIT block --- sqlitecluster/SQLite.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index e4f63acb2..178ffb035 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -797,7 +797,9 @@ int SQLite::commit(const string& description, function* preCheckpointCal if (_sharedData.outstandingFramesToCheckpoint) { auto start = STimeNow(); int framesCheckpointed = 0; - sqlite3_wal_checkpoint_v2(_db, 0, SQLITE_CHECKPOINT_PASSIVE, NULL, &framesCheckpointed); + sqlite3_busy_timeout(_db, 120'000); // 2 minutes + sqlite3_wal_checkpoint_v2(_db, 0, SQLITE_CHECKPOINT_FULL, NULL, &framesCheckpointed); + sqlite3_busy_timeout(_db, 0); auto end = STimeNow(); SINFO("Checkpointed " << framesCheckpointed << " (total) frames of " << _sharedData.outstandingFramesToCheckpoint << " in " << (end - start) << "us."); From 5acd465856c55587499396deef2979b75d7e1534 Mon Sep 17 00:00:00 2001 From: Florent De Neve Date: Mon, 16 Dec 2024 12:47:10 -0400 Subject: [PATCH 2/4] Allow to pass checkpointMode via CLI to make it dynamically configurable --- BedrockServer.cpp | 13 +++++++++---- main.cpp | 2 ++ sqlitecluster/SQLite.cpp | 32 +++++++++++++++++++++++++------- sqlitecluster/SQLite.h | 5 ++++- sqlitecluster/SQLitePool.cpp | 5 +++-- sqlitecluster/SQLitePool.h | 2 +- 6 files changed, 44 insertions(+), 15 deletions(-) diff --git a/BedrockServer.cpp b/BedrockServer.cpp index 98e59414f..cbe7d7e9a 100644 --- a/BedrockServer.cpp +++ b/BedrockServer.cpp @@ -97,7 +97,7 @@ void BedrockServer::sync() // We use fewer FDs on test machines that have other resource restrictions in place. SINFO("Setting dbPool size to: " << _dbPoolSize); - _dbPool = make_shared(_dbPoolSize, args["-db"], args.calc("-cacheSize"), args.calc("-maxJournalSize"), journalTables, mmapSizeGB, args.isSet("-hctree")); + _dbPool = make_shared(_dbPoolSize, args["-db"], args.calc("-cacheSize"), args.calc("-maxJournalSize"), journalTables, mmapSizeGB, args.isSet("-hctree"), args["-checkpointMode"]); SQLite& db = _dbPool->getBase(); // Initialize the command processor. @@ -358,7 +358,7 @@ void BedrockServer::sync() committingCommand = true; _syncNode->startCommit(SQLiteNode::QUORUM); _lastQuorumCommandTime = STimeNow(); - + // This interrupts the next poll loop immediately. This prevents a 1-second wait when running as a single server. _notifyDoneSync.push(true); SDEBUG("Finished sending distributed transaction for db upgrade."); @@ -1267,6 +1267,11 @@ BedrockServer::BedrockServer(const SData& args_) sort(versions.begin(), versions.end()); _version = SComposeList(versions, ":"); + const set validCheckpointModes = {"PASSIVE", "FULL", "RESTART", "TRUNCATE"}; + if (validCheckpointModes.find(args["-checkpointMode"]) == validCheckpointModes.end()) { + SERROR("Invalid checkpoint mode " << args["-checkpointMode"]); + } + list pluginString; for (auto& p : plugins) { pluginString.emplace_back(p.first); @@ -1695,14 +1700,14 @@ void BedrockServer::_status(unique_ptr& command) { size_t totalCount = 0; for (const auto& s : _crashCommands) { totalCount += s.second.size(); - + vector paramsArray; for (const STable& params : s.second) { if (!params.empty()) { paramsArray.push_back(SComposeJSONObject(params)); } } - + STable commandObject; commandObject[s.first] = SComposeJSONArray(paramsArray); crashCommandListArray.push_back(SComposeJSONObject(commandObject)); diff --git a/main.cpp b/main.cpp index 240a1da88..160f4262d 100644 --- a/main.cpp +++ b/main.cpp @@ -236,6 +236,7 @@ int main(int argc, char* argv[]) { << endl; cout << "-maxJournalSize <#commits> Number of commits to retain in the historical journal (default 1000000)" << endl; + cout << "-checkpointMode Accepts PASSIVE|FULL|RESTART|TRUNCATE, which is the value passed to https://www.sqlite.org/c3ref/wal_checkpoint_v2.html" << endl; cout << endl; cout << "Quick Start Tips:" << endl; cout << "-----------------" << endl; @@ -299,6 +300,7 @@ int main(int argc, char* argv[]) { SETDEFAULT("-maxJournalSize", "1000000"); SETDEFAULT("-queryLog", "queryLog.csv"); SETDEFAULT("-enableMultiWrite", "true"); + SETDEFAULT("-checkpointMode", "PASSIVE"); args["-plugins"] = SComposeList(loadPlugins(args)); diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index 2a5f4e4e0..0f81a685b 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -223,7 +223,7 @@ void SQLite::commonConstructorInitialization(bool hctree) { } SQLite::SQLite(const string& filename, int cacheSize, int maxJournalSize, - int minJournalTables, int64_t mmapSizeGB, bool hctree) : + int minJournalTables, int64_t mmapSizeGB, bool hctree, const string& checkpointMode) : _filename(initializeFilename(filename)), _maxJournalSize(maxJournalSize), _db(initializeDB(_filename, mmapSizeGB, hctree)), @@ -231,7 +231,8 @@ SQLite::SQLite(const string& filename, int cacheSize, int maxJournalSize, _sharedData(initializeSharedData(_db, _filename, _journalNames, hctree)), _journalSize(initializeJournalSize(_db, _journalNames)), _cacheSize(cacheSize), - _mmapSizeGB(mmapSizeGB) + _mmapSizeGB(mmapSizeGB), + _checkpointMode(checkpointMode) { commonConstructorInitialization(hctree); } @@ -244,7 +245,8 @@ SQLite::SQLite(const SQLite& from) : _sharedData(from._sharedData), _journalSize(from._journalSize), _cacheSize(from._cacheSize), - _mmapSizeGB(from._mmapSizeGB) + _mmapSizeGB(from._mmapSizeGB), + _checkpointMode(from._checkpointMode) { // This can always pass "true" because the copy constructor does not need to set the DB to WAL2 mode, it would have been set in the object being copied. commonConstructorInitialization(true); @@ -819,11 +821,27 @@ int SQLite::commit(const string& description, function* preCheckpointCal if (_sharedData.outstandingFramesToCheckpoint) { auto start = STimeNow(); int framesCheckpointed = 0; - sqlite3_busy_timeout(_db, 120'000); // 2 minutes - sqlite3_wal_checkpoint_v2(_db, 0, SQLITE_CHECKPOINT_FULL, NULL, &framesCheckpointed); - sqlite3_busy_timeout(_db, 0); + + // We default to PASSIVE checkpoint everywhere as that has been the value proven to work fine for many years. + if (_checkpointMode != "PASSIVE") { + int checkpointMode = SQLITE_CHECKPOINT_PASSIVE; + if (_checkpointMode == "FULL") { + checkpointMode = SQLITE_CHECKPOINT_FULL; + } else if (_checkpointMode == "RESTART") { + checkpointMode = SQLITE_CHECKPOINT_RESTART; + } else if (_checkpointMode == "TRUNCATE") { + checkpointMode = SQLITE_CHECKPOINT_TRUNCATE; + } + // For non-passive checkpoints, we must set a busy timeout in order to wait on any readers. + // We set it to 2 minutes as the majority of transactions should take less than that. + sqlite3_busy_timeout(_db, 120'000); + sqlite3_wal_checkpoint_v2(_db, 0, checkpointMode, NULL, &framesCheckpointed); + sqlite3_busy_timeout(_db, 0); + } else { + sqlite3_wal_checkpoint_v2(_db, 0, SQLITE_CHECKPOINT_PASSIVE, NULL, &framesCheckpointed); + } auto end = STimeNow(); - SINFO("Checkpointed " << framesCheckpointed << " (total) frames of " << _sharedData.outstandingFramesToCheckpoint << " in " << (end - start) << "us."); + SINFO(_checkpointMode << " checkpoint complete with " << framesCheckpointed << " frames checkpointed of " << _sharedData.outstandingFramesToCheckpoint << " frames outstanding in " << (end - start) << "us."); // It might not actually be 0, but we'll just let sqlite tell us what it is next time _walHookCallback runs. _sharedData.outstandingFramesToCheckpoint = 0; diff --git a/sqlitecluster/SQLite.h b/sqlitecluster/SQLite.h index 648d0b1a7..e11e91e3c 100644 --- a/sqlitecluster/SQLite.h +++ b/sqlitecluster/SQLite.h @@ -57,7 +57,7 @@ class SQLite { // // mmapSizeGB: address space to use for memory-mapped IO, in GB. SQLite(const string& filename, int cacheSize, int maxJournalSize, int minJournalTables, - int64_t mmapSizeGB = 0, bool hctree = false); + int64_t mmapSizeGB = 0, bool hctree = false, const string& checkpointMode = "PASSIVE"); // This constructor is not exactly a copy constructor. It creates an other SQLite object based on the first except // with a *different* journal table. This avoids a lot of locking around creating structures that we know already @@ -529,4 +529,7 @@ class SQLite { // Set to true inside of a write query. bool _currentlyWriting{false}; + + // One of PASSIVE|FULL|RESTART|TRUNCATE, translated to corresponding values to be passed to sqlite3_wal_checkpoint_v2. + string _checkpointMode; }; diff --git a/sqlitecluster/SQLitePool.cpp b/sqlitecluster/SQLitePool.cpp index ca3d7a4ad..75d1a31f2 100644 --- a/sqlitecluster/SQLitePool.cpp +++ b/sqlitecluster/SQLitePool.cpp @@ -8,9 +8,10 @@ SQLitePool::SQLitePool(size_t maxDBs, int maxJournalSize, int minJournalTables, int64_t mmapSizeGB, - bool hctree) + bool hctree, + const string& checkpointMode) : _maxDBs(max(maxDBs, 1ul)), - _baseDB(filename, cacheSize, maxJournalSize, minJournalTables, mmapSizeGB, hctree), + _baseDB(filename, cacheSize, maxJournalSize, minJournalTables, mmapSizeGB, hctree, checkpointMode), _objects(_maxDBs, nullptr) { } diff --git a/sqlitecluster/SQLitePool.h b/sqlitecluster/SQLitePool.h index 8cbc6c92e..cf12e38e2 100644 --- a/sqlitecluster/SQLitePool.h +++ b/sqlitecluster/SQLitePool.h @@ -7,7 +7,7 @@ class SQLitePool { public: // Create a pool of DB handles. SQLitePool(size_t maxDBs, const string& filename, int cacheSize, int maxJournalSize, int minJournalTables, - int64_t mmapSizeGB = 0, bool hctree = false); + int64_t mmapSizeGB = 0, bool hctree = false, const string& checkpointMode = "PASSIVE"); ~SQLitePool(); // Get the base object (the first one created, which uses the `journal` table). Note that if called by multiple From 451441baf934dcf271c0fdaa1ac1652dabad3b6d Mon Sep 17 00:00:00 2001 From: Florent De Neve Date: Mon, 16 Dec 2024 16:50:31 -0400 Subject: [PATCH 3/4] Store _checkpointMode as int --- BedrockServer.cpp | 5 ----- main.cpp | 2 ++ sqlitecluster/SQLite.cpp | 38 +++++++++++++++++++++----------------- sqlitecluster/SQLite.h | 5 +++-- 4 files changed, 26 insertions(+), 24 deletions(-) diff --git a/BedrockServer.cpp b/BedrockServer.cpp index cbe7d7e9a..be9297eab 100644 --- a/BedrockServer.cpp +++ b/BedrockServer.cpp @@ -1267,11 +1267,6 @@ BedrockServer::BedrockServer(const SData& args_) sort(versions.begin(), versions.end()); _version = SComposeList(versions, ":"); - const set validCheckpointModes = {"PASSIVE", "FULL", "RESTART", "TRUNCATE"}; - if (validCheckpointModes.find(args["-checkpointMode"]) == validCheckpointModes.end()) { - SERROR("Invalid checkpoint mode " << args["-checkpointMode"]); - } - list pluginString; for (auto& p : plugins) { pluginString.emplace_back(p.first); diff --git a/main.cpp b/main.cpp index 160f4262d..e318e70ca 100644 --- a/main.cpp +++ b/main.cpp @@ -300,6 +300,8 @@ int main(int argc, char* argv[]) { SETDEFAULT("-maxJournalSize", "1000000"); SETDEFAULT("-queryLog", "queryLog.csv"); SETDEFAULT("-enableMultiWrite", "true"); + + // We default to PASSIVE checkpoint everywhere as that has been the value proven to work fine for many years. SETDEFAULT("-checkpointMode", "PASSIVE"); args["-plugins"] = SComposeList(loadPlugins(args)); diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index 0f81a685b..3a8d0f9c2 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -232,7 +232,7 @@ SQLite::SQLite(const string& filename, int cacheSize, int maxJournalSize, _journalSize(initializeJournalSize(_db, _journalNames)), _cacheSize(cacheSize), _mmapSizeGB(mmapSizeGB), - _checkpointMode(checkpointMode) + _checkpointMode(getCheckpointModeFromString(checkpointMode)) { commonConstructorInitialization(hctree); } @@ -822,24 +822,12 @@ int SQLite::commit(const string& description, function* preCheckpointCal auto start = STimeNow(); int framesCheckpointed = 0; - // We default to PASSIVE checkpoint everywhere as that has been the value proven to work fine for many years. - if (_checkpointMode != "PASSIVE") { - int checkpointMode = SQLITE_CHECKPOINT_PASSIVE; - if (_checkpointMode == "FULL") { - checkpointMode = SQLITE_CHECKPOINT_FULL; - } else if (_checkpointMode == "RESTART") { - checkpointMode = SQLITE_CHECKPOINT_RESTART; - } else if (_checkpointMode == "TRUNCATE") { - checkpointMode = SQLITE_CHECKPOINT_TRUNCATE; - } - // For non-passive checkpoints, we must set a busy timeout in order to wait on any readers. - // We set it to 2 minutes as the majority of transactions should take less than that. + // For non-passive checkpoints, we must set a busy timeout in order to wait on any readers. + // We set it to 2 minutes as the majority of transactions should take less than that. + if (_checkpointMode != SQLITE_CHECKPOINT_PASSIVE) { sqlite3_busy_timeout(_db, 120'000); - sqlite3_wal_checkpoint_v2(_db, 0, checkpointMode, NULL, &framesCheckpointed); - sqlite3_busy_timeout(_db, 0); - } else { - sqlite3_wal_checkpoint_v2(_db, 0, SQLITE_CHECKPOINT_PASSIVE, NULL, &framesCheckpointed); } + sqlite3_wal_checkpoint_v2(_db, 0, _checkpointMode, NULL, &framesCheckpointed); auto end = STimeNow(); SINFO(_checkpointMode << " checkpoint complete with " << framesCheckpointed << " frames checkpointed of " << _sharedData.outstandingFramesToCheckpoint << " frames outstanding in " << (end - start) << "us."); @@ -865,6 +853,22 @@ int SQLite::commit(const string& description, function* preCheckpointCal return result; } +int SQLite::getCheckpointModeFromString(const string& checkpointModeString) { + if (checkpointModeString == "PASSIVE") { + return SQLITE_CHECKPOINT_PASSIVE; + } + if (checkpointModeString == "FULL") { + return SQLITE_CHECKPOINT_FULL; + } + if (checkpointModeString == "RESTART") { + return SQLITE_CHECKPOINT_RESTART; + } + if (checkpointModeString == "TRUNCATE") { + return SQLITE_CHECKPOINT_TRUNCATE; + } + SERROR("Invalid checkpoint type: " << checkpointModeString); +} + map> SQLite::popCommittedTransactions() { return _sharedData.popCommittedTransactions(); } diff --git a/sqlitecluster/SQLite.h b/sqlitecluster/SQLite.h index e11e91e3c..81254f2c2 100644 --- a/sqlitecluster/SQLite.h +++ b/sqlitecluster/SQLite.h @@ -356,6 +356,7 @@ class SQLite { static vector initializeJournal(sqlite3* db, int minJournalTables); static uint64_t initializeJournalSize(sqlite3* db, const vector& journalNames); void commonConstructorInitialization(bool hctree = false); + static int getCheckpointModeFromString(const string& checkpointModeString); // The filename of this DB, canonicalized to its full path on disk. const string _filename; @@ -530,6 +531,6 @@ class SQLite { // Set to true inside of a write query. bool _currentlyWriting{false}; - // One of PASSIVE|FULL|RESTART|TRUNCATE, translated to corresponding values to be passed to sqlite3_wal_checkpoint_v2. - string _checkpointMode; + // One of 0|1|2|3 (a.k.a. PASSIVE|FULL|RESTART|TRUNCATE), which is the value to be passed to sqlite3_wal_checkpoint_v2. + int _checkpointMode; }; From 3df5fdfe869919ea2f5b7b68f5368bc8dc6c7cb9 Mon Sep 17 00:00:00 2001 From: Florent De Neve Date: Tue, 17 Dec 2024 12:37:25 -0400 Subject: [PATCH 4/4] Move sqlite3_busy_timeout to commonConstructorInitialization --- sqlitecluster/SQLite.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index 3a8d0f9c2..3a831e409 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -220,6 +220,12 @@ void SQLite::commonConstructorInitialization(bool hctree) { // Always set synchronous commits to off for best commit performance in WAL mode. SASSERT(!SQuery(_db, "setting synchronous commits to off", "PRAGMA synchronous = OFF;")); + + // For non-passive checkpoints, we must set a busy timeout in order to wait on any readers. + // We set it to 2 minutes as the majority of transactions should take less than that. + if (_checkpointMode != SQLITE_CHECKPOINT_PASSIVE) { + sqlite3_busy_timeout(_db, 120'000); + } } SQLite::SQLite(const string& filename, int cacheSize, int maxJournalSize, @@ -821,15 +827,9 @@ int SQLite::commit(const string& description, function* preCheckpointCal if (_sharedData.outstandingFramesToCheckpoint) { auto start = STimeNow(); int framesCheckpointed = 0; - - // For non-passive checkpoints, we must set a busy timeout in order to wait on any readers. - // We set it to 2 minutes as the majority of transactions should take less than that. - if (_checkpointMode != SQLITE_CHECKPOINT_PASSIVE) { - sqlite3_busy_timeout(_db, 120'000); - } sqlite3_wal_checkpoint_v2(_db, 0, _checkpointMode, NULL, &framesCheckpointed); auto end = STimeNow(); - SINFO(_checkpointMode << " checkpoint complete with " << framesCheckpointed << " frames checkpointed of " << _sharedData.outstandingFramesToCheckpoint << " frames outstanding in " << (end - start) << "us."); + SINFO("Checkpoint with type=" << _checkpointMode << " complete with " << framesCheckpointed << " frames checkpointed of " << _sharedData.outstandingFramesToCheckpoint << " frames outstanding in " << (end - start) << "us."); // It might not actually be 0, but we'll just let sqlite tell us what it is next time _walHookCallback runs. _sharedData.outstandingFramesToCheckpoint = 0;