diff --git a/fdbbackup/CMakeLists.txt b/fdbbackup/CMakeLists.txt index 6b91e0b16fa..5ae128fb774 100644 --- a/fdbbackup/CMakeLists.txt +++ b/fdbbackup/CMakeLists.txt @@ -37,11 +37,11 @@ if(NOT OPEN_FOR_IDE) symlink_files( LOCATION packages/bin SOURCE fdbbackup - TARGETS fdbdr dr_agent backup_agent fdbrestore fastrestore_tool) + TARGETS fdbdr dr_agent backup_agent fdbrestore) symlink_files( LOCATION bin SOURCE fdbbackup - TARGETS fdbdr dr_agent backup_agent fdbrestore fastrestore_tool) + TARGETS fdbdr dr_agent backup_agent fdbrestore) endif() if (GPERFTOOLS_FOUND) diff --git a/fdbbackup/backup.actor.cpp b/fdbbackup/backup.actor.cpp index e703e3d7bb3..274a6ed213a 100644 --- a/fdbbackup/backup.actor.cpp +++ b/fdbbackup/backup.actor.cpp @@ -82,7 +82,7 @@ #include "flow/actorcompiler.h" // This must be the last #include. // Type of program being executed -enum class ProgramExe { AGENT, BACKUP, RESTORE, FASTRESTORE_TOOL, DR_AGENT, DB_BACKUP, UNDEFINED }; +enum class ProgramExe { AGENT, BACKUP, RESTORE, DR_AGENT, DB_BACKUP, UNDEFINED }; enum class BackupType { UNDEFINED = 0, @@ -695,7 +695,7 @@ CSimpleOpt::SOption g_rgBackupQueryOptions[] = { SO_END_OF_OPTIONS }; -// g_rgRestoreOptions is used by fdbrestore and fastrestore_tool +// g_rgRestoreOptions is used by fdbrestore CSimpleOpt::SOption g_rgRestoreOptions[] = { #ifdef _WIN32 { OPT_PARENTPID, "--parentpid", SO_REQ_SEP }, @@ -931,7 +931,6 @@ CSimpleOpt::SOption g_rgDBPauseOptions[] = { const KeyRef exeAgent = "backup_agent"_sr; const KeyRef exeBackup = "fdbbackup"_sr; const KeyRef exeRestore = "fdbrestore"_sr; -const KeyRef exeFastRestoreTool = "fastrestore_tool"_sr; // must be lower case const KeyRef exeDatabaseAgent = "dr_agent"_sr; const KeyRef exeDatabaseBackup = "fdbdr"_sr; @@ -1253,14 +1252,6 @@ static void printRestoreUsage(bool devhelp) { return; } -static void printFastRestoreUsage(bool devhelp) { - printf(" NOTE: Fast restore aims to support the same fdbrestore option list.\n"); - printf(" But fast restore is still under development. The options may not be fully supported.\n"); - printf(" Supported options are: --dest-cluster-file, -r, --waitfordone, --logdir\n"); - printRestoreUsage(devhelp); - return; -} - static void printDBAgentUsage(bool devhelp) { printf("FoundationDB " FDB_VT_PACKAGE_NAME " (v" FDB_VT_VERSION ")\n"); printf("Usage: %s [OPTIONS]\n\n", exeDatabaseAgent.toString().c_str()); @@ -1363,9 +1354,6 @@ static void printUsage(ProgramExe programExe, bool devhelp) { case ProgramExe::RESTORE: printRestoreUsage(devhelp); break; - case ProgramExe::FASTRESTORE_TOOL: - printFastRestoreUsage(devhelp); - break; case ProgramExe::DR_AGENT: printDBAgentUsage(devhelp); break; @@ -1426,14 +1414,6 @@ ProgramExe getProgramType(std::string programExe) { enProgramExe = ProgramExe::RESTORE; } - // Check if restore - else if ((programExe.length() >= exeFastRestoreTool.size()) && - (programExe.compare(programExe.length() - exeFastRestoreTool.size(), - exeFastRestoreTool.size(), - (const char*)exeFastRestoreTool.begin()) == 0)) { - enProgramExe = ProgramExe::FASTRESTORE_TOOL; - } - // Check if db agent else if ((programExe.length() >= exeDatabaseAgent.size()) && (programExe.compare(programExe.length() - exeDatabaseAgent.size(), @@ -2463,121 +2443,6 @@ ACTOR Future runRestore(Database db, return Void(); } -// Fast restore agent that kicks off the restore: send restore requests to restore workers. -ACTOR Future runFastRestoreTool(Database db, - std::string tagName, - std::string container, - Optional proxy, - Standalone> ranges, - Version dbVersion, - bool performRestore, - Verbose verbose, - WaitForComplete waitForDone) { - try { - state FileBackupAgent backupAgent; - state Version restoreVersion = invalidVersion; - - if (ranges.size() > 1) { - fprintf(stdout, "[WARNING] Currently only a single restore range is tested!\n"); - } - - if (ranges.size() == 0) { - ranges.push_back(ranges.arena(), normalKeys); - } - - printf("[INFO] runFastRestoreTool: restore_ranges:%d first range:%s\n", - ranges.size(), - ranges.front().toString().c_str()); - TraceEvent ev("FastRestoreTool"); - ev.detail("RestoreRanges", ranges.size()); - for (int i = 0; i < ranges.size(); ++i) { - ev.detail(format("Range%d", i), ranges[i]); - } - - if (performRestore) { - if (dbVersion == invalidVersion) { - TraceEvent("FastRestoreTool").detail("TargetRestoreVersion", "Largest restorable version"); - BackupDescription desc = wait(IBackupContainer::openContainer(container, proxy, {})->describeBackup()); - if (!desc.maxRestorableVersion.present()) { - fprintf(stderr, "The specified backup is not restorable to any version.\n"); - throw restore_error(); - } - - dbVersion = desc.maxRestorableVersion.get(); - TraceEvent("FastRestoreTool").detail("TargetRestoreVersion", dbVersion); - } - state UID randomUID = deterministicRandom()->randomUniqueID(); - TraceEvent("FastRestoreTool") - .detail("SubmitRestoreRequests", ranges.size()) - .detail("RestoreUID", randomUID); - wait(backupAgent.submitParallelRestore(db, - KeyRef(tagName), - ranges, - KeyRef(container), - proxy, - dbVersion, - LockDB::True, - randomUID, - ""_sr, - ""_sr)); - // TODO: Support addPrefix and removePrefix - if (waitForDone) { - // Wait for parallel restore to finish and unlock DB after that - TraceEvent("FastRestoreTool").detail("BackupAndParallelRestore", "WaitForRestoreToFinish"); - wait(backupAgent.parallelRestoreFinish(db, randomUID)); - TraceEvent("FastRestoreTool").detail("BackupAndParallelRestore", "RestoreFinished"); - } else { - TraceEvent("FastRestoreTool") - .detail("RestoreUID", randomUID) - .detail("OperationGuide", "Manually unlock DB when restore finishes"); - printf("WARNING: DB will be in locked state after restore. Need UID:%s to unlock DB\n", - randomUID.toString().c_str()); - } - - restoreVersion = dbVersion; - } else { - state Reference bc = IBackupContainer::openContainer(container, proxy, {}); - state BackupDescription description = wait(bc->describeBackup()); - - if (dbVersion <= 0) { - wait(description.resolveVersionTimes(db)); - if (description.maxRestorableVersion.present()) - restoreVersion = description.maxRestorableVersion.get(); - else { - fprintf(stderr, "Backup is not restorable\n"); - throw restore_invalid_version(); - } - } else { - restoreVersion = dbVersion; - } - - state Optional rset = wait(bc->getRestoreSet(restoreVersion)); - if (!rset.present()) { - fmt::print(stderr, "Insufficient data to restore to version {}\n", restoreVersion); - throw restore_invalid_version(); - } - - // Display the restore information, if requested - if (verbose) { - fmt::print("[DRY RUN] Restoring backup to version: {}\n", restoreVersion); - fmt::print("{}\n", description.toString()); - } - } - - if (waitForDone && verbose) { - // If restore completed then report version restored - fmt::print("Restored to version {0}{1}\n", restoreVersion, (performRestore) ? "" : " (DRY RUN)"); - } - } catch (Error& e) { - if (e.code() == error_code_actor_cancelled) - throw; - fprintf(stderr, "ERROR: %s\n", e.what()); - throw; - } - - return Void(); -} - ACTOR Future dumpBackupData(const char* name, std::string destinationContainer, Optional proxy, @@ -3467,20 +3332,6 @@ int main(int argc, char* argv[]) { argc - 1, argv + 1, g_rgRestoreOptions, SO_O_EXACT | SO_O_HYPHEN_TO_UNDERSCORE); } break; - case ProgramExe::FASTRESTORE_TOOL: - if (argc < 2) { - printFastRestoreUsage(false); - return FDB_EXIT_ERROR; - } - // Get the restore operation type - restoreType = getRestoreType(argv[1]); - if (restoreType == RestoreType::UNKNOWN) { - args = std::make_unique(argc, argv, g_rgOptions, SO_O_EXACT | SO_O_HYPHEN_TO_UNDERSCORE); - } else { - args = std::make_unique( - argc - 1, argv + 1, g_rgRestoreOptions, SO_O_EXACT | SO_O_HYPHEN_TO_UNDERSCORE); - } - break; case ProgramExe::UNDEFINED: default: fprintf(stderr, "FoundationDB " FDB_VT_PACKAGE_NAME " (v" FDB_VT_VERSION ")\n"); @@ -4036,13 +3887,6 @@ int main(int argc, char* argv[]) { return FDB_EXIT_ERROR; break; - case ProgramExe::FASTRESTORE_TOOL: - fprintf( - stderr, "ERROR: FDB Fast Restore Tool does not support argument value `%s'\n", args->File(argLoop)); - printHelpTeaser(argv[0]); - return FDB_EXIT_ERROR; - break; - case ProgramExe::DR_AGENT: fprintf(stderr, "ERROR: DR Agent does not support argument value `%s'\n", args->File(argLoop)); printHelpTeaser(argv[0]); @@ -4197,18 +4041,10 @@ int main(int argc, char* argv[]) { return result.present(); }; - // The fastrestore tool does not yet support multiple ranges and is incompatible with tenants - // or other features that back up data in the system keys - if (!restoreSystemKeys && !restoreUserKeys && backupKeys.empty() && - programExe != ProgramExe::FASTRESTORE_TOOL) { + if (!restoreSystemKeys && !restoreUserKeys && backupKeys.empty()) { addDefaultBackupRanges(backupKeys); } - if ((restoreSystemKeys || restoreUserKeys) && programExe == ProgramExe::FASTRESTORE_TOOL) { - fprintf(stderr, "ERROR: Options: --user-data and --system-metadata are not supported with fastrestore\n"); - return FDB_EXIT_ERROR; - } - if ((restoreUserKeys || restoreSystemKeys) && !backupKeys.empty()) { fprintf(stderr, "ERROR: Cannot specify additional ranges when using --user-data or --system-metadata " @@ -4464,81 +4300,6 @@ int main(int argc, char* argv[]) { throw restore_error(); } break; - case ProgramExe::FASTRESTORE_TOOL: - // Support --dest-cluster-file option as fdbrestore does - if (dryRun) { - if (restoreType != RestoreType::START) { - fprintf(stderr, "Restore dry run only works for 'start' command\n"); - return FDB_EXIT_ERROR; - } - - // Must explicitly call trace file options handling if not calling Database::createDatabase() - initTraceFile(); - } else { - if (restoreClusterFileDest.empty()) { - fprintf(stderr, "Restore destination cluster file must be specified explicitly.\n"); - return FDB_EXIT_ERROR; - } - - if (!fileExists(restoreClusterFileDest)) { - fprintf(stderr, - "Restore destination cluster file '%s' does not exist.\n", - restoreClusterFileDest.c_str()); - return FDB_EXIT_ERROR; - } - - try { - db = Database::createDatabase(restoreClusterFileDest, ApiVersion::LATEST_VERSION); - } catch (Error& e) { - fprintf(stderr, - "Restore destination cluster file '%s' invalid: %s\n", - restoreClusterFileDest.c_str(), - e.what()); - return FDB_EXIT_ERROR; - } - } - // TODO: We have not implemented the code commented out in this case - switch (restoreType) { - case RestoreType::START: - f = stopAfter(runFastRestoreTool(db, - tagName, - restoreContainer, - proxy, - backupKeys, - restoreVersion, - !dryRun, - Verbose{ !quietDisplay }, - waitForDone)); - break; - case RestoreType::WAIT: - printf("[TODO][ERROR] FastRestore does not support RESTORE_WAIT yet!\n"); - throw restore_error(); - // f = stopAfter( success(ba.waitRestore(db, KeyRef(tagName), true)) ); - break; - case RestoreType::ABORT: - printf("[TODO][ERROR] FastRestore does not support RESTORE_ABORT yet!\n"); - throw restore_error(); - // f = stopAfter( map(ba.abortRestore(db, KeyRef(tagName)), - //[tagName](FileBackupAgent::ERestoreState s) -> Void { printf("Tag: %s State: - //%s\n", tagName.c_str(), - // FileBackupAgent::restoreStateText(s).toString().c_str()); return Void(); - // }) ); - break; - case RestoreType::STATUS: - printf("[TODO][ERROR] FastRestore does not support RESTORE_STATUS yet!\n"); - throw restore_error(); - // If no tag is specifically provided then print all tag status, don't just use "default" - if (tagProvided) - tag = tagName; - // f = stopAfter( map(ba.restoreStatus(db, KeyRef(tag)), [](std::string s) -> Void { - // printf("%s\n", s.c_str()); - // return Void(); - // }) ); - break; - default: - throw restore_error(); - } - break; case ProgramExe::DR_AGENT: if (!initCluster() || !initSourceCluster(true)) { return FDB_EXIT_ERROR; diff --git a/fdbclient/ClientKnobs.cpp b/fdbclient/ClientKnobs.cpp index 8304d2786d0..ea1ba1268b1 100644 --- a/fdbclient/ClientKnobs.cpp +++ b/fdbclient/ClientKnobs.cpp @@ -195,7 +195,6 @@ void ClientKnobs::initialize(Randomize randomize) { init( BACKUP_STATUS_DELAY, 40.0 ); init( BACKUP_STATUS_JITTER, 0.05 ); init( MIN_CLEANUP_SECONDS, 3600.0 ); - init( FASTRESTORE_ATOMICOP_WEIGHT, 1 ); if( randomize && BUGGIFY ) { FASTRESTORE_ATOMICOP_WEIGHT = deterministicRandom()->random01() * 200 + 1; } init( RESTORE_RANGES_READ_BATCH, 10000 ); init( BLOB_GRANULE_RESTORE_CHECK_INTERVAL, 10 ); init( BACKUP_CONTAINER_LOCAL_ALLOW_RELATIVE_PATH, false ); diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index f642ac44c14..4e135702217 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -6517,148 +6517,6 @@ class FileBackupAgentImpl { public: static constexpr int MAX_RESTORABLE_FILE_METASECTION_BYTES = 1024 * 8; - // Parallel restore - ACTOR static Future parallelRestoreFinish(Database cx, UID randomUID, UnlockDB unlockDB = UnlockDB::True) { - state ReadYourWritesTransaction tr(cx); - state Optional restoreRequestDoneKeyValue; - TraceEvent("FastRestoreToolWaitForRestoreToFinish").detail("DBLock", randomUID); - // TODO: register watch first and then check if the key exist - loop { - try { - tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - tr.setOption(FDBTransactionOptions::LOCK_AWARE); - Optional _restoreRequestDoneKeyValue = wait(tr.get(restoreRequestDoneKey)); - restoreRequestDoneKeyValue = _restoreRequestDoneKeyValue; - // Restore may finish before restoreTool waits on the restore finish event. - if (restoreRequestDoneKeyValue.present()) { - break; - } else { - state Future watchForRestoreRequestDone = tr.watch(restoreRequestDoneKey); - wait(tr.commit()); - wait(watchForRestoreRequestDone); - break; - } - } catch (Error& e) { - wait(tr.onError(e)); - } - } - - TraceEvent("FastRestoreToolRestoreFinished") - .detail("ClearRestoreRequestDoneKey", restoreRequestDoneKeyValue.present()); - // Only this agent can clear the restoreRequestDoneKey - wait(runRYWTransaction(cx, [](Reference tr) -> Future { - tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - tr->setOption(FDBTransactionOptions::LOCK_AWARE); - tr->clear(restoreRequestDoneKey); - return Void(); - })); - - if (unlockDB) { - TraceEvent("FastRestoreToolRestoreFinished").detail("UnlockDBStart", randomUID); - wait(unlockDatabase(cx, randomUID)); - TraceEvent("FastRestoreToolRestoreFinished").detail("UnlockDBFinish", randomUID); - } else { - TraceEvent("FastRestoreToolRestoreFinished").detail("DBLeftLockedAfterRestore", randomUID); - } - - return Void(); - } - - ACTOR static Future submitParallelRestore(Database cx, - Key backupTag, - Standalone> backupRanges, - Key bcUrl, - Optional proxy, - Version targetVersion, - LockDB lockDB, - UID randomUID, - Key addPrefix, - Key removePrefix) { - // Sanity check backup is valid - state Reference bc = IBackupContainer::openContainer(bcUrl.toString(), proxy, {}); - state BackupDescription desc = wait(bc->describeBackup()); - wait(desc.resolveVersionTimes(cx)); - - if (targetVersion == invalidVersion && desc.maxRestorableVersion.present()) { - targetVersion = desc.maxRestorableVersion.get(); - TraceEvent(SevWarn, "FastRestoreSubmitRestoreRequestWithInvalidTargetVersion") - .detail("OverrideTargetVersion", targetVersion); - } - - Optional restoreSet = wait(bc->getRestoreSet(targetVersion)); - - if (!restoreSet.present()) { - TraceEvent(SevWarn, "FileBackupAgentRestoreNotPossible") - .detail("BackupContainer", bc->getURL()) - .detail("TargetVersion", targetVersion); - throw restore_invalid_version(); - } - - TraceEvent("FastRestoreSubmitRestoreRequest") - .detail("BackupDesc", desc.toString()) - .detail("TargetVersion", targetVersion); - - state Reference tr(new ReadYourWritesTransaction(cx)); - state int restoreIndex = 0; - state int numTries = 0; - // lock DB for restore - loop { - try { - if (lockDB) { - wait(lockDatabase(cx, randomUID)); - } - wait(checkDatabaseLock(tr, randomUID)); - - TraceEvent("FastRestoreToolSubmitRestoreRequests").detail("DBIsLocked", randomUID); - break; - } catch (Error& e) { - TraceEvent(numTries > 50 ? SevError : SevInfo, "FastRestoreToolSubmitRestoreRequestsMayFail") - .error(e) - .detail("Reason", "DB is not properly locked") - .detail("ExpectedLockID", randomUID); - numTries++; - wait(tr->onError(e)); - } - } - - // set up restore request - tr->reset(); - numTries = 0; - loop { - tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - tr->setOption(FDBTransactionOptions::LOCK_AWARE); - try { - // Note: we always lock DB here in case DB is modified at the bacupRanges boundary. - for (restoreIndex = 0; restoreIndex < backupRanges.size(); restoreIndex++) { - auto range = backupRanges[restoreIndex]; - Standalone restoreTag(backupTag.toString() + "_" + std::to_string(restoreIndex)); - // Register the request request in DB, which will be picked up by restore worker leader - struct RestoreRequest restoreRequest(restoreIndex, - restoreTag, - bcUrl, - proxy, - targetVersion, - range, - deterministicRandom()->randomUniqueID(), - addPrefix, - removePrefix); - tr->set(restoreRequestKeyFor(restoreRequest.index), restoreRequestValue(restoreRequest)); - } - tr->set(restoreRequestTriggerKey, - restoreRequestTriggerValue(deterministicRandom()->randomUniqueID(), backupRanges.size())); - wait(tr->commit()); // Trigger restore - break; - } catch (Error& e) { - TraceEvent(numTries > 50 ? SevError : SevInfo, "FastRestoreToolSubmitRestoreRequestsRetry") - .error(e) - .detail("RestoreIndex", restoreIndex); - numTries++; - wait(tr->onError(e)); - } - } - return Void(); - } - // This method will return the final status of the backup at tag, and return the URL that was used on the tag // when that status value was read. ACTOR static Future waitBackup(FileBackupAgent* backupAgent, @@ -7782,26 +7640,7 @@ class FileBackupAgentImpl { bc = fileBackup::getBackupContainerWithProxy(bc); if (fastRestore) { - TraceEvent("AtomicParallelRestoreStartRestore").log(); - Version targetVersion = ::invalidVersion; - wait(submitParallelRestore(cx, - tagName, - ranges, - KeyRef(bc->getURL()), - bc->getProxy(), - targetVersion, - LockDB::True, - randomUid, - addPrefix, - removePrefix)); - state bool hasPrefix = (addPrefix.size() > 0 || removePrefix.size() > 0); - TraceEvent("AtomicParallelRestoreWaitForRestoreFinish").detail("HasPrefix", hasPrefix); - wait(parallelRestoreFinish(cx, randomUid, UnlockDB{ !hasPrefix })); - // If addPrefix or removePrefix set, we want to transform the effect by copying data - if (hasPrefix) { - wait(transformRestoredDatabase(cx, ranges, addPrefix, removePrefix)); - wait(unlockDatabase(cx, randomUid)); - } + ASSERT(false); // fastRestore deprecated return -1; } else { TraceEvent("AS_StartRestore").log(); @@ -7883,50 +7722,10 @@ class FileBackupAgentImpl { return ver; } } - - // Similar to atomicRestore, only used in simulation test. - // locks the database before discontinuing the backup and that same lock is then used while doing the restore. - // the tagname of the backup must be the same as the restore. - static Future atomicParallelRestore(FileBackupAgent* backupAgent, - Database cx, - Key tagName, - Standalone> ranges, - Key addPrefix, - Key removePrefix) { - return success( - atomicRestore(backupAgent, cx, tagName, ranges, addPrefix, removePrefix, UsePartitionedLog::True)); - } }; const int FileBackupAgent::dataFooterSize = 20; -// Return if parallel restore has finished -Future FileBackupAgent::parallelRestoreFinish(Database cx, UID randomUID, UnlockDB unlockDB) { - return FileBackupAgentImpl::parallelRestoreFinish(cx, randomUID, unlockDB); -} - -Future FileBackupAgent::submitParallelRestore(Database cx, - Key backupTag, - Standalone> backupRanges, - Key bcUrl, - Optional proxy, - Version targetVersion, - LockDB lockDB, - UID randomUID, - Key addPrefix, - Key removePrefix) { - return FileBackupAgentImpl::submitParallelRestore( - cx, backupTag, backupRanges, bcUrl, proxy, targetVersion, lockDB, randomUID, addPrefix, removePrefix); -} - -Future FileBackupAgent::atomicParallelRestore(Database cx, - Key tagName, - Standalone> ranges, - Key addPrefix, - Key removePrefix) { - return FileBackupAgentImpl::atomicParallelRestore(this, cx, tagName, ranges, addPrefix, removePrefix); -} - Future FileBackupAgent::restore(Database cx, Optional cxOrig, Key tagName, @@ -8240,180 +8039,6 @@ ACTOR static Future writeKVs(Database cx, Standalone transformDatabaseContents(Database cx, - Key addPrefix, - Key removePrefix, - Standalone> restoreRanges) { - state ReadYourWritesTransaction tr(cx); - state Standalone> oldData; - - TraceEvent("FastRestoreWorkloadTransformDatabaseContents") - .detail("AddPrefix", addPrefix) - .detail("RemovePrefix", removePrefix); - state int i = 0; - loop { // Read all data from DB - try { - tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - tr.setOption(FDBTransactionOptions::LOCK_AWARE); - for (i = 0; i < restoreRanges.size(); ++i) { - RangeResult kvs = wait(tr.getRange(restoreRanges[i], CLIENT_KNOBS->TOO_MANY)); - ASSERT(!kvs.more); - for (auto kv : kvs) { - oldData.push_back_deep(oldData.arena(), KeyValueRef(kv.key, kv.value)); - } - } - break; - } catch (Error& e) { - TraceEvent("FastRestoreWorkloadTransformDatabaseContentsGetAllKeys") - .error(e) - .detail("Index", i) - .detail("RestoreRange", restoreRanges[i]); - oldData = Standalone>(); // clear the vector - wait(tr.onError(e)); - } - } - - // Convert data by removePrefix and addPrefix in memory - state Standalone> newKVs; - for (int i = 0; i < oldData.size(); ++i) { - Key newKey(oldData[i].key); - TraceEvent(SevFRTestInfo, "TransformDatabaseContents") - .detail("Keys", oldData.size()) - .detail("Index", i) - .detail("GetKey", oldData[i].key) - .detail("GetValue", oldData[i].value); - if (newKey.size() < removePrefix.size()) { // If true, must check why. - TraceEvent(SevError, "TransformDatabaseContents") - .detail("Key", newKey) - .detail("RemovePrefix", removePrefix); - continue; - } - newKey = newKey.removePrefix(removePrefix).withPrefix(addPrefix); - newKVs.push_back_deep(newKVs.arena(), KeyValueRef(newKey.contents(), oldData[i].value)); - TraceEvent(SevFRTestInfo, "TransformDatabaseContents") - .detail("Keys", newKVs.size()) - .detail("Index", i) - .detail("NewKey", newKVs.back().key) - .detail("NewValue", newKVs.back().value); - } - - state Standalone> backupRanges; // dest. ranges - for (auto& range : restoreRanges) { - KeyRange tmpRange = range; - backupRanges.push_back_deep(backupRanges.arena(), tmpRange.removePrefix(removePrefix).withPrefix(addPrefix)); - } - - // Clear the transformed data (original data with removePrefix and addPrefix) in restoreRanges - wait(runRYWTransaction(cx, [=](Reference tr) -> Future { - tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - tr->setOption(FDBTransactionOptions::LOCK_AWARE); - for (int i = 0; i < restoreRanges.size(); i++) { - TraceEvent(SevFRTestInfo, "TransformDatabaseContents") - .detail("ClearRestoreRange", restoreRanges[i]) - .detail("ClearBackupRange", backupRanges[i]); - tr->clear(restoreRanges[i]); // Clear the range.removePrefix().withPrefix() - tr->clear(backupRanges[i]); - } - return Void(); - })); - - // Sanity check to ensure no data in the ranges - tr.reset(); - loop { - try { - tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - tr.setOption(FDBTransactionOptions::LOCK_AWARE); - RangeResult emptyData = wait(tr.getRange(normalKeys, CLIENT_KNOBS->TOO_MANY)); - for (int i = 0; i < emptyData.size(); ++i) { - TraceEvent(SevError, "ExpectEmptyData") - .detail("Index", i) - .detail("Key", emptyData[i].key) - .detail("Value", emptyData[i].value); - } - break; - } catch (Error& e) { - wait(tr.onError(e)); - } - } - - // Write transformed KVs (i.e., kv backup took) back to DB - state std::vector> fwrites; - loop { - try { - state int begin = 0; - state int len = 0; - while (begin < newKVs.size()) { - len = std::min(100, newKVs.size() - begin); - fwrites.push_back(writeKVs(cx, newKVs, begin, begin + len)); - begin = begin + len; - } - wait(waitForAll(fwrites)); - break; - } catch (Error& e) { - TraceEvent(SevError, "FastRestoreWorkloadTransformDatabaseContentsUnexpectedErrorOnWriteKVs").error(e); - wait(tr.onError(e)); - } - } - - // Sanity check - tr.reset(); - loop { - try { - tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - tr.setOption(FDBTransactionOptions::LOCK_AWARE); - RangeResult allData = wait(tr.getRange(normalKeys, CLIENT_KNOBS->TOO_MANY)); - TraceEvent(SevFRTestInfo, "SanityCheckData").detail("Size", allData.size()); - for (int i = 0; i < allData.size(); ++i) { - std::pair backupRestoreValid = insideValidRange(allData[i], restoreRanges, backupRanges); - TraceEvent(backupRestoreValid.first ? SevFRTestInfo : SevError, "SanityCheckData") - .detail("Index", i) - .detail("Key", allData[i].key) - .detail("Value", allData[i].value) - .detail("InsideBackupRange", backupRestoreValid.first) - .detail("InsideRestoreRange", backupRestoreValid.second); - } - break; - } catch (Error& e) { - wait(tr.onError(e)); - } - } - - TraceEvent("FastRestoreWorkloadTransformDatabaseContentsFinish") - .detail("AddPrefix", addPrefix) - .detail("RemovePrefix", removePrefix); - - return Void(); -} - -// addPrefix and removePrefix are the options used in the restore request: -// every backup key applied removePrefix and addPrefix in restore; -// transformRestoredDatabase actor will revert it by remove addPrefix and add removePrefix. -ACTOR Future transformRestoredDatabase(Database cx, - Standalone> backupRanges, - Key addPrefix, - Key removePrefix) { - try { - Standalone> restoreRanges; - for (int i = 0; i < backupRanges.size(); ++i) { - KeyRange range(backupRanges[i]); - Key begin = range.begin.removePrefix(removePrefix).withPrefix(addPrefix); - Key end = range.end.removePrefix(removePrefix).withPrefix(addPrefix); - TraceEvent("FastRestoreTransformRestoredDatabase") - .detail("From", KeyRangeRef(begin.contents(), end.contents())) - .detail("To", range); - restoreRanges.push_back_deep(restoreRanges.arena(), KeyRangeRef(begin.contents(), end.contents())); - } - wait(transformDatabaseContents(cx, removePrefix, addPrefix, restoreRanges)); - } catch (Error& e) { - TraceEvent(SevError, "FastRestoreTransformRestoredDatabaseUnexpectedError").error(e); - throw; - } - - return Void(); -} - void simulateBlobFailure() { if (BUGGIFY && deterministicRandom()->random01() < 0.01) { // Simulate blob failures double i = deterministicRandom()->random01(); diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp index 1455efc1b77..ce041a17571 100644 --- a/fdbclient/ServerKnobs.cpp +++ b/fdbclient/ServerKnobs.cpp @@ -1115,6 +1115,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi init( PHYSICAL_SHARD_MOVE_LOG_SEVERITY, 1 ); init( FETCH_SHARD_BUFFER_BYTE_LIMIT, 20e6 ); if( randomize && BUGGIFY ) FETCH_SHARD_BUFFER_BYTE_LIMIT = 1; init( FETCH_SHARD_UPDATES_BYTE_LIMIT, 2500000 ); if( randomize && BUGGIFY ) FETCH_SHARD_UPDATES_BYTE_LIMIT = 100; + init( STORAGE_UPDATE_PROCESS_STATS_INTERVAL, 5 ); if( randomize && BUGGIFY ) { STORAGE_UPDATE_PROCESS_STATS_INTERVAL = deterministicRandom()->random01() * 60 + 1; } //Wait Failure init( MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS, 250 ); if( randomize && BUGGIFY ) MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS = 2; @@ -1185,53 +1186,6 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi init( TIME_KEEPER_DELAY, 10 ); init( TIME_KEEPER_MAX_ENTRIES, 3600 * 24 * 30 * 6 ); if( randomize && BUGGIFY ) { TIME_KEEPER_MAX_ENTRIES = 2; } - // Fast Restore - init( FASTRESTORE_FAILURE_TIMEOUT, 3600 ); - init( FASTRESTORE_HEARTBEAT_INTERVAL, 60 ); - init( FASTRESTORE_SAMPLING_PERCENT, 100 ); if( randomize && BUGGIFY ) { FASTRESTORE_SAMPLING_PERCENT = deterministicRandom()->random01() * 100; } - init( FASTRESTORE_NUM_LOADERS, 3 ); if( randomize && BUGGIFY ) { FASTRESTORE_NUM_LOADERS = deterministicRandom()->random01() * 10 + 1; } - init( FASTRESTORE_NUM_APPLIERS, 3 ); if( randomize && BUGGIFY ) { FASTRESTORE_NUM_APPLIERS = deterministicRandom()->random01() * 10 + 1; } - init( FASTRESTORE_TXN_BATCH_MAX_BYTES, 1024.0 * 1024.0 ); if( randomize && BUGGIFY ) { FASTRESTORE_TXN_BATCH_MAX_BYTES = deterministicRandom()->random01() * 1024.0 * 1024.0 + 1.0; } - init( FASTRESTORE_VERSIONBATCH_MAX_BYTES, 10.0 * 1024.0 * 1024.0 ); if( randomize && BUGGIFY ) { FASTRESTORE_VERSIONBATCH_MAX_BYTES = deterministicRandom()->random01() < 0.2 ? 50 * 1024 : deterministicRandom()->random01() < 0.4 ? 100 * 1024 * 1024 : deterministicRandom()->random01() * 1000.0 * 1024.0 * 1024.0; } // too small value may increase chance of TooManyFile error - init( FASTRESTORE_VB_PARALLELISM, 5 ); if( randomize && BUGGIFY ) { FASTRESTORE_VB_PARALLELISM = deterministicRandom()->random01() < 0.2 ? 2 : deterministicRandom()->random01() * 10 + 1; } - init( FASTRESTORE_VB_MONITOR_DELAY, 30 ); if( randomize && BUGGIFY ) { FASTRESTORE_VB_MONITOR_DELAY = deterministicRandom()->random01() * 20 + 1; } - init( FASTRESTORE_VB_LAUNCH_DELAY, 1.0 ); if( randomize && BUGGIFY ) { FASTRESTORE_VB_LAUNCH_DELAY = deterministicRandom()->random01() < 0.2 ? 0.1 : deterministicRandom()->random01() * 10.0 + 1; } - init( FASTRESTORE_ROLE_LOGGING_DELAY, 5 ); if( randomize && BUGGIFY ) { FASTRESTORE_ROLE_LOGGING_DELAY = deterministicRandom()->random01() * 60 + 1; } - init( FASTRESTORE_UPDATE_PROCESS_STATS_INTERVAL, 5 ); if( randomize && BUGGIFY ) { FASTRESTORE_UPDATE_PROCESS_STATS_INTERVAL = deterministicRandom()->random01() * 60 + 1; } - init( FASTRESTORE_ATOMICOP_WEIGHT, 1 ); if( randomize && BUGGIFY ) { FASTRESTORE_ATOMICOP_WEIGHT = deterministicRandom()->random01() * 200 + 1; } - init( FASTRESTORE_MONITOR_LEADER_DELAY, 5 ); if( randomize && BUGGIFY ) { FASTRESTORE_MONITOR_LEADER_DELAY = deterministicRandom()->random01() * 100; } - init( FASTRESTORE_STRAGGLER_THRESHOLD_SECONDS, 60 ); if( randomize && BUGGIFY ) { FASTRESTORE_STRAGGLER_THRESHOLD_SECONDS = deterministicRandom()->random01() * 240 + 10; } - init( FASTRESTORE_TRACK_REQUEST_LATENCY, false ); if( randomize && BUGGIFY ) { FASTRESTORE_TRACK_REQUEST_LATENCY = false; } - init( FASTRESTORE_MEMORY_THRESHOLD_MB_SOFT, 6144 ); if( randomize && BUGGIFY ) { FASTRESTORE_MEMORY_THRESHOLD_MB_SOFT = 1; } - init( FASTRESTORE_WAIT_FOR_MEMORY_LATENCY, 10 ); if( randomize && BUGGIFY ) { FASTRESTORE_WAIT_FOR_MEMORY_LATENCY = 60; } - init( FASTRESTORE_HEARTBEAT_DELAY, 10 ); if( randomize && BUGGIFY ) { FASTRESTORE_HEARTBEAT_DELAY = deterministicRandom()->random01() * 120 + 2; } - init( FASTRESTORE_HEARTBEAT_MAX_DELAY, 10 ); if( randomize && BUGGIFY ) { FASTRESTORE_HEARTBEAT_MAX_DELAY = FASTRESTORE_HEARTBEAT_DELAY * 10; } - init( FASTRESTORE_APPLIER_FETCH_KEYS_SIZE, 100 ); if( randomize && BUGGIFY ) { FASTRESTORE_APPLIER_FETCH_KEYS_SIZE = deterministicRandom()->random01() * 10240 + 1; } - init( FASTRESTORE_LOADER_SEND_MUTATION_MSG_BYTES, 1.0 * 1024.0 * 1024.0 ); if( randomize && BUGGIFY ) { FASTRESTORE_LOADER_SEND_MUTATION_MSG_BYTES = deterministicRandom()->random01() < 0.2 ? 1024 : deterministicRandom()->random01() * 5.0 * 1024.0 * 1024.0 + 1; } - init( FASTRESTORE_GET_RANGE_VERSIONS_EXPENSIVE, false ); if( randomize && BUGGIFY ) { FASTRESTORE_GET_RANGE_VERSIONS_EXPENSIVE = deterministicRandom()->random01() < 0.5 ? true : false; } - init( FASTRESTORE_REQBATCH_PARALLEL, 50 ); if( randomize && BUGGIFY ) { FASTRESTORE_REQBATCH_PARALLEL = deterministicRandom()->random01() * 100 + 1; } - init( FASTRESTORE_REQBATCH_LOG, false ); if( randomize && BUGGIFY ) { FASTRESTORE_REQBATCH_LOG = deterministicRandom()->random01() < 0.2 ? true : false; } - init( FASTRESTORE_TXN_CLEAR_MAX, 100 ); if( randomize && BUGGIFY ) { FASTRESTORE_TXN_CLEAR_MAX = deterministicRandom()->random01() * 100 + 1; } - init( FASTRESTORE_TXN_RETRY_MAX, 10 ); if( randomize && BUGGIFY ) { FASTRESTORE_TXN_RETRY_MAX = deterministicRandom()->random01() * 100 + 1; } - init( FASTRESTORE_TXN_EXTRA_DELAY, 0.0 ); if( randomize && BUGGIFY ) { FASTRESTORE_TXN_EXTRA_DELAY = deterministicRandom()->random01() * 1 + 0.001;} - init( FASTRESTORE_NOT_WRITE_DB, false ); // Perf test only: set it to true will cause simulation failure - init( FASTRESTORE_USE_RANGE_FILE, true ); // Perf test only: set it to false will cause simulation failure - init( FASTRESTORE_USE_LOG_FILE, true ); // Perf test only: set it to false will cause simulation failure - init( FASTRESTORE_SAMPLE_MSG_BYTES, 1048576 ); if( randomize && BUGGIFY ) { FASTRESTORE_SAMPLE_MSG_BYTES = deterministicRandom()->random01() * 2048;} - init( FASTRESTORE_SCHED_UPDATE_DELAY, 0.1 ); if( randomize && BUGGIFY ) { FASTRESTORE_SCHED_UPDATE_DELAY = deterministicRandom()->random01() * 2;} - init( FASTRESTORE_SCHED_TARGET_CPU_PERCENT, 70 ); if( randomize && BUGGIFY ) { FASTRESTORE_SCHED_TARGET_CPU_PERCENT = deterministicRandom()->random01() * 100 + 50;} // simulate cpu usage can be larger than 100 - init( FASTRESTORE_SCHED_MAX_CPU_PERCENT, 90 ); if( randomize && BUGGIFY ) { FASTRESTORE_SCHED_MAX_CPU_PERCENT = FASTRESTORE_SCHED_TARGET_CPU_PERCENT + deterministicRandom()->random01() * 100;} - init( FASTRESTORE_SCHED_INFLIGHT_LOAD_REQS, 50 ); if( randomize && BUGGIFY ) { FASTRESTORE_SCHED_INFLIGHT_LOAD_REQS = deterministicRandom()->random01() < 0.2 ? 1 : deterministicRandom()->random01() * 30 + 1;} - init( FASTRESTORE_SCHED_INFLIGHT_SEND_REQS, 3 ); if( randomize && BUGGIFY ) { FASTRESTORE_SCHED_INFLIGHT_SEND_REQS = deterministicRandom()->random01() < 0.2 ? 1 : deterministicRandom()->random01() * 10 + 1;} - init( FASTRESTORE_SCHED_LOAD_REQ_BATCHSIZE, 5 ); if( randomize && BUGGIFY ) { FASTRESTORE_SCHED_LOAD_REQ_BATCHSIZE = deterministicRandom()->random01() < 0.2 ? 1 : deterministicRandom()->random01() * 10 + 1;} - init( FASTRESTORE_SCHED_INFLIGHT_SENDPARAM_THRESHOLD, 10 ); if( randomize && BUGGIFY ) { FASTRESTORE_SCHED_INFLIGHT_SENDPARAM_THRESHOLD = deterministicRandom()->random01() < 0.2 ? 1 : deterministicRandom()->random01() * 15 + 1;} - init( FASTRESTORE_SCHED_SEND_FUTURE_VB_REQS_BATCH, 2 ); if( randomize && BUGGIFY ) { FASTRESTORE_SCHED_SEND_FUTURE_VB_REQS_BATCH = deterministicRandom()->random01() < 0.2 ? 1 : deterministicRandom()->random01() * 15 + 1;} - init( FASTRESTORE_NUM_TRACE_EVENTS, 100 ); if( randomize && BUGGIFY ) { FASTRESTORE_NUM_TRACE_EVENTS = deterministicRandom()->random01() < 0.2 ? 1 : deterministicRandom()->random01() * 500 + 1;} - init( FASTRESTORE_EXPENSIVE_VALIDATION, false ); if( randomize && BUGGIFY ) { FASTRESTORE_EXPENSIVE_VALIDATION = deterministicRandom()->random01() < 0.5 ? true : false;} - init( FASTRESTORE_WRITE_BW_MB, 70 ); if( randomize && BUGGIFY ) { FASTRESTORE_WRITE_BW_MB = deterministicRandom()->random01() < 0.5 ? 2 : 100;} - init( FASTRESTORE_RATE_UPDATE_SECONDS, 1.0 ); if( randomize && BUGGIFY ) { FASTRESTORE_RATE_UPDATE_SECONDS = deterministicRandom()->random01() < 0.5 ? 0.1 : 2;} - init( FASTRESTORE_DUMP_INSERT_RANGE_VERSION, false ); - init( REDWOOD_DEFAULT_PAGE_SIZE, 8192 ); init( REDWOOD_DEFAULT_EXTENT_SIZE, 32 * 1024 * 1024 ); init( REDWOOD_DEFAULT_EXTENT_READ_SIZE, 1024 * 1024 ); diff --git a/fdbclient/include/fdbclient/BackupAgent.actor.h b/fdbclient/include/fdbclient/BackupAgent.actor.h index 1635220c1a0..c3177594c76 100644 --- a/fdbclient/include/fdbclient/BackupAgent.actor.h +++ b/fdbclient/include/fdbclient/BackupAgent.actor.h @@ -163,24 +163,6 @@ class FileBackupAgent : public BackupAgentBase { static StringRef restoreStateText(ERestoreState id); static Key getPauseKey(); - // parallel restore - Future parallelRestoreFinish(Database cx, UID randomUID, UnlockDB = UnlockDB::True); - Future submitParallelRestore(Database cx, - Key backupTag, - Standalone> backupRanges, - Key bcUrl, - Optional proxy, - Version targetVersion, - LockDB lockDB, - UID randomUID, - Key addPrefix, - Key removePrefix); - Future atomicParallelRestore(Database cx, - Key tagName, - Standalone> ranges, - Key addPrefix, - Key removePrefix); - // restore() will // - make sure that url is readable and appears to be a complete backup // - make sure the requested TargetVersion is valid @@ -1039,15 +1021,6 @@ ACTOR Future>> decodeMutationLogFileBlock(Refe Value makePadding(int size); } // namespace fileBackup -// For fast restore simulation test -// For testing addPrefix feature in fast restore. -// Transform db content in restoreRanges by removePrefix and then addPrefix. -// Assume: DB is locked -ACTOR Future transformRestoredDatabase(Database cx, - Standalone> backupRanges, - Key addPrefix, - Key removePrefix); - void simulateBlobFailure(); // Add the set of ranges that are backed up in a default backup to the given vector. This consists of all normal keys diff --git a/fdbclient/include/fdbclient/ServerKnobs.h b/fdbclient/include/fdbclient/ServerKnobs.h index a3318620712..d5f85fd9e8b 100644 --- a/fdbclient/include/fdbclient/ServerKnobs.h +++ b/fdbclient/include/fdbclient/ServerKnobs.h @@ -1148,6 +1148,7 @@ class SWIFT_CXX_IMMORTAL_SINGLETON_TYPE ServerKnobs : public KnobsImpl #include "fdbclient/ClientBooleanParams.h" #include "fdbclient/Knobs.h" -#include "fdbserver/RestoreCommon.actor.h" -#include "fdbserver/RestoreUtil.h" #include "fdbserver/StorageMetrics.actor.h" #include "flow/CodeProbe.h" #include "flow/Error.h" diff --git a/fdbserver/CommitProxyServer.actor.cpp b/fdbserver/CommitProxyServer.actor.cpp index f6484ebfe7d..122f198330e 100644 --- a/fdbserver/CommitProxyServer.actor.cpp +++ b/fdbserver/CommitProxyServer.actor.cpp @@ -57,7 +57,6 @@ #include "fdbserver/ProxyCommitData.actor.h" #include "fdbserver/RatekeeperInterface.h" #include "fdbserver/RecoveryState.h" -#include "fdbserver/RestoreUtil.h" #include "fdbserver/ServerDBInfo.actor.h" #include "fdbserver/WaitFailure.h" #include "fdbserver/WorkerInterface.actor.h" @@ -3784,7 +3783,7 @@ ACTOR Future processCompleteTransactionStateRequest(TransactionStateResolv ((KeyRangeRef&)txnKeys) = KeyRangeRef(keyAfter(data.back().key, txnKeys.arena()), txnKeys.end); - MutationsVec mutations; + Standalone> mutations; std::vector, int>> keyInfoData; std::vector src, dest; ServerCacheInfo info; diff --git a/fdbserver/Resolver.actor.cpp b/fdbserver/Resolver.actor.cpp index 4f877e27e67..2d9d71cded7 100644 --- a/fdbserver/Resolver.actor.cpp +++ b/fdbserver/Resolver.actor.cpp @@ -34,7 +34,6 @@ #include "fdbserver/LogSystemDiskQueueAdapter.h" #include "fdbserver/MasterInterface.h" #include "fdbserver/ResolverInterface.h" -#include "fdbserver/RestoreUtil.h" #include "fdbserver/ServerDBInfo.h" #include "fdbserver/StorageMetrics.actor.h" #include "fdbserver/WaitFailure.h" @@ -608,7 +607,7 @@ ACTOR Future processCompleteTransactionStateRequest( ((KeyRangeRef&)txnKeys) = KeyRangeRef(keyAfter(data.back().key, txnKeys.arena()), txnKeys.end); - MutationsVec mutations; + Standalone> mutations; std::vector, int>> keyInfoData; std::vector src, dest; ServerCacheInfo info; diff --git a/fdbserver/RestoreApplier.actor.cpp b/fdbserver/RestoreApplier.actor.cpp deleted file mode 100644 index 30d8d38aafd..00000000000 --- a/fdbserver/RestoreApplier.actor.cpp +++ /dev/null @@ -1,825 +0,0 @@ -/* - * RestoreApplier.actor.cpp - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// This file defines the functions used by the RestoreApplier role. -// RestoreApplier role starts at restoreApplierCore actor - -#include "fdbclient/NativeAPI.actor.h" -#include "fdbclient/SystemData.h" -#include "fdbclient/BackupAgent.actor.h" -#include "fdbclient/ManagementAPI.actor.h" -#include "fdbclient/MutationList.h" -#include "fdbclient/BackupContainer.h" -#include "fdbserver/Knobs.h" -#include "fdbserver/RestoreCommon.actor.h" -#include "fdbserver/RestoreUtil.h" -#include "fdbserver/RestoreRoleCommon.actor.h" -#include "fdbserver/RestoreApplier.actor.h" - -#include "flow/network.h" - -#include "flow/actorcompiler.h" // This must be the last #include. - -ACTOR static Future handleSendMutationVectorRequest(RestoreSendVersionedMutationsRequest req, - Reference self); -ACTOR static Future handleApplyToDBRequest(RestoreVersionBatchRequest req, - Reference self, - Database cx); -void handleUpdateRateRequest(RestoreUpdateRateRequest req, Reference self); - -ACTOR Future restoreApplierCore(RestoreApplierInterface applierInterf, int nodeIndex, Database cx) { - state Reference self = makeReference(applierInterf.id(), nodeIndex); - state ActorCollection actors(false); - state Future exitRole = Never(); - - actors.add(updateProcessMetrics(self)); - actors.add(traceProcessMetrics(self, "RestoreApplier")); - actors.add(traceRoleVersionBatchProgress(self, "RestoreApplier")); - - loop { - state std::string requestTypeStr = "[Init]"; - - try { - choose { - when(RestoreSimpleRequest req = waitNext(applierInterf.heartbeat.getFuture())) { - requestTypeStr = "heartbeat"; - actors.add(handleHeartbeat(req, applierInterf.id())); - } - when(RestoreSendVersionedMutationsRequest req = - waitNext(applierInterf.sendMutationVector.getFuture())) { - requestTypeStr = "sendMutationVector"; - actors.add(handleSendMutationVectorRequest(req, self)); - } - when(RestoreVersionBatchRequest req = waitNext(applierInterf.applyToDB.getFuture())) { - requestTypeStr = "applyToDB"; - actors.add(handleApplyToDBRequest( - req, self, cx)); // TODO: Check how FDB uses TaskPriority for ACTORS. We may need to add - // priority here to avoid requests at later VB block requests at earlier VBs - } - when(RestoreUpdateRateRequest req = waitNext(applierInterf.updateRate.getFuture())) { - requestTypeStr = "updateRate"; - handleUpdateRateRequest(req, self); - } - when(RestoreVersionBatchRequest req = waitNext(applierInterf.initVersionBatch.getFuture())) { - requestTypeStr = "initVersionBatch"; - actors.add(handleInitVersionBatchRequest(req, self)); - } - when(RestoreFinishRequest req = waitNext(applierInterf.finishRestore.getFuture())) { - requestTypeStr = "finishRestore"; - actors.clear(false); // cancel all pending actors - handleFinishRestoreRequest(req, self); - if (req.terminate) { - exitRole = Void(); - } - } - when(wait(actors.getResult())) {} - when(wait(exitRole)) { - TraceEvent("RestoreApplierCoreExitRole", self->id()); - break; - } - } - //TraceEvent("RestoreApplierCore", self->id()).detail("Request", requestTypeStr); // For debug only - } catch (Error& e) { - bool isError = e.code() != error_code_operation_cancelled; - TraceEvent(isError ? SevError : SevWarnAlways, "FastRestoreApplierError", self->id()) - .errorUnsuppressed(e) - .detail("RequestType", requestTypeStr); - actors.clear(false); - break; - } - } - - return Void(); -} - -// The actor may be invoked multiple times and executed async. -// No race condition as long as we do not wait or yield when operate the shared -// data. Multiple such actors can run on different fileIDs. -// Different files may contain mutations of the same commit versions, but with -// different subsequence number. -// Only one actor can process mutations from the same file. -ACTOR static Future handleSendMutationVectorRequest(RestoreSendVersionedMutationsRequest req, - Reference self) { - state Reference batchData; // initialized as nullptr - state bool printTrace = false; - state NotifiedVersion* curMsgIndex = nullptr; - - if (req.batchIndex <= self->finishedBatch.get()) { // Handle duplicate request from batchIndex that has finished - TraceEvent(SevWarn, "FastRestoreApplierRestoreSendVersionedMutationsRequestTooLate") - .detail("RequestBatchIndex", req.batchIndex) - .detail("FinishedBatchIndex", self->finishedBatch.get()); - req.reply.send(RestoreCommonReply(self->id(), true)); - ASSERT_WE_THINK(false); // Test to see if simulation can reproduce this - return Void(); - } - - batchData = self->batch[req.batchIndex]; - - ASSERT(batchData.isValid()); - ASSERT(self->finishedBatch.get() < req.batchIndex); - // wait(delay(0.0, TaskPriority::RestoreApplierReceiveMutations)); // This hurts performance from 100MB/s to 60MB/s - // on circus - - batchData->receiveMutationReqs += 1; - // Trace when the receive phase starts at a VB and when it finishes. - // This can help check if receiveMutations block applyMutation phase. - // If so, we need more sophisticated scheduler to ensure priority execution - printTrace = (batchData->receiveMutationReqs % SERVER_KNOBS->FASTRESTORE_NUM_TRACE_EVENTS == 0); - TraceEvent(printTrace ? SevInfo : SevFRDebugInfo, "FastRestoreApplierPhaseReceiveMutations", self->id()) - .detail("BatchIndex", req.batchIndex) - .detail("RestoreAsset", req.asset.toString()) - .detail("RestoreAssetMesssageIndex", batchData->processedFileState[req.asset].get()) - .detail("Request", req.toString()) - .detail("CurrentMemory", getSystemStatistics().processMemory) - .detail("PreviousVersionBatchState", batchData->vbState.get()) - .detail("ReceiveMutationRequests", batchData->receiveMutationReqs); - - wait(isSchedulable(self, req.batchIndex, __FUNCTION__)); - - ASSERT(batchData.isValid()); - ASSERT(req.batchIndex > self->finishedBatch.get()); - // Assume: processedFileState[req.asset] will not be erased while the actor is active. - // Note: Insert new items into processedFileState will not invalidate the reference. - curMsgIndex = &batchData->processedFileState[req.asset]; - wait(curMsgIndex->whenAtLeast(req.msgIndex - 1)); - batchData->vbState = ApplierVersionBatchState::RECEIVE_MUTATIONS; - - state bool isDuplicated = true; - if (curMsgIndex->get() == req.msgIndex - 1) { - isDuplicated = false; - - for (int mIndex = 0; mIndex < req.versionedMutations.size(); mIndex++) { - const VersionedMutationSerialized& versionedMutation = req.versionedMutations[mIndex]; - TraceEvent(SevFRDebugInfo, "FastRestoreApplierPhaseReceiveMutations", self->id()) - .detail("RestoreAsset", req.asset.toString()) - .detail("Version", versionedMutation.version.toString()) - .detail("Index", mIndex) - .detail("MutationReceived", versionedMutation.mutation.toString()); - batchData->receivedBytes += versionedMutation.mutation.totalSize(); - batchData->counters.receivedBytes += versionedMutation.mutation.totalSize(); - batchData->counters.receivedWeightedBytes += - versionedMutation.mutation.weightedTotalSize(); // atomicOp will be amplified - batchData->counters.receivedMutations += 1; - batchData->counters.receivedAtomicOps += - isAtomicOp((MutationRef::Type)versionedMutation.mutation.type) ? 1 : 0; - // Sanity check - ASSERT_WE_THINK(req.asset.isInVersionRange(versionedMutation.version.version)); - ASSERT_WE_THINK(req.asset.isInKeyRange( - versionedMutation.mutation)); // mutation is already applied removePrefix and addPrefix - - // Note: Log and range mutations may be delivered out of order. Can we handle it? - batchData->addMutation(versionedMutation.mutation, versionedMutation.version); - - ASSERT(versionedMutation.mutation.type != MutationRef::SetVersionstampedKey && - versionedMutation.mutation.type != MutationRef::SetVersionstampedValue); - } - curMsgIndex->set(req.msgIndex); - } - - req.reply.send(RestoreCommonReply(self->id(), isDuplicated)); - TraceEvent(printTrace ? SevInfo : SevFRDebugInfo, "FastRestoreApplierPhaseReceiveMutationsDone", self->id()) - .detail("BatchIndex", req.batchIndex) - .detail("RestoreAsset", req.asset.toString()) - .detail("ProcessedMessageIndex", curMsgIndex->get()) - .detail("Request", req.toString()); - return Void(); -} - -// Clear all ranges in input ranges -ACTOR static Future applyClearRangeMutations(Standalone> ranges, - double delayTime, - Database cx, - UID applierID, - int batchIndex, - ApplierBatchData::Counters* cc) { - state Reference tr(new ReadYourWritesTransaction(cx)); - state int retries = 0; - state double numOps = 0; - wait(delay(delayTime + deterministicRandom()->random01() * delayTime)); - TraceEvent(delayTime > 5 ? SevWarnAlways : SevDebug, "FastRestoreApplierClearRangeMutationsStart", applierID) - .detail("BatchIndex", batchIndex) - .detail("Ranges", ranges.size()) - .detail("DelayTime", delayTime); - if (SERVER_KNOBS->FASTRESTORE_NOT_WRITE_DB) { - TraceEvent("FastRestoreApplierClearRangeMutationsNotWriteDB", applierID) - .detail("BatchIndex", batchIndex) - .detail("Ranges", ranges.size()); - ASSERT(!g_network->isSimulated()); - return Void(); - } - - loop { - try { - // TODO: Consider clearrange traffic in write traffic control - tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - tr->setOption(FDBTransactionOptions::LOCK_AWARE); - for (auto& range : ranges) { - debugFRMutation("FastRestoreApplierApplyClearRangeMutation", - 0, - MutationRef(MutationRef::ClearRange, range.begin, range.end)); - tr->clear(range); - cc->clearOps += 1; - ++numOps; - if (numOps >= SERVER_KNOBS->FASTRESTORE_TXN_CLEAR_MAX) { - TraceEvent(SevWarn, "FastRestoreApplierClearRangeMutationsTooManyClearsInTxn") - .suppressFor(5.0) - .detail("Clears", numOps) - .detail("Ranges", ranges.size()) - .detail("Range", range.toString()); - } - } - wait(tr->commit()); - cc->clearTxns += 1; - break; - } catch (Error& e) { - retries++; - if (retries > SERVER_KNOBS->FASTRESTORE_TXN_RETRY_MAX) { - TraceEvent(SevWarnAlways, "RestoreApplierApplyClearRangeMutationsStuck", applierID) - .error(e) - .detail("BatchIndex", batchIndex) - .detail("ClearRanges", ranges.size()); - } - wait(tr->onError(e)); - } - } - return Void(); -} - -// Get keys in incompleteStagingKeys and precompute the stagingKey which is stored in batchData->stagingKeys -ACTOR static Future getAndComputeStagingKeys( - std::map::iterator> incompleteStagingKeys, - double delayTime, - Database cx, - UID applierID, - int batchIndex, - ApplierBatchData::Counters* cc) { - state Reference tr(new ReadYourWritesTransaction(cx)); - state std::vector>> fValues(incompleteStagingKeys.size(), Never()); - state int retries = 0; - state UID randomID = deterministicRandom()->randomUniqueID(); - - wait(delay(delayTime + deterministicRandom()->random01() * delayTime)); - - if (SERVER_KNOBS->FASTRESTORE_NOT_WRITE_DB) { // Get dummy value to short-circut DB - TraceEvent("FastRestoreApplierGetAndComputeStagingKeysStartNotUseDB", applierID) - .detail("RandomUID", randomID) - .detail("BatchIndex", batchIndex) - .detail("GetKeys", incompleteStagingKeys.size()) - .detail("DelayTime", delayTime); - ASSERT(!g_network->isSimulated()); - for (auto& key : incompleteStagingKeys) { - MutationRef m(MutationRef::SetValue, key.first, "0"_sr); - key.second->second.add(m, LogMessageVersion(1)); - key.second->second.precomputeResult("GetAndComputeStagingKeys", applierID, batchIndex); - } - return Void(); - } - - TraceEvent("FastRestoreApplierGetAndComputeStagingKeysStart", applierID) - .detail("RandomUID", randomID) - .detail("BatchIndex", batchIndex) - .detail("GetKeys", incompleteStagingKeys.size()) - .detail("DelayTime", delayTime); - - loop { - try { - int i = 0; - tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - tr->setOption(FDBTransactionOptions::LOCK_AWARE); - for (auto& key : incompleteStagingKeys) { - fValues[i++] = tr->get(key.first); - cc->fetchKeys += 1; - } - wait(waitForAll(fValues)); - cc->fetchTxns += 1; - break; - } catch (Error& e) { - cc->fetchTxnRetries += 1; - if (retries++ > incompleteStagingKeys.size()) { - if (e.code() != error_code_actor_cancelled) { - TraceEvent(SevWarnAlways, "GetAndComputeStagingKeys", applierID) - .errorUnsuppressed(e) - .suppressFor(1.0) - .detail("RandomUID", randomID) - .detail("BatchIndex", batchIndex); - } - } - wait(tr->onError(e)); - } - } - - ASSERT(fValues.size() == incompleteStagingKeys.size()); - int i = 0; - for (auto& key : incompleteStagingKeys) { - if (!fValues[i].get().present()) { // Key not exist in DB - // if condition: fValues[i].Valid() && fValues[i].isReady() && !fValues[i].isError() && - TraceEvent(SevDebug, "FastRestoreApplierGetAndComputeStagingKeysNoBaseValueInDB", applierID) - .suppressFor(5.0) - .detail("BatchIndex", batchIndex) - .detail("Key", key.first) - .detail("IsReady", fValues[i].isReady()) - .detail("PendingMutations", key.second->second.pendingMutations.size()) - .detail("StagingKeyType", getTypeString(key.second->second.type)); - for (auto& vm : key.second->second.pendingMutations) { - TraceEvent(SevDebug, "FastRestoreApplierGetAndComputeStagingKeysNoBaseValueInDB") - .detail("PendingMutationVersion", vm.first.toString()) - .detail("PendingMutation", vm.second.toString()); - } - key.second->second.precomputeResult("GetAndComputeStagingKeysNoBaseValueInDB", applierID, batchIndex); - } else { - // The key's version ideally should be the most recently committed version. - // But as long as it is > 1 and less than the start version of the version batch, it is the same result. - MutationRef m(MutationRef::SetValue, key.first, fValues[i].get().get()); - key.second->second.add(m, LogMessageVersion(1)); - key.second->second.precomputeResult("GetAndComputeStagingKeys", applierID, batchIndex); - } - i++; - } - - TraceEvent("FastRestoreApplierGetAndComputeStagingKeysDone", applierID) - .detail("RandomUID", randomID) - .detail("BatchIndex", batchIndex) - .detail("GetKeys", incompleteStagingKeys.size()) - .detail("DelayTime", delayTime); - - return Void(); -} - -ACTOR static Future precomputeMutationsResult(Reference batchData, - UID applierID, - int64_t batchIndex, - Database cx) { - // Apply range mutations (i.e., clearRange) to database cx - TraceEvent("FastRestoreApplerPhasePrecomputeMutationsResultStart", applierID) - .detail("BatchIndex", batchIndex) - .detail("Step", "Applying clear range mutations to DB") - .detail("ClearRanges", batchData->stagingKeyRanges.size()); - state std::vector> fClearRanges; - Standalone> clearRanges; - double curTxnSize = 0; - { - double delayTime = 0; - for (auto& rangeMutation : batchData->stagingKeyRanges) { - KeyRangeRef range(rangeMutation.mutation.param1, rangeMutation.mutation.param2); - debugFRMutation("FastRestoreApplierPrecomputeMutationsResultClearRange", - rangeMutation.version.version, - MutationRef(MutationRef::ClearRange, range.begin, range.end)); - clearRanges.push_back_deep(clearRanges.arena(), range); - curTxnSize += range.expectedSize(); - if (curTxnSize >= SERVER_KNOBS->FASTRESTORE_TXN_BATCH_MAX_BYTES) { - fClearRanges.push_back( - applyClearRangeMutations(clearRanges, delayTime, cx, applierID, batchIndex, &batchData->counters)); - delayTime += SERVER_KNOBS->FASTRESTORE_TXN_EXTRA_DELAY; - clearRanges = Standalone>(); - curTxnSize = 0; - } - } - if (curTxnSize > 0) { - fClearRanges.push_back( - applyClearRangeMutations(clearRanges, delayTime, cx, applierID, batchIndex, &batchData->counters)); - } - } - - // Apply range mutations (i.e., clearRange) to stagingKeyRanges - TraceEvent("FastRestoreApplerPhasePrecomputeMutationsResult", applierID) - .detail("BatchIndex", batchIndex) - .detail("Step", "Applying clear range mutations to staging keys") - .detail("ClearRanges", batchData->stagingKeyRanges.size()) - .detail("FutureClearRanges", fClearRanges.size()); - for (auto& rangeMutation : batchData->stagingKeyRanges) { - ASSERT(rangeMutation.mutation.param1 <= rangeMutation.mutation.param2); - std::map::iterator lb = batchData->stagingKeys.lower_bound(rangeMutation.mutation.param1); - std::map::iterator ub = batchData->stagingKeys.lower_bound(rangeMutation.mutation.param2); - while (lb != ub) { - if (lb->first >= rangeMutation.mutation.param2) { - TraceEvent(SevError, "FastRestoreApplerPhasePrecomputeMutationsResultIncorrectUpperBound") - .detail("Key", lb->first) - .detail("ClearRangeUpperBound", rangeMutation.mutation.param2) - .detail("UsedUpperBound", ub->first); - } - // We make the beginKey = endKey for the ClearRange on purpose so that - // we can sanity check ClearRange mutation when we apply it to DB. - MutationRef clearKey(MutationRef::ClearRange, lb->first, lb->first); - lb->second.add(clearKey, rangeMutation.version); - lb++; - } - } - TraceEvent("FastRestoreApplerPhasePrecomputeMutationsResult", applierID) - .detail("BatchIndex", batchIndex) - .detail("Step", "Wait on applying clear range mutations to DB") - .detail("FutureClearRanges", fClearRanges.size()); - - wait(waitForAll(fClearRanges)); - TraceEvent("FastRestoreApplerPhasePrecomputeMutationsResult", applierID) - .detail("BatchIndex", batchIndex) - .detail("Step", "Getting and computing staging keys") - .detail("StagingKeys", batchData->stagingKeys.size()); - - // Get keys in stagingKeys which does not have a baseline key by reading database cx, and precompute the key's value - std::vector> fGetAndComputeKeys; - std::map::iterator> incompleteStagingKeys; - std::map::iterator stagingKeyIter = batchData->stagingKeys.begin(); - int numKeysInBatch = 0; - int numGetTxns = 0; - { - double delayTime = 0; // Start transactions at different time to avoid overwhelming FDB. - for (; stagingKeyIter != batchData->stagingKeys.end(); stagingKeyIter++) { - if (!stagingKeyIter->second.hasBaseValue()) { - incompleteStagingKeys.emplace(stagingKeyIter->first, stagingKeyIter); - numKeysInBatch++; - } - if (numKeysInBatch == SERVER_KNOBS->FASTRESTORE_APPLIER_FETCH_KEYS_SIZE) { - fGetAndComputeKeys.push_back(getAndComputeStagingKeys( - incompleteStagingKeys, delayTime, cx, applierID, batchIndex, &batchData->counters)); - numGetTxns++; - delayTime += SERVER_KNOBS->FASTRESTORE_TXN_EXTRA_DELAY; - numKeysInBatch = 0; - incompleteStagingKeys.clear(); - } - } - if (numKeysInBatch > 0) { - numGetTxns++; - fGetAndComputeKeys.push_back(getAndComputeStagingKeys( - incompleteStagingKeys, delayTime, cx, applierID, batchIndex, &batchData->counters)); - } - } - - TraceEvent("FastRestoreApplerPhasePrecomputeMutationsResult", applierID) - .detail("BatchIndex", batchIndex) - .detail("Step", "Compute the other staging keys") - .detail("StagingKeys", batchData->stagingKeys.size()) - .detail("GetStagingKeyBatchTxns", numGetTxns); - // Pre-compute pendingMutations to other keys in stagingKeys that has base value - for (stagingKeyIter = batchData->stagingKeys.begin(); stagingKeyIter != batchData->stagingKeys.end(); - stagingKeyIter++) { - if (stagingKeyIter->second.hasBaseValue()) { - stagingKeyIter->second.precomputeResult("HasBaseValue", applierID, batchIndex); - } - } - - TraceEvent("FastRestoreApplierGetAndComputeStagingKeysWaitOn", applierID).log(); - wait(waitForAll(fGetAndComputeKeys)); - - // Sanity check all stagingKeys have been precomputed - ASSERT_WE_THINK(batchData->allKeysPrecomputed()); - - TraceEvent("FastRestoreApplerPhasePrecomputeMutationsResultDone", applierID).detail("BatchIndex", batchIndex); - - return Void(); -} - -bool okToReleaseTxns(double targetMB, double applyingDataBytes) { - return applyingDataBytes < targetMB * 1024 * 1024; -} - -ACTOR static Future shouldReleaseTransaction(double* targetMB, - double* applyingDataBytes, - AsyncTrigger* releaseTxns) { - loop { - if (okToReleaseTxns(*targetMB, *applyingDataBytes)) { - break; - } else { - wait(releaseTxns->onTrigger()); - wait(delay(0.0)); // Avoid all waiting txns are triggered at the same time and all decide to proceed before - // applyingDataBytes has a chance to update - } - } - return Void(); -} - -// Apply mutations in batchData->stagingKeys [begin, end). -ACTOR static Future applyStagingKeysBatch(std::map::iterator begin, - std::map::iterator end, - Database cx, - UID applierID, - ApplierBatchData::Counters* cc, - double* appliedBytes, - double* applyingDataBytes, - double* targetMB, - AsyncTrigger* releaseTxnTrigger) { - if (SERVER_KNOBS->FASTRESTORE_NOT_WRITE_DB) { - TraceEvent("FastRestoreApplierPhaseApplyStagingKeysBatchSkipped", applierID).detail("Begin", begin->first); - ASSERT(!g_network->isSimulated()); - return Void(); - } - wait(shouldReleaseTransaction(targetMB, applyingDataBytes, releaseTxnTrigger)); - - state Reference tr(new ReadYourWritesTransaction(cx)); - state int sets = 0; - state int clears = 0; - state Key endKey = begin->first; - state double txnSize = 0; - state double txnSizeUsed = 0; // txn size accounted in applyingDataBytes - TraceEvent(SevFRDebugInfo, "FastRestoreApplierPhaseApplyStagingKeysBatch", applierID).detail("Begin", begin->first); - loop { - try { - txnSize = 0; - txnSizeUsed = 0; - tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - tr->setOption(FDBTransactionOptions::LOCK_AWARE); - std::map::iterator iter = begin; - while (iter != end) { - if (iter->second.type == MutationRef::SetValue) { - tr->set(iter->second.key, iter->second.val); - txnSize += iter->second.totalSize(); - cc->appliedMutations += 1; - TraceEvent(SevFRMutationInfo, "FastRestoreApplierPhaseApplyStagingKeysBatch", applierID) - .detail("SetKey", iter->second.key); - sets++; - } else if (iter->second.type == MutationRef::ClearRange) { - if (iter->second.key != iter->second.val) { - TraceEvent(SevError, "FastRestoreApplierPhaseApplyStagingKeysBatchClearTooMuchData", applierID) - .detail("KeyBegin", iter->second.key) - .detail("KeyEnd", iter->second.val) - .detail("Version", iter->second.version.version) - .detail("SubVersion", iter->second.version.sub); - } - tr->clear(singleKeyRange(iter->second.key)); - txnSize += iter->second.totalSize(); - cc->appliedMutations += 1; - TraceEvent(SevFRMutationInfo, "FastRestoreApplierPhaseApplyStagingKeysBatch", applierID) - .detail("ClearKey", iter->second.key); - clears++; - } else { - ASSERT(false); - } - endKey = iter != end ? iter->first : endKey; - iter++; - if (sets > 10000000 || clears > 10000000) { - TraceEvent(SevError, "FastRestoreApplierPhaseApplyStagingKeysBatchInfiniteLoop", applierID) - .detail("Begin", begin->first) - .detail("Sets", sets) - .detail("Clears", clears); - } - } - TraceEvent(SevFRDebugInfo, "FastRestoreApplierPhaseApplyStagingKeysBatchPrecommit", applierID) - .detail("Begin", begin->first) - .detail("End", endKey) - .detail("Sets", sets) - .detail("Clears", clears); - tr->addWriteConflictRange(KeyRangeRef(begin->first, keyAfter(endKey))); // Reduce resolver load - txnSizeUsed = txnSize; - *applyingDataBytes += txnSizeUsed; // Must account for applying bytes before wait for write traffic control - wait(tr->commit()); - cc->appliedTxns += 1; - cc->appliedBytes += txnSize; - *appliedBytes += txnSize; - *applyingDataBytes -= txnSizeUsed; - if (okToReleaseTxns(*targetMB, *applyingDataBytes)) { - releaseTxnTrigger->trigger(); - } - break; - } catch (Error& e) { - cc->appliedTxnRetries += 1; - wait(tr->onError(e)); - *applyingDataBytes -= txnSizeUsed; - } - } - return Void(); -} - -// Apply mutations in stagingKeys in batches in parallel -ACTOR static Future applyStagingKeys(Reference batchData, - UID applierID, - int64_t batchIndex, - Database cx) { - std::map::iterator begin = batchData->stagingKeys.begin(); - std::map::iterator cur = begin; - state int txnBatches = 0; - double txnSize = 0; - std::vector> fBatches; - TraceEvent("FastRestoreApplerPhaseApplyStagingKeysStart", applierID) - .detail("BatchIndex", batchIndex) - .detail("StagingKeys", batchData->stagingKeys.size()); - batchData->totalBytesToWrite = 0; - while (cur != batchData->stagingKeys.end()) { - txnSize += cur->second.totalSize(); // should be consistent with receivedBytes accounting method - if (txnSize > SERVER_KNOBS->FASTRESTORE_TXN_BATCH_MAX_BYTES) { - fBatches.push_back(applyStagingKeysBatch(begin, - cur, - cx, - applierID, - &batchData->counters, - &batchData->appliedBytes, - &batchData->applyingDataBytes, - &batchData->targetWriteRateMB, - &batchData->releaseTxnTrigger)); - batchData->totalBytesToWrite += txnSize; - begin = cur; - txnSize = 0; - txnBatches++; - } - cur++; - } - if (begin != batchData->stagingKeys.end()) { - fBatches.push_back(applyStagingKeysBatch(begin, - cur, - cx, - applierID, - &batchData->counters, - &batchData->appliedBytes, - &batchData->applyingDataBytes, - &batchData->targetWriteRateMB, - &batchData->releaseTxnTrigger)); - batchData->totalBytesToWrite += txnSize; - txnBatches++; - } - - wait(waitForAll(fBatches)); - - TraceEvent("FastRestoreApplerPhaseApplyStagingKeysDone", applierID) - .detail("BatchIndex", batchIndex) - .detail("StagingKeys", batchData->stagingKeys.size()) - .detail("TransactionBatches", txnBatches) - .detail("TotalBytesToWrite", batchData->totalBytesToWrite); - return Void(); -} - -// Write mutations to the destination DB -ACTOR Future writeMutationsToDB(UID applierID, - int64_t batchIndex, - Reference batchData, - Database cx) { - TraceEvent("FastRestoreApplierPhaseApplyTxnStart", applierID).detail("BatchIndex", batchIndex); - wait(precomputeMutationsResult(batchData, applierID, batchIndex, cx)); - - wait(applyStagingKeys(batchData, applierID, batchIndex, cx)); - TraceEvent("FastRestoreApplierPhaseApplyTxnDone", applierID) - .detail("BatchIndex", batchIndex) - .detail("AppliedBytes", batchData->appliedBytes) - .detail("ReceivedBytes", batchData->receivedBytes); - - return Void(); -} - -void handleUpdateRateRequest(RestoreUpdateRateRequest req, Reference self) { - TraceEvent ev("FastRestoreApplierUpdateRateRequest", self->id()); - ev.suppressFor(10) - .detail("BatchIndex", req.batchIndex) - .detail("FinishedBatch", self->finishedBatch.get()) - .detail("WriteMB", req.writeMB); - double remainingDataMB = 0; - if (self->finishedBatch.get() == req.batchIndex - 1) { // current applying batch - Reference batchData = self->batch[req.batchIndex]; - ASSERT(batchData.isValid()); - batchData->targetWriteRateMB = req.writeMB; - remainingDataMB = batchData->totalBytesToWrite > 0 - ? std::max(0.0, batchData->totalBytesToWrite - batchData->appliedBytes) / 1024 / 1024 - : batchData->receivedBytes / 1024 / 1024; - ev.detail("TotalBytesToWrite", batchData->totalBytesToWrite) - .detail("AppliedBytes", batchData->appliedBytes) - .detail("ReceivedBytes", batchData->receivedBytes) - .detail("TargetWriteRateMB", batchData->targetWriteRateMB) - .detail("RemainingDataMB", remainingDataMB); - } - req.reply.send(RestoreUpdateRateReply(self->id(), remainingDataMB)); - - return; -} - -ACTOR static Future traceRate(const char* context, - Reference batchData, - int batchIndex, - UID nodeID, - NotifiedVersion* finishedVB, - bool once = false) { - loop { - if ((finishedVB->get() != batchIndex - 1) || !batchData.isValid()) { - break; - } - TraceEvent(context, nodeID) - .suppressFor(10) - .detail("BatchIndex", batchIndex) - .detail("FinishedBatchIndex", finishedVB->get()) - .detail("TotalDataToWriteMB", batchData->totalBytesToWrite / 1024 / 1024) - .detail("AppliedBytesMB", batchData->appliedBytes / 1024 / 1024) - .detail("TargetBytesMB", batchData->targetWriteRateMB) - .detail("InflightBytesMB", batchData->applyingDataBytes) - .detail("ReceivedBytes", batchData->receivedBytes); - if (once) { - break; - } - wait(delay(5.0)); - } - - return Void(); -} - -ACTOR static Future handleApplyToDBRequest(RestoreVersionBatchRequest req, - Reference self, - Database cx) { - TraceEvent("FastRestoreApplierPhaseHandleApplyToDBStart", self->id()) - .detail("BatchIndex", req.batchIndex) - .detail("FinishedBatch", self->finishedBatch.get()); - - // Ensure batch (i-1) is applied before batch i - // TODO: Add a counter to warn when too many requests are waiting on the actor - wait(self->finishedBatch.whenAtLeast(req.batchIndex - 1)); - - state bool isDuplicated = true; - if (self->finishedBatch.get() == req.batchIndex - 1) { - // duplicate request from earlier version batch will be ignored - state Reference batchData = self->batch[req.batchIndex]; - ASSERT(batchData.isValid()); - TraceEvent("FastRestoreApplierPhaseHandleApplyToDBRunning", self->id()) - .detail("BatchIndex", req.batchIndex) - .detail("FinishedBatch", self->finishedBatch.get()) - .detail("HasStarted", batchData->dbApplier.present()) - .detail("WroteToDBDone", batchData->dbApplier.present() ? batchData->dbApplier.get().isReady() : 0) - .detail("PreviousVersionBatchState", batchData->vbState.get()); - - ASSERT(batchData.isValid()); - if (!batchData->dbApplier.present()) { - isDuplicated = false; - batchData->dbApplier = Never(); - batchData->dbApplier = writeMutationsToDB(self->id(), req.batchIndex, batchData, cx); - batchData->vbState = ApplierVersionBatchState::WRITE_TO_DB; - batchData->rateTracer = traceRate("FastRestoreApplierTransactionRateControl", - batchData, - req.batchIndex, - self->id(), - &self->finishedBatch); - } - - ASSERT(batchData->dbApplier.present()); - ASSERT(!batchData->dbApplier.get().isError()); // writeMutationsToDB actor cannot have error. - // We cannot blindly retry because it is not idempotent - - wait(batchData->dbApplier.get()); - - // Multiple actors can wait on req.batchIndex-1; - // Avoid setting finishedBatch when finishedBatch > req.batchIndex - if (self->finishedBatch.get() == req.batchIndex - 1) { - batchData->rateTracer = traceRate("FastRestoreApplierTransactionRateControlDone", - batchData, - req.batchIndex, - self->id(), - &self->finishedBatch, - true /*print once*/); // Track the last rate info - self->finishedBatch.set(req.batchIndex); - // self->batch[req.batchIndex]->vbState = ApplierVersionBatchState::DONE; - // Free memory for the version batch - self->batch.erase(req.batchIndex); - if (self->delayedActors > 0) { - self->checkMemory.trigger(); - } - } - } - - req.reply.send(RestoreCommonReply(self->id(), isDuplicated)); - - TraceEvent("FastRestoreApplierPhaseHandleApplyToDBDone", self->id()) - .detail("BatchIndex", req.batchIndex) - .detail("FinishedBatch", self->finishedBatch.get()) - .detail("IsDuplicated", isDuplicated); - - return Void(); -} - -// Copy from WriteDuringRead.actor.cpp with small modifications -// Not all AtomicOps are handled in this function: SetVersionstampedKey, SetVersionstampedValue, and CompareAndClear -Value applyAtomicOp(Optional existingValue, Value value, MutationRef::Type type) { - Arena arena; - if (type == MutationRef::AddValue) - return doLittleEndianAdd(existingValue, value, arena); - else if (type == MutationRef::AppendIfFits) - return doAppendIfFits(existingValue, value, arena); - else if (type == MutationRef::And || type == MutationRef::AndV2) - return doAndV2(existingValue, value, arena); - else if (type == MutationRef::Or) - return doOr(existingValue, value, arena); - else if (type == MutationRef::Xor) - return doXor(existingValue, value, arena); - else if (type == MutationRef::Max) - return doMax(existingValue, value, arena); - else if (type == MutationRef::Min || type == MutationRef::MinV2) - return doMinV2(existingValue, value, arena); - else if (type == MutationRef::ByteMin) - return doByteMin(existingValue, value, arena); - else if (type == MutationRef::ByteMax) - return doByteMax(existingValue, value, arena); - else { - TraceEvent(SevError, "ApplyAtomicOpUnhandledType") - .detail("TypeCode", (int)type) - .detail("TypeName", getTypeString(type)); - ASSERT(false); - } - return Value(); -} diff --git a/fdbserver/RestoreCommon.actor.cpp b/fdbserver/RestoreCommon.actor.cpp deleted file mode 100644 index e167203140b..00000000000 --- a/fdbserver/RestoreCommon.actor.cpp +++ /dev/null @@ -1,367 +0,0 @@ -/* - * RestoreCommon.actor.cpp - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// This file implements the functions defined in RestoreCommon.actor.h -// The functions in this file are copied from BackupAgent - -#include "fdbserver/RestoreCommon.actor.h" - -// Backup agent header -#include "fdbclient/BackupAgent.actor.h" -#include "fdbclient/BackupContainer.h" -#include "fdbclient/KeyBackedTypes.actor.h" -#include "fdbclient/ManagementAPI.actor.h" -#include "fdbclient/MutationList.h" -#include "fdbclient/NativeAPI.actor.h" -#include "fdbclient/SystemData.h" - -#include "flow/actorcompiler.h" // This must be the last #include. - -// Split RestoreConfigFR defined in FileBackupAgent.actor.cpp to declaration in Restore.actor.h and implementation in -// RestoreCommon.actor.cpp - -KeyBackedProperty RestoreConfigFR::stateEnum() { - return configSpace.pack(__FUNCTION__sr); -} -Future RestoreConfigFR::stateText(Reference tr) { - return map(stateEnum().getD(tr), [](ERestoreState s) -> StringRef { return FileBackupAgent::restoreStateText(s); }); -} -KeyBackedProperty RestoreConfigFR::addPrefix() { - return configSpace.pack(__FUNCTION__sr); -} -KeyBackedProperty RestoreConfigFR::removePrefix() { - return configSpace.pack(__FUNCTION__sr); -} -// XXX: Remove restoreRange() once it is safe to remove. It has been changed to restoreRanges -KeyBackedProperty RestoreConfigFR::restoreRange() { - return configSpace.pack(__FUNCTION__sr); -} -KeyBackedProperty> RestoreConfigFR::restoreRanges() { - return configSpace.pack(__FUNCTION__sr); -} -KeyBackedProperty RestoreConfigFR::batchFuture() { - return configSpace.pack(__FUNCTION__sr); -} -KeyBackedProperty RestoreConfigFR::restoreVersion() { - return configSpace.pack(__FUNCTION__sr); -} - -KeyBackedProperty> RestoreConfigFR::sourceContainer() { - return configSpace.pack(__FUNCTION__sr); -} -// Get the source container as a bare URL, without creating a container instance -KeyBackedProperty RestoreConfigFR::sourceContainerURL() { - return configSpace.pack("sourceContainer"_sr); -} - -// Total bytes written by all log and range restore tasks. -KeyBackedBinaryValue RestoreConfigFR::bytesWritten() { - return configSpace.pack(__FUNCTION__sr); -} -// File blocks that have had tasks created for them by the Dispatch task -KeyBackedBinaryValue RestoreConfigFR::filesBlocksDispatched() { - return configSpace.pack(__FUNCTION__sr); -} -// File blocks whose tasks have finished -KeyBackedBinaryValue RestoreConfigFR::fileBlocksFinished() { - return configSpace.pack(__FUNCTION__sr); -} -// Total number of files in the fileMap -KeyBackedBinaryValue RestoreConfigFR::fileCount() { - return configSpace.pack(__FUNCTION__sr); -} -// Total number of file blocks in the fileMap -KeyBackedBinaryValue RestoreConfigFR::fileBlockCount() { - return configSpace.pack(__FUNCTION__sr); -} - -Future> RestoreConfigFR::getRestoreRangesOrDefault(Reference tr) { - return getRestoreRangesOrDefault_impl(this, tr); -} - -ACTOR Future> RestoreConfigFR::getRestoreRangesOrDefault_impl( - RestoreConfigFR* self, - Reference tr) { - state std::vector ranges = wait(self->restoreRanges().getD(tr)); - if (ranges.empty()) { - state KeyRange range = wait(self->restoreRange().getD(tr)); - ranges.push_back(range); - } - return ranges; -} - -KeyBackedSet RestoreConfigFR::fileSet() { - return configSpace.pack(__FUNCTION__sr); -} - -Future RestoreConfigFR::isRunnable(Reference tr) { - return map(stateEnum().getD(tr), [](ERestoreState s) -> bool { - return s != ERestoreState::ABORTED && s != ERestoreState::COMPLETED && s != ERestoreState::UNINITIALIZED; - }); -} - -Future RestoreConfigFR::logError(Database cx, Error e, std::string const& details, void* taskInstance) { - if (!uid.isValid()) { - TraceEvent(SevError, "FileRestoreErrorNoUID").error(e).detail("Description", details); - return Void(); - } - TraceEvent t(SevWarn, "FileRestoreError"); - t.error(e).detail("RestoreUID", uid).detail("Description", details).detail("TaskInstance", (uint64_t)taskInstance); - // key_not_found could happen - if (e.code() == error_code_key_not_found) - t.backtrace(); - - return updateErrorInfo(cx, e, details); -} - -Key RestoreConfigFR::mutationLogPrefix() { - return uidPrefixKey(applyLogKeys.begin, uid); -} - -Key RestoreConfigFR::applyMutationsMapPrefix() { - return uidPrefixKey(applyMutationsKeyVersionMapRange.begin, uid); -} - -ACTOR Future RestoreConfigFR::getApplyVersionLag_impl(Reference tr, UID uid) { - // Both of these are snapshot reads - state Future> beginVal = tr->get(uidPrefixKey(applyMutationsBeginRange.begin, uid), Snapshot::True); - state Future> endVal = tr->get(uidPrefixKey(applyMutationsEndRange.begin, uid), Snapshot::True); - wait(success(beginVal) && success(endVal)); - - if (!beginVal.get().present() || !endVal.get().present()) - return 0; - - Version beginVersion = BinaryReader::fromStringRef(beginVal.get().get(), Unversioned()); - Version endVersion = BinaryReader::fromStringRef(endVal.get().get(), Unversioned()); - return endVersion - beginVersion; -} - -Future RestoreConfigFR::getApplyVersionLag(Reference tr) { - return getApplyVersionLag_impl(tr, uid); -} - -void RestoreConfigFR::initApplyMutations(Reference tr, Key addPrefix, Key removePrefix) { - // Set these because they have to match the applyMutations values. - this->addPrefix().set(tr, addPrefix); - this->removePrefix().set(tr, removePrefix); - - clearApplyMutationsKeys(tr); - - // Initialize add/remove prefix, range version map count and set the map's start key to InvalidVersion - tr->set(uidPrefixKey(applyMutationsAddPrefixRange.begin, uid), addPrefix); - tr->set(uidPrefixKey(applyMutationsRemovePrefixRange.begin, uid), removePrefix); - int64_t startCount = 0; - tr->set(uidPrefixKey(applyMutationsKeyVersionCountRange.begin, uid), StringRef((uint8_t*)&startCount, 8)); - Key mapStart = uidPrefixKey(applyMutationsKeyVersionMapRange.begin, uid); - tr->set(mapStart, BinaryWriter::toValue(invalidVersion, Unversioned())); -} - -void RestoreConfigFR::clearApplyMutationsKeys(Reference tr) { - tr->setOption(FDBTransactionOptions::COMMIT_ON_FIRST_PROXY); - - // Clear add/remove prefix keys - tr->clear(uidPrefixKey(applyMutationsAddPrefixRange.begin, uid)); - tr->clear(uidPrefixKey(applyMutationsRemovePrefixRange.begin, uid)); - - // Clear range version map and count key - tr->clear(uidPrefixKey(applyMutationsKeyVersionCountRange.begin, uid)); - Key mapStart = uidPrefixKey(applyMutationsKeyVersionMapRange.begin, uid); - tr->clear(KeyRangeRef(mapStart, strinc(mapStart))); - - // Clear any loaded mutations that have not yet been applied - Key mutationPrefix = mutationLogPrefix(); - tr->clear(KeyRangeRef(mutationPrefix, strinc(mutationPrefix))); - - // Clear end and begin versions (intentionally in this order) - tr->clear(uidPrefixKey(applyMutationsEndRange.begin, uid)); - tr->clear(uidPrefixKey(applyMutationsBeginRange.begin, uid)); -} - -void RestoreConfigFR::setApplyBeginVersion(Reference tr, Version ver) { - tr->set(uidPrefixKey(applyMutationsBeginRange.begin, uid), BinaryWriter::toValue(ver, Unversioned())); -} - -void RestoreConfigFR::setApplyEndVersion(Reference tr, Version ver) { - tr->set(uidPrefixKey(applyMutationsEndRange.begin, uid), BinaryWriter::toValue(ver, Unversioned())); -} - -Future RestoreConfigFR::getApplyEndVersion(Reference tr) { - return map(tr->get(uidPrefixKey(applyMutationsEndRange.begin, uid)), [=](Optional const& value) -> Version { - return value.present() ? BinaryReader::fromStringRef(value.get(), Unversioned()) : 0; - }); -} - -// Meng: Change RestoreConfigFR to Reference because FastRestore pass the Reference -// around -ACTOR Future RestoreConfigFR::getProgress_impl(Reference restore, - Reference tr) { - tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - tr->setOption(FDBTransactionOptions::LOCK_AWARE); - - state Future fileCount = restore->fileCount().getD(tr); - state Future fileBlockCount = restore->fileBlockCount().getD(tr); - state Future fileBlocksDispatched = restore->filesBlocksDispatched().getD(tr); - state Future fileBlocksFinished = restore->fileBlocksFinished().getD(tr); - state Future bytesWritten = restore->bytesWritten().getD(tr); - state Future status = restore->stateText(tr); - state Future lag = restore->getApplyVersionLag(tr); - state Future tag = restore->tag().getD(tr); - state Future> lastError = restore->lastError().getD(tr); - - // restore might no longer be valid after the first wait so make sure it is not needed anymore. - state UID uid = restore->getUid(); - wait(success(fileCount) && success(fileBlockCount) && success(fileBlocksDispatched) && - success(fileBlocksFinished) && success(bytesWritten) && success(status) && success(lag) && success(tag) && - success(lastError)); - - std::string errstr = "None"; - if (lastError.get().second != 0) - errstr = format("'%s' %llds ago.\n", - lastError.get().first.c_str(), - (tr->getReadVersion().get() - lastError.get().second) / CLIENT_KNOBS->CORE_VERSIONSPERSECOND); - - TraceEvent("FileRestoreProgress") - .detail("RestoreUID", uid) - .detail("Tag", tag.get()) - .detail("State", status.get().toString()) - .detail("FileCount", fileCount.get()) - .detail("FileBlocksFinished", fileBlocksFinished.get()) - .detail("FileBlocksTotal", fileBlockCount.get()) - .detail("FileBlocksInProgress", fileBlocksDispatched.get() - fileBlocksFinished.get()) - .detail("BytesWritten", bytesWritten.get()) - .detail("ApplyLag", lag.get()) - .detail("TaskInstance", THIS_ADDR) - .backtrace(); - - return format("Tag: %s UID: %s State: %s Blocks: %lld/%lld BlocksInProgress: %lld Files: %lld BytesWritten: " - "%lld ApplyVersionLag: %lld LastError: %s", - tag.get().c_str(), - uid.toString().c_str(), - status.get().toString().c_str(), - fileBlocksFinished.get(), - fileBlockCount.get(), - fileBlocksDispatched.get() - fileBlocksFinished.get(), - fileCount.get(), - bytesWritten.get(), - lag.get(), - errstr.c_str()); -} -Future RestoreConfigFR::getProgress(Reference tr) { - Reference restore = Reference(this); - return getProgress_impl(restore, tr); -} - -// Meng: Change RestoreConfigFR to Reference -ACTOR Future RestoreConfigFR::getFullStatus_impl(Reference restore, - Reference tr) { - tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - tr->setOption(FDBTransactionOptions::LOCK_AWARE); - - state Future> ranges = restore->getRestoreRangesOrDefault(tr); - state Future addPrefix = restore->addPrefix().getD(tr); - state Future removePrefix = restore->removePrefix().getD(tr); - state Future url = restore->sourceContainerURL().getD(tr); - state Future restoreVersion = restore->restoreVersion().getD(tr); - state Future progress = restore->getProgress(tr); - - // restore might no longer be valid after the first wait so make sure it is not needed anymore. - wait(success(ranges) && success(addPrefix) && success(removePrefix) && success(url) && success(restoreVersion) && - success(progress)); - - std::string returnStr; - returnStr = format("%s URL: %s", progress.get().c_str(), url.get().toString().c_str()); - for (auto& range : ranges.get()) { - returnStr += format(" Range: '%s'-'%s'", printable(range.begin).c_str(), printable(range.end).c_str()); - } - returnStr += format(" AddPrefix: '%s' RemovePrefix: '%s' Version: %lld", - printable(addPrefix.get()).c_str(), - printable(removePrefix.get()).c_str(), - restoreVersion.get()); - return returnStr; -} -Future RestoreConfigFR::getFullStatus(Reference tr) { - Reference restore = Reference(this); - return getFullStatus_impl(restore, tr); -} - -std::string RestoreConfigFR::toString() { - std::stringstream ss; - ss << "uid:" << uid.toString() << " prefix:" << subspace.key().contents().toString(); - return ss.str(); -} - -// parallelFileRestore is copied from FileBackupAgent.actor.cpp for the same reason as RestoreConfigFR is copied -// The implementation of parallelFileRestore is copied from FileBackupAgent.actor.cpp -// parallelFileRestore is copied from FileBackupAgent.actor.cpp for the same reason as RestoreConfigFR is copied -namespace parallelFileRestore { - -ACTOR Future>> decodeLogFileBlock(Reference file, - int64_t offset, - int len) { - state Standalone buf = makeString(len); - int rLen = wait(file->read(mutateString(buf), len, offset)); - if (rLen != len) - throw restore_bad_read(); - - simulateBlobFailure(); - - Standalone> results({}, buf.arena()); - state StringRefReader reader(buf, restore_corrupted_data()); - - try { - // Read header, currently only decoding version BACKUP_AGENT_MLOG_VERSION - if (reader.consume() != BACKUP_AGENT_MLOG_VERSION) - throw restore_unsupported_file_version(); - - // Read k/v pairs. Block ends either at end of last value exactly or with 0xFF as first key len byte. - while (1) { - // If eof reached or first key len bytes is 0xFF then end of block was reached. - if (reader.eof() || *reader.rptr == 0xFF) - break; - - // Read key and value. If anything throws then there is a problem. - uint32_t kLen = reader.consumeNetworkUInt32(); - const uint8_t* k = reader.consume(kLen); - uint32_t vLen = reader.consumeNetworkUInt32(); - const uint8_t* v = reader.consume(vLen); - - results.push_back(results.arena(), KeyValueRef(KeyRef(k, kLen), ValueRef(v, vLen))); - } - - // Make sure any remaining bytes in the block are 0xFF - for (auto b : reader.remainder()) - if (b != 0xFF) - throw restore_corrupted_data_padding(); - - return results; - - } catch (Error& e) { - TraceEvent(SevError, "FileRestoreCorruptLogFileBlock") - .error(e) - .detail("Filename", file->getFilename()) - .detail("BlockOffset", offset) - .detail("BlockLen", len) - .detail("ErrorRelativeOffset", reader.rptr - buf.begin()) - .detail("ErrorAbsoluteOffset", reader.rptr - buf.begin() + offset); - throw; - } -} - -} // namespace parallelFileRestore diff --git a/fdbserver/RestoreController.actor.cpp b/fdbserver/RestoreController.actor.cpp deleted file mode 100644 index 22aa2b43d4f..00000000000 --- a/fdbserver/RestoreController.actor.cpp +++ /dev/null @@ -1,1222 +0,0 @@ -/* - * RestoreController.actor.cpp - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// This file implements the functions for RestoreController role - -#include "fdbrpc/RangeMap.h" -#include "fdbclient/NativeAPI.actor.h" -#include "fdbclient/SystemData.h" -#include "fdbclient/BackupAgent.actor.h" -#include "fdbclient/ManagementAPI.actor.h" -#include "fdbclient/MutationList.h" -#include "fdbclient/BackupContainer.h" -#include "fdbserver/RestoreUtil.h" -#include "fdbserver/RestoreCommon.actor.h" -#include "fdbserver/RestoreRoleCommon.actor.h" -#include "fdbserver/RestoreController.actor.h" -#include "fdbserver/RestoreApplier.actor.h" -#include "fdbserver/RestoreLoader.actor.h" - -#include "flow/Platform.h" -#include "flow/actorcompiler.h" // This must be the last #include. - -// TODO: Support [[maybe_unused]] attribute for actors -// ACTOR static Future clearDB(Database cx); -ACTOR static Future collectBackupFiles(Reference bc, - std::vector* rangeFiles, - std::vector* logFiles, - Version* minRangeVersion, - Database cx, - RestoreRequest request); -ACTOR static Future buildRangeVersions(KeyRangeMap* pRangeVersions, - std::vector* pRangeFiles, - Key url, - Optional proxy, - Database cx); - -ACTOR static Future processRestoreRequest(Reference self, - Database cx, - RestoreRequest request); -ACTOR static Future startProcessRestoreRequests(Reference self, Database cx); -ACTOR static Future distributeWorkloadPerVersionBatch(Reference self, - int batchIndex, - Database cx, - RestoreRequest request, - VersionBatch versionBatch); - -ACTOR static Future recruitRestoreRoles(Reference controllerWorker, - Reference controllerData); -ACTOR static Future distributeRestoreSysInfo(Reference controllerData, - KeyRangeMap* pRangeVersions); - -ACTOR static Future> collectRestoreRequests(Database cx); -ACTOR static Future initializeVersionBatch(std::map appliersInterf, - std::map loadersInterf, - int batchIndex); -ACTOR static Future notifyApplierToApplyMutations(Reference batchData, - Reference batchStatus, - std::map appliersInterf, - int batchIndex, - NotifiedVersion* finishedBatch); -ACTOR static Future notifyLoadersVersionBatchFinished(std::map loadersInterf, - int batchIndex); -ACTOR static Future notifyRestoreCompleted(Reference self, bool terminate); -ACTOR static Future signalRestoreCompleted(Reference self, Database cx); -// TODO: Support [[maybe_unused]] attribute for actors -// ACTOR static Future updateHeartbeatTime(Reference self); -ACTOR static Future checkRolesLiveness(Reference self); - -void splitKeyRangeForAppliers(Reference batchData, - std::map appliersInterf, - int batchIndex); - -ACTOR Future sampleBackups(Reference self, RestoreControllerInterface ci) { - loop { - try { - RestoreSamplesRequest req = waitNext(ci.samples.getFuture()); - TraceEvent(SevDebug, "FastRestoreControllerSampleBackups") - .detail("SampleID", req.id) - .detail("BatchIndex", req.batchIndex) - .detail("Samples", req.samples.size()); - ASSERT(req.batchIndex <= self->batch.size()); // batchIndex starts from 1 - - Reference batch = self->batch[req.batchIndex]; - ASSERT(batch.isValid()); - if (batch->sampleMsgs.find(req.id) != batch->sampleMsgs.end()) { - req.reply.send(RestoreCommonReply(req.id)); - continue; - } - batch->sampleMsgs.insert(req.id); - for (auto& m : req.samples) { - batch->samples.addMetric(m.key, m.size); - batch->samplesSize += m.size; - } - req.reply.send(RestoreCommonReply(req.id)); - } catch (Error& e) { - TraceEvent(SevWarn, "FastRestoreControllerSampleBackupsError", self->id()).error(e); - break; - } - } - - return Void(); -} - -ACTOR Future startRestoreController(Reference controllerWorker, Database cx) { - ASSERT(controllerWorker.isValid()); - ASSERT(controllerWorker->controllerInterf.present()); - state Reference self = - makeReference(controllerWorker->controllerInterf.get().id()); - state Future error = actorCollection(self->addActor.getFuture()); - - try { - // recruitRestoreRoles must come after controllerWorker has finished collectWorkerInterface - wait(recruitRestoreRoles(controllerWorker, self)); - - // self->addActor.send(updateHeartbeatTime(self)); - self->addActor.send(checkRolesLiveness(self)); - self->addActor.send(updateProcessMetrics(self)); - self->addActor.send(traceProcessMetrics(self, "RestoreController")); - self->addActor.send(sampleBackups(self, controllerWorker->controllerInterf.get())); - - wait(startProcessRestoreRequests(self, cx) || error); - } catch (Error& e) { - if (e.code() != error_code_operation_cancelled) { - TraceEvent(SevError, "FastRestoreControllerStart").error(e).detail("Reason", "Unexpected unhandled error"); - } - } - - return Void(); -} - -// RestoreWorker that has restore controller role: Recruite a role for each worker -ACTOR Future recruitRestoreRoles(Reference controllerWorker, - Reference controllerData) { - state int nodeIndex = 0; - state RestoreRole role = RestoreRole::Invalid; - - TraceEvent("FastRestoreController", controllerData->id()) - .detail("RecruitRestoreRoles", controllerWorker->workerInterfaces.size()) - .detail("NumLoaders", SERVER_KNOBS->FASTRESTORE_NUM_LOADERS) - .detail("NumAppliers", SERVER_KNOBS->FASTRESTORE_NUM_APPLIERS); - ASSERT(controllerData->loadersInterf.empty() && controllerData->appliersInterf.empty()); - ASSERT(controllerWorker->controllerInterf.present()); - - ASSERT(controllerData.isValid()); - ASSERT(SERVER_KNOBS->FASTRESTORE_NUM_LOADERS > 0 && SERVER_KNOBS->FASTRESTORE_NUM_APPLIERS > 0); - // We assign 1 role per worker for now - ASSERT(SERVER_KNOBS->FASTRESTORE_NUM_LOADERS + SERVER_KNOBS->FASTRESTORE_NUM_APPLIERS <= - controllerWorker->workerInterfaces.size()); - - // Assign a role to each worker - std::vector> requests; - for (auto& workerInterf : controllerWorker->workerInterfaces) { - if (nodeIndex >= 0 && nodeIndex < SERVER_KNOBS->FASTRESTORE_NUM_APPLIERS) { - // [0, numApplier) are appliers - role = RestoreRole::Applier; - } else if (nodeIndex >= SERVER_KNOBS->FASTRESTORE_NUM_APPLIERS && - nodeIndex < SERVER_KNOBS->FASTRESTORE_NUM_LOADERS + SERVER_KNOBS->FASTRESTORE_NUM_APPLIERS) { - // [numApplier, numApplier + numLoader) are loaders - role = RestoreRole::Loader; - } else { - break; - } - - TraceEvent("FastRestoreController", controllerData->id()) - .detail("WorkerNode", workerInterf.first) - .detail("NodeRole", role) - .detail("NodeIndex", nodeIndex); - requests.emplace_back(workerInterf.first, - RestoreRecruitRoleRequest(controllerWorker->controllerInterf.get(), role, nodeIndex)); - nodeIndex++; - } - - state std::vector replies; - wait(getBatchReplies(&RestoreWorkerInterface::recruitRole, controllerWorker->workerInterfaces, requests, &replies)); - for (auto& reply : replies) { - if (reply.role == RestoreRole::Applier) { - ASSERT_WE_THINK(reply.applier.present()); - controllerData->appliersInterf[reply.applier.get().id()] = reply.applier.get(); - } else if (reply.role == RestoreRole::Loader) { - ASSERT_WE_THINK(reply.loader.present()); - controllerData->loadersInterf[reply.loader.get().id()] = reply.loader.get(); - } else { - TraceEvent(SevError, "FastRestoreController").detail("RecruitRestoreRolesInvalidRole", reply.role); - } - } - controllerData->recruitedRoles.send(Void()); - TraceEvent("FastRestoreRecruitRestoreRolesDone", controllerData->id()) - .detail("Workers", controllerWorker->workerInterfaces.size()) - .detail("RecruitedRoles", replies.size()); - - return Void(); -} - -ACTOR Future distributeRestoreSysInfo(Reference controllerData, - KeyRangeMap* pRangeVersions) { - ASSERT(controllerData.isValid()); - ASSERT(!controllerData->loadersInterf.empty()); - RestoreSysInfo sysInfo(controllerData->appliersInterf); - // Construct serializable KeyRange versions - Standalone>> rangeVersionsVec; - auto ranges = pRangeVersions->ranges(); - int i = 0; - for (auto r = ranges.begin(); r != ranges.end(); ++r) { - rangeVersionsVec.push_back(rangeVersionsVec.arena(), - std::make_pair(KeyRangeRef(r->begin(), r->end()), r->value())); - TraceEvent("DistributeRangeVersions") - .detail("RangeIndex", i++) - .detail("RangeBegin", r->begin()) - .detail("RangeEnd", r->end()) - .detail("RangeVersion", r->value()); - } - std::vector> requests; - for (auto& loader : controllerData->loadersInterf) { - requests.emplace_back(loader.first, RestoreSysInfoRequest(sysInfo, rangeVersionsVec)); - } - - TraceEvent("FastRestoreDistributeRestoreSysInfoToLoaders", controllerData->id()) - .detail("Loaders", controllerData->loadersInterf.size()); - wait(sendBatchRequests(&RestoreLoaderInterface::updateRestoreSysInfo, controllerData->loadersInterf, requests)); - TraceEvent("FastRestoreDistributeRestoreSysInfoToLoadersDone", controllerData->id()) - .detail("Loaders", controllerData->loadersInterf.size()); - - return Void(); -} - -// The server of the restore controller. It drives the restore progress with the following steps: -// 1) Lock database and clear the normal keyspace -// 2) Wait on each RestoreRequest, which is sent by RestoreTool operated by DBA -// 3) Process each restore request in actor processRestoreRequest; -// 3.1) Sample workload to decide the key range for each applier, which is implemented as a dummy sampling; -// 3.2) Send each loader the map of key-range to applier interface; -// 3.3) Construct requests of which file should be loaded by which loader, and send requests to loaders; -// 4) After process all restore requests, finish restore by cleaning up the restore related system key -// and ask all restore roles to quit. -ACTOR Future startProcessRestoreRequests(Reference self, Database cx) { - state std::vector restoreRequests = wait(collectRestoreRequests(cx)); - state int restoreIndex = 0; - - TraceEvent("FastRestoreControllerWaitOnRestoreRequests", self->id()) - .detail("RestoreRequests", restoreRequests.size()); - - // TODO: Sanity check restoreRequests' key ranges do not overlap - - // Step: Perform the restore requests - try { - for (restoreIndex = 0; restoreIndex < restoreRequests.size(); restoreIndex++) { - state RestoreRequest request = restoreRequests[restoreIndex]; - state KeyRange range = request.range.removePrefix(request.removePrefix).withPrefix(request.addPrefix); - TraceEvent("FastRestoreControllerProcessRestoreRequests", self->id()) - .detail("RestoreRequestInfo", request.toString()) - .detail("TransformedKeyRange", range); - // TODO: Initialize controllerData and all loaders and appliers' data for each restore request! - self->resetPerRestoreRequest(); - - // clear the key range that will be restored - wait(runRYWTransaction(cx, [=](Reference tr) -> Future { - tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - tr->setOption(FDBTransactionOptions::LOCK_AWARE); - tr->clear(range); - return Void(); - })); - - wait(success(processRestoreRequest(self, cx, request))); - wait(notifyRestoreCompleted(self, false)); - } - } catch (Error& e) { - if (restoreIndex < restoreRequests.size()) { - TraceEvent(SevError, "FastRestoreControllerProcessRestoreRequestsFailed", self->id()) - .error(e) - .detail("RestoreRequest", restoreRequests[restoreIndex].toString()); - } else { - TraceEvent(SevError, "FastRestoreControllerProcessRestoreRequestsFailed", self->id()) - .error(e) - .detail("RestoreRequests", restoreRequests.size()) - .detail("RestoreIndex", restoreIndex); - } - } - - // Step: Notify all restore requests have been handled by cleaning up the restore keys - wait(signalRestoreCompleted(self, cx)); - - TraceEvent("FastRestoreControllerRestoreCompleted", self->id()); - - return Void(); -} - -ACTOR static Future monitorFinishedVersion(Reference self, RestoreRequest request) { - loop { - TraceEvent("FastRestoreMonitorFinishedVersion", self->id()) - .detail("RestoreRequest", request.toString()) - .detail("BatchIndex", self->finishedBatch.get()); - wait(delay(SERVER_KNOBS->FASTRESTORE_VB_MONITOR_DELAY)); - } -} - -ACTOR static Future processRestoreRequest(Reference self, - Database cx, - RestoreRequest request) { - state std::vector rangeFiles; - state std::vector logFiles; - state std::vector allFiles; - state Version minRangeVersion = MAX_VERSION; - - self->initBackupContainer(request.url, request.proxy); - - // Get all backup files' description and save them to files - state Version targetVersion = - wait(collectBackupFiles(self->bc, &rangeFiles, &logFiles, &minRangeVersion, cx, request)); - ASSERT(targetVersion > 0); - ASSERT(minRangeVersion != MAX_VERSION); // otherwise, all mutations will be skipped - - std::sort(rangeFiles.begin(), rangeFiles.end()); - std::sort(logFiles.begin(), logFiles.end(), [](RestoreFileFR const& f1, RestoreFileFR const& f2) -> bool { - return std::tie(f1.endVersion, f1.beginVersion, f1.fileIndex, f1.fileName) < - std::tie(f2.endVersion, f2.beginVersion, f2.fileIndex, f2.fileName); - }); - - // Build range versions: version of key ranges in range file - state KeyRangeMap rangeVersions(minRangeVersion, allKeys.end); - if (SERVER_KNOBS->FASTRESTORE_GET_RANGE_VERSIONS_EXPENSIVE) { - wait(buildRangeVersions(&rangeVersions, &rangeFiles, request.url, request.proxy, cx)); - } else { - // Debug purpose, dump range versions - auto ranges = rangeVersions.ranges(); - int i = 0; - for (auto r = ranges.begin(); r != ranges.end(); ++r) { - TraceEvent(SevDebug, "SingleRangeVersion") - .detail("RangeIndex", i++) - .detail("RangeBegin", r->begin()) - .detail("RangeEnd", r->end()) - .detail("RangeVersion", r->value()); - } - } - - wait(distributeRestoreSysInfo(self, &rangeVersions)); - - // Divide files into version batches. - self->buildVersionBatches(rangeFiles, logFiles, &self->versionBatches, targetVersion); - self->dumpVersionBatches(self->versionBatches); - - state std::vector> fBatches; - state std::vector versionBatches; // To randomize invoking order of version batchs - for (auto& vb : self->versionBatches) { - versionBatches.push_back(vb.second); - } - - // releaseVBOutOfOrder can only be true in simulation - state bool releaseVBOutOfOrder = g_network->isSimulated() ? deterministicRandom()->random01() < 0.5 : false; - ASSERT(g_network->isSimulated() || !releaseVBOutOfOrder); - if (releaseVBOutOfOrder) { - // Randomize invoking order of version batches - int permTimes = deterministicRandom()->randomInt(0, 100); - while (permTimes-- > 0) { - std::next_permutation(versionBatches.begin(), versionBatches.end()); - } - } - - self->addActor.send(monitorFinishedVersion(self, request)); - state std::vector::iterator versionBatch = versionBatches.begin(); - for (; versionBatch != versionBatches.end(); versionBatch++) { - while (self->runningVersionBatches.get() >= SERVER_KNOBS->FASTRESTORE_VB_PARALLELISM && !releaseVBOutOfOrder) { - // Control how many batches can be processed in parallel. Avoid dead lock due to OOM on loaders - TraceEvent("FastRestoreControllerDispatchVersionBatches") - .detail("WaitOnRunningVersionBatches", self->runningVersionBatches.get()); - wait(self->runningVersionBatches.onChange()); - } - int batchIndex = versionBatch->batchIndex; - TraceEvent("FastRestoreControllerDispatchVersionBatches") - .detail("BatchIndex", batchIndex) - .detail("BatchSize", versionBatch->size) - .detail("RunningVersionBatches", self->runningVersionBatches.get()) - .detail("VersionBatches", versionBatches.size()); - self->batch[batchIndex] = makeReference(); - self->batchStatus[batchIndex] = makeReference(); - fBatches.push_back(distributeWorkloadPerVersionBatch(self, batchIndex, cx, request, *versionBatch)); - // Wait a bit to give the current version batch a head start from the next version batch - wait(delay(SERVER_KNOBS->FASTRESTORE_VB_LAUNCH_DELAY)); - } - - try { - wait(waitForAll(fBatches)); - } catch (Error& e) { - TraceEvent(SevError, "FastRestoreControllerDispatchVersionBatchesUnexpectedError").error(e); - } - - TraceEvent("FastRestoreController").detail("RestoreToVersion", request.targetVersion); - return request.targetVersion; -} - -ACTOR static Future loadFilesOnLoaders(Reference batchData, - Reference batchStatus, - std::map loadersInterf, - int batchIndex, - Database cx, - RestoreRequest request, - VersionBatch versionBatch, - bool isRangeFile) { - // set is internally sorted - std::set* files = isRangeFile ? &versionBatch.rangeFiles : &versionBatch.logFiles; - - TraceEvent("FastRestoreControllerPhaseLoadFilesStart") - .detail("RestoreRequestID", request.randomUid) - .detail("BatchIndex", batchIndex) - .detail("FileTypeLoadedInVersionBatch", isRangeFile) - .detail("BeginVersion", versionBatch.beginVersion) - .detail("EndVersion", versionBatch.endVersion) - .detail("Files", (files != nullptr ? files->size() : -1)); - - std::vector> requests; - std::map::iterator loader = loadersInterf.begin(); - state std::vector assets; // all assets loaded, used for sanity check restore progress - - // Balance workload on loaders for parsing range and log files across version batches - int random = deterministicRandom()->randomInt(0, loadersInterf.size()); - while (random-- > 0) { - loader++; - } - - int paramIdx = 0; - for (auto& file : *files) { - // TODO: Allow empty files in version batch; Filter out them here. - if (loader == loadersInterf.end()) { - loader = loadersInterf.begin(); - } - // Prepare loading - LoadingParam param; - param.url = request.url; - param.isRangeFile = file.isRange; - param.rangeVersion = file.isRange ? file.version : -1; - param.blockSize = file.blockSize; - - param.asset.uid = deterministicRandom()->randomUniqueID(); - param.asset.filename = file.fileName; - param.asset.fileIndex = file.fileIndex; - param.asset.partitionId = file.partitionId; - param.asset.offset = 0; - param.asset.len = file.fileSize; - param.asset.range = request.range; - param.asset.beginVersion = versionBatch.beginVersion; - param.asset.endVersion = (isRangeFile || request.targetVersion == -1) - ? versionBatch.endVersion - : std::min(versionBatch.endVersion, request.targetVersion + 1); - param.asset.addPrefix = request.addPrefix; - param.asset.removePrefix = request.removePrefix; - param.asset.batchIndex = batchIndex; - - TraceEvent("FastRestoreControllerPhaseLoadFiles") - .detail("BatchIndex", batchIndex) - .detail("LoadParamIndex", paramIdx) - .detail("LoaderID", loader->first.toString()) - .detail("LoadParam", param.toString()); - ASSERT_WE_THINK(param.asset.len > 0); - ASSERT_WE_THINK(param.asset.offset >= 0); - ASSERT_WE_THINK(param.asset.offset <= file.fileSize); - ASSERT_WE_THINK(param.asset.beginVersion <= param.asset.endVersion); - - requests.emplace_back(loader->first, RestoreLoadFileRequest(batchIndex, param)); - // Restore asset should only be loaded exactly once. - if (batchStatus->raStatus.find(param.asset) != batchStatus->raStatus.end()) { - TraceEvent(SevError, "FastRestoreControllerPhaseLoadFiles") - .detail("LoadingParam", param.toString()) - .detail("RestoreAssetAlreadyProcessed", batchStatus->raStatus[param.asset]); - } - batchStatus->raStatus[param.asset] = RestoreAssetStatus::Loading; - assets.push_back(param.asset); - ++loader; - ++paramIdx; - } - TraceEvent(files->size() != paramIdx ? SevError : SevInfo, "FastRestoreControllerPhaseLoadFiles") - .detail("BatchIndex", batchIndex) - .detail("Files", files->size()) - .detail("LoadParams", paramIdx); - - state std::vector replies; - // Wait on the batch of load files or log files - wait(getBatchReplies( - &RestoreLoaderInterface::loadFile, loadersInterf, requests, &replies, TaskPriority::RestoreLoaderLoadFiles)); - - TraceEvent("FastRestoreControllerPhaseLoadFilesReply") - .detail("BatchIndex", batchIndex) - .detail("SamplingReplies", replies.size()); - for (auto& reply : replies) { - // Update and sanity check restore asset's status - RestoreAssetStatus status = batchStatus->raStatus[reply.param.asset]; - if (status == RestoreAssetStatus::Loading && !reply.isDuplicated) { - batchStatus->raStatus[reply.param.asset] = RestoreAssetStatus::Loaded; - } else if (status == RestoreAssetStatus::Loading && reply.isDuplicated) { - // Duplicate request wait on the restore asset to be processed before it replies - batchStatus->raStatus[reply.param.asset] = RestoreAssetStatus::Loaded; - TraceEvent(SevWarn, "FastRestoreControllerPhaseLoadFilesReply") - .detail("RestoreAsset", reply.param.asset.toString()) - .detail("DuplicateRequestArriveEarly", "RestoreAsset should have been processed"); - } else if (status == RestoreAssetStatus::Loaded && reply.isDuplicated) { - TraceEvent(SevDebug, "FastRestoreControllerPhaseLoadFilesReply") - .detail("RestoreAsset", reply.param.asset.toString()) - .detail("RequestIgnored", "Loading request was sent more than once"); - } else { - TraceEvent(SevError, "FastRestoreControllerPhaseLoadFilesReply") - .detail("RestoreAsset", reply.param.asset.toString()) - .detail("UnexpectedReply", reply.toString()); - } - } - - // Sanity check: all restore assets status should be Loaded - for (auto& asset : assets) { - if (batchStatus->raStatus[asset] != RestoreAssetStatus::Loaded) { - TraceEvent(SevError, "FastRestoreControllerPhaseLoadFilesReply") - .detail("RestoreAsset", asset.toString()) - .detail("UnexpectedStatus", batchStatus->raStatus[asset]); - } - } - - TraceEvent("FastRestoreControllerPhaseLoadFilesDone") - .detail("BatchIndex", batchIndex) - .detail("FileTypeLoadedInVersionBatch", isRangeFile) - .detail("BeginVersion", versionBatch.beginVersion) - .detail("EndVersion", versionBatch.endVersion); - return Void(); -} - -// Ask loaders to send its buffered mutations to appliers -ACTOR static Future sendMutationsFromLoaders(Reference batchData, - Reference batchStatus, - std::map loadersInterf, - int batchIndex, - bool useRangeFile) { - TraceEvent("FastRestoreControllerPhaseSendMutationsFromLoadersStart") - .detail("BatchIndex", batchIndex) - .detail("UseRangeFiles", useRangeFile) - .detail("Loaders", loadersInterf.size()); - - std::vector> requests; - for (auto& loader : loadersInterf) { - requests.emplace_back( - loader.first, RestoreSendMutationsToAppliersRequest(batchIndex, batchData->rangeToApplier, useRangeFile)); - batchStatus->loadStatus[loader.first] = - useRangeFile ? RestoreSendStatus::SendingRanges : RestoreSendStatus::SendingLogs; - } - state std::vector replies; - wait(getBatchReplies(&RestoreLoaderInterface::sendMutations, - loadersInterf, - requests, - &replies, - TaskPriority::RestoreLoaderSendMutations)); - - TraceEvent("FastRestoreControllerPhaseSendMutationsFromLoadersDone") - .detail("BatchIndex", batchIndex) - .detail("UseRangeFiles", useRangeFile) - .detail("Loaders", loadersInterf.size()); - - return Void(); -} - -// Process a version batch. Phases (loading files, send mutations) should execute in order -ACTOR static Future distributeWorkloadPerVersionBatch(Reference self, - int batchIndex, - Database cx, - RestoreRequest request, - VersionBatch versionBatch) { - state Reference batchData = self->batch[batchIndex]; - state Reference batchStatus = self->batchStatus[batchIndex]; - state double startTime = now(); - - TraceEvent("FastRestoreControllerDispatchVersionBatchesStart", self->id()) - .detail("BatchIndex", batchIndex) - .detail("BatchSize", versionBatch.size) - .detail("RunningVersionBatches", self->runningVersionBatches.get()); - - self->runningVersionBatches.set(self->runningVersionBatches.get() + 1); - - // In case sampling data takes too much memory on controller - wait(isSchedulable(self, batchIndex, __FUNCTION__)); - - wait(initializeVersionBatch(self->appliersInterf, self->loadersInterf, batchIndex)); - - ASSERT(!versionBatch.isEmpty()); - ASSERT(self->loadersInterf.size() > 0); - ASSERT(self->appliersInterf.size() > 0); - - // Parse log files and send mutations to appliers before we parse range files - // TODO: Allow loading both range and log files in parallel - ASSERT(batchData->samples.empty()); - ASSERT(batchData->samplesSize < 1 && batchData->samplesSize > -1); // samplesSize should be 0 - ASSERT(batchStatus->raStatus.empty()); - ASSERT(batchStatus->loadStatus.empty()); - ASSERT(batchStatus->applyStatus.empty()); - - // New backup has subversion to order mutations at the same version. For mutations at the same version, - // range file's mutations have the largest subversion and larger than log file's. - // SOMEDAY: Extend subversion to old-style backup. - wait( - loadFilesOnLoaders(batchData, batchStatus, self->loadersInterf, batchIndex, cx, request, versionBatch, false) && - loadFilesOnLoaders(batchData, batchStatus, self->loadersInterf, batchIndex, cx, request, versionBatch, true)); - - ASSERT(batchData->rangeToApplier.empty()); - splitKeyRangeForAppliers(batchData, self->appliersInterf, batchIndex); - - // Ask loaders to send parsed mutations to appliers; - // log mutations should be applied before range mutations at the same version, which is ensured by LogMessageVersion - wait(sendMutationsFromLoaders(batchData, batchStatus, self->loadersInterf, batchIndex, false) && - sendMutationsFromLoaders(batchData, batchStatus, self->loadersInterf, batchIndex, true)); - - // Synchronization point for version batch pipelining. - // self->finishedBatch will continuously increase by 1 per version batch. - wait(notifyApplierToApplyMutations(batchData, batchStatus, self->appliersInterf, batchIndex, &self->finishedBatch)); - - wait(notifyLoadersVersionBatchFinished(self->loadersInterf, batchIndex)); - - self->runningVersionBatches.set(self->runningVersionBatches.get() - 1); - - if (self->delayedActors > 0) { - self->checkMemory.trigger(); - } - - TraceEvent("FastRestoreControllerDispatchVersionBatchesDone", self->id()) - .detail("BatchIndex", batchIndex) - .detail("BatchSize", versionBatch.size) - .detail("RunningVersionBatches", self->runningVersionBatches.get()) - .detail("Latency", now() - startTime); - - return Void(); -} - -// Decide which key range should be taken by which applier -// Input: samples in batchData -// Output: rangeToApplier in batchData -void splitKeyRangeForAppliers(Reference batchData, - std::map appliersInterf, - int batchIndex) { - ASSERT(batchData->samplesSize >= 0); - // Sanity check: samples should not be used after freed - ASSERT((batchData->samplesSize > 0 && !batchData->samples.empty()) || - (batchData->samplesSize == 0 && batchData->samples.empty())); - int numAppliers = appliersInterf.size(); - double slotSize = std::max(batchData->samplesSize / numAppliers, 1.0); - double cumulativeSize = slotSize; - TraceEvent("FastRestoreControllerPhaseCalculateApplierKeyRangesStart") - .detail("BatchIndex", batchIndex) - .detail("SamplingSize", batchData->samplesSize) - .detail("SlotSize", slotSize); - - std::set keyrangeSplitter; // unique key to split key range for appliers - keyrangeSplitter.insert(allKeys.begin); // First slot - TraceEvent("FastRestoreControllerPhaseCalculateApplierKeyRanges") - .detail("BatchIndex", batchIndex) - .detail("CumulativeSize", cumulativeSize) - .detail("Slot", 0) - .detail("LowerBoundKey", allKeys.begin); - int slotIdx = 1; - while (cumulativeSize < batchData->samplesSize) { - IndexedSet::iterator lowerBound = batchData->samples.index(cumulativeSize); - if (lowerBound == batchData->samples.end()) { - break; - } - keyrangeSplitter.insert(*lowerBound); - TraceEvent("FastRestoreControllerPhaseCalculateApplierKeyRanges") - .detail("BatchIndex", batchIndex) - .detail("CumulativeSize", cumulativeSize) - .detail("Slot", slotIdx++) - .detail("LowerBoundKey", lowerBound->toString()); - cumulativeSize += slotSize; - } - if (keyrangeSplitter.size() < numAppliers) { - TraceEvent(SevWarnAlways, "FastRestoreControllerPhaseCalculateApplierKeyRanges") - .detail("NotAllAppliersAreUsed", keyrangeSplitter.size()) - .detail("NumAppliers", numAppliers); - } else if (keyrangeSplitter.size() > numAppliers) { - bool expected = (keyrangeSplitter.size() == numAppliers + 1); - TraceEvent(expected ? SevWarn : SevError, "FastRestoreControllerPhaseCalculateApplierKeyRanges") - .detail("TooManySlotsThanAppliers", keyrangeSplitter.size()) - .detail("NumAppliers", numAppliers) - .detail("SamplingSize", batchData->samplesSize) - .detail("PerformanceMayDegrade", "Last applier handles more data than others"); - } - - std::set::iterator splitter = keyrangeSplitter.begin(); - batchData->rangeToApplier.clear(); - for (auto& applier : appliersInterf) { - if (splitter == keyrangeSplitter.end()) { - break; // Not all appliers will be used - } - batchData->rangeToApplier[*splitter] = applier.first; - splitter++; - } - ASSERT(batchData->rangeToApplier.size() > 0); - ASSERT(batchData->sanityCheckApplierKeyRange()); - batchData->logApplierKeyRange(batchIndex); - TraceEvent("FastRestoreControllerPhaseCalculateApplierKeyRangesDone") - .detail("BatchIndex", batchIndex) - .detail("SamplingSize", batchData->samplesSize) - .detail("SlotSize", slotSize); - batchData->samples.clear(); -} - -ACTOR static Future> collectRestoreRequests(Database cx) { - state std::vector restoreRequests; - state Future watch4RestoreRequest; - state ReadYourWritesTransaction tr(cx); - - // restoreRequestTriggerKey should already been set - loop { - try { - TraceEvent("FastRestoreControllerPhaseCollectRestoreRequestsWait").log(); - tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - tr.setOption(FDBTransactionOptions::LOCK_AWARE); - - // Sanity check - Optional numRequests = wait(tr.get(restoreRequestTriggerKey)); - ASSERT(numRequests.present()); - - RangeResult restoreRequestValues = wait(tr.getRange(restoreRequestKeys, CLIENT_KNOBS->TOO_MANY)); - ASSERT(!restoreRequestValues.more); - if (restoreRequestValues.size()) { - for (auto& it : restoreRequestValues) { - restoreRequests.push_back(decodeRestoreRequestValue(it.value)); - TraceEvent("FastRestoreControllerPhaseCollectRestoreRequests") - .detail("RestoreRequest", restoreRequests.back().toString()); - } - break; - } else { - TraceEvent(SevError, "FastRestoreControllerPhaseCollectRestoreRequestsEmptyRequests").log(); - wait(delay(5.0)); - } - } catch (Error& e) { - wait(tr.onError(e)); - } - } - - return restoreRequests; -} - -// Collect the backup files' description into output_files by reading the backupContainer bc. -// Returns the restore target version. -ACTOR static Future collectBackupFiles(Reference bc, - std::vector* rangeFiles, - std::vector* logFiles, - Version* minRangeVersion, - Database cx, - RestoreRequest request) { - state BackupDescription desc = wait(bc->describeBackup()); - - // Convert version to real time for operators to read the BackupDescription desc. - wait(desc.resolveVersionTimes(cx)); - - if (request.targetVersion == invalidVersion && desc.maxRestorableVersion.present()) { - request.targetVersion = desc.maxRestorableVersion.get(); - } - - TraceEvent("FastRestoreControllerPhaseCollectBackupFilesStart") - .detail("TargetVersion", request.targetVersion) - .detail("BackupDesc", desc.toString()) - .detail("UseRangeFile", SERVER_KNOBS->FASTRESTORE_USE_RANGE_FILE) - .detail("UseLogFile", SERVER_KNOBS->FASTRESTORE_USE_LOG_FILE); - if (g_network->isSimulated()) { - std::cout << "Restore to version: " << request.targetVersion << "\nBackupDesc: \n" << desc.toString() << "\n\n"; - } - - state VectorRef restoreRanges; - restoreRanges.add(request.range); - Optional restorable = wait(bc->getRestoreSet(request.targetVersion, restoreRanges)); - - if (!restorable.present()) { - TraceEvent(SevWarn, "FastRestoreControllerPhaseCollectBackupFiles") - .detail("NotRestorable", request.targetVersion); - throw restore_missing_data(); - } - - ASSERT(rangeFiles->empty()); - ASSERT(logFiles->empty()); - - std::set uniqueRangeFiles; - std::set uniqueLogFiles; - double rangeSize = 0; - double logSize = 0; - *minRangeVersion = MAX_VERSION; - if (SERVER_KNOBS->FASTRESTORE_USE_RANGE_FILE) { - for (const RangeFile& f : restorable.get().ranges) { - TraceEvent(SevFRDebugInfo, "FastRestoreControllerPhaseCollectBackupFiles") - .detail("RangeFile", f.toString()); - if (f.fileSize <= 0) { - continue; - } - RestoreFileFR file(f); - TraceEvent(SevFRDebugInfo, "FastRestoreControllerPhaseCollectBackupFiles") - .detail("RangeFileFR", file.toString()); - uniqueRangeFiles.insert(file); - rangeSize += file.fileSize; - *minRangeVersion = std::min(*minRangeVersion, file.version); - } - } - if (MAX_VERSION == *minRangeVersion) { - *minRangeVersion = 0; // If no range file, range version must be 0 so that we apply all mutations - } - - if (SERVER_KNOBS->FASTRESTORE_USE_LOG_FILE) { - for (const LogFile& f : restorable.get().logs) { - TraceEvent(SevFRDebugInfo, "FastRestoreControllerPhaseCollectBackupFiles").detail("LogFile", f.toString()); - if (f.fileSize <= 0) { - continue; - } - RestoreFileFR file(f); - TraceEvent(SevFRDebugInfo, "FastRestoreControllerPhaseCollectBackupFiles") - .detail("LogFileFR", file.toString()); - logFiles->push_back(file); - uniqueLogFiles.insert(file); - logSize += file.fileSize; - } - } - - // Assign unique range files and log files to output - rangeFiles->assign(uniqueRangeFiles.begin(), uniqueRangeFiles.end()); - logFiles->assign(uniqueLogFiles.begin(), uniqueLogFiles.end()); - - TraceEvent("FastRestoreControllerPhaseCollectBackupFilesDone") - .detail("BackupDesc", desc.toString()) - .detail("RangeFiles", rangeFiles->size()) - .detail("LogFiles", logFiles->size()) - .detail("RangeFileBytes", rangeSize) - .detail("LogFileBytes", logSize) - .detail("UseRangeFile", SERVER_KNOBS->FASTRESTORE_USE_RANGE_FILE) - .detail("UseLogFile", SERVER_KNOBS->FASTRESTORE_USE_LOG_FILE); - return request.targetVersion; -} - -// By the first and last block of *file to get (beginKey, endKey); -// set (beginKey, endKey) and file->version to pRangeVersions -ACTOR static Future insertRangeVersion(KeyRangeMap* pRangeVersions, - RestoreFileFR* file, - Reference bc, - Database cx) { - TraceEvent("FastRestoreControllerDecodeRangeVersion").detail("File", file->toString()); - RangeFile rangeFile = { file->version, (uint32_t)file->blockSize, file->fileName, file->fileSize }; - - // First and last key are the range for this file: endKey is exclusive - KeyRange fileRange = wait(bc->getSnapshotFileKeyRange(rangeFile, cx)); - TraceEvent("FastRestoreControllerInsertRangeVersion") - .detail("DecodedRangeFile", file->fileName) - .detail("KeyRange", fileRange) - .detail("Version", file->version); - // Update version for pRangeVersions's ranges in fileRange - auto ranges = pRangeVersions->modify(fileRange); - for (auto r = ranges.begin(); r != ranges.end(); ++r) { - r->value() = std::max(r->value(), file->version); - } - - if (SERVER_KNOBS->FASTRESTORE_DUMP_INSERT_RANGE_VERSION) { - // Dump the new key ranges for debugging purpose. - ranges = pRangeVersions->ranges(); - int i = 0; - for (auto r = ranges.begin(); r != ranges.end(); ++r) { - TraceEvent(SevDebug, "RangeVersionsAfterUpdate") - .detail("File", file->toString()) - .detail("FileRange", fileRange.toString()) - .detail("FileVersion", file->version) - .detail("RangeIndex", i++) - .detail("RangeBegin", r->begin()) - .detail("RangeEnd", r->end()) - .detail("RangeVersion", r->value()); - } - } - - return Void(); -} - -// Build the version skyline of snapshot ranges by parsing range files; -// Expensive and slow operation that should not run in real prod. -ACTOR static Future buildRangeVersions(KeyRangeMap* pRangeVersions, - std::vector* pRangeFiles, - Key url, - Optional proxy, - Database cx) { - if (!g_network->isSimulated()) { - TraceEvent(SevError, "ExpensiveBuildRangeVersions") - .detail("Reason", "Parsing all range files is slow and memory intensive"); - return Void(); - } - Reference bc = IBackupContainer::openContainer(url.toString(), proxy, {}); - - // Key ranges not in range files are empty; - // Assign highest version to avoid applying any mutation in these ranges - state int fileIndex = 0; - state std::vector> fInsertRangeVersions; - for (; fileIndex < pRangeFiles->size(); ++fileIndex) { - fInsertRangeVersions.push_back(insertRangeVersion(pRangeVersions, &pRangeFiles->at(fileIndex), bc, cx)); - } - - wait(waitForAll(fInsertRangeVersions)); - - return Void(); -} - -/* -ACTOR static Future clearDB(Database cx) { - wait(runRYWTransaction(cx, [](Reference tr) -> Future { - tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - tr->setOption(FDBTransactionOptions::LOCK_AWARE); - tr->clear(normalKeys); - return Void(); - })); - - return Void(); -} -*/ - -ACTOR static Future initializeVersionBatch(std::map appliersInterf, - std::map loadersInterf, - int batchIndex) { - TraceEvent("FastRestoreControllerPhaseInitVersionBatchForAppliersStart") - .detail("BatchIndex", batchIndex) - .detail("Appliers", appliersInterf.size()); - std::vector> requestsToAppliers; - requestsToAppliers.reserve(appliersInterf.size()); - for (auto& applier : appliersInterf) { - requestsToAppliers.emplace_back(applier.first, RestoreVersionBatchRequest(batchIndex)); - } - wait(sendBatchRequests(&RestoreApplierInterface::initVersionBatch, appliersInterf, requestsToAppliers)); - - TraceEvent("FastRestoreControllerPhaseInitVersionBatchForLoaders") - .detail("BatchIndex", batchIndex) - .detail("Loaders", loadersInterf.size()); - std::vector> requestsToLoaders; - requestsToLoaders.reserve(loadersInterf.size()); - for (auto& loader : loadersInterf) { - requestsToLoaders.emplace_back(loader.first, RestoreVersionBatchRequest(batchIndex)); - } - wait(sendBatchRequests(&RestoreLoaderInterface::initVersionBatch, loadersInterf, requestsToLoaders)); - - TraceEvent("FastRestoreControllerPhaseInitVersionBatchForAppliersDone").detail("BatchIndex", batchIndex); - return Void(); -} - -// Calculate the amount of data each applier should keep outstanding to DB; -// This is the amount of data that are in in-progress transactions. -ACTOR static Future updateApplierWriteBW(Reference batchData, - std::map appliersInterf, - int batchIndex) { - state std::unordered_map applierRemainMB; - state double totalRemainMB = SERVER_KNOBS->FASTRESTORE_WRITE_BW_MB; - state double standardAvgBW = SERVER_KNOBS->FASTRESTORE_WRITE_BW_MB / SERVER_KNOBS->FASTRESTORE_NUM_APPLIERS; - state int loopCount = 0; - state std::vector replies; - state std::vector> requests; - for (auto& applier : appliersInterf) { - applierRemainMB[applier.first] = SERVER_KNOBS->FASTRESTORE_WRITE_BW_MB / SERVER_KNOBS->FASTRESTORE_NUM_APPLIERS; - } - - loop { - requests.clear(); - for (auto& applier : appliersInterf) { - double writeRate = totalRemainMB > 1 ? (applierRemainMB[applier.first] / totalRemainMB) * - SERVER_KNOBS->FASTRESTORE_WRITE_BW_MB - : standardAvgBW; - requests.emplace_back(applier.first, RestoreUpdateRateRequest(batchIndex, writeRate)); - } - replies.clear(); - wait(getBatchReplies( - &RestoreApplierInterface::updateRate, - appliersInterf, - requests, - &replies, - TaskPriority::DefaultEndpoint)); // DefaultEndpoint has higher priority than fast restore endpoints - ASSERT(replies.size() == requests.size()); - totalRemainMB = 0; - for (int i = 0; i < replies.size(); i++) { - UID& applierID = requests[i].first; - applierRemainMB[applierID] = replies[i].remainMB; - totalRemainMB += replies[i].remainMB; - } - ASSERT(totalRemainMB >= 0); - double delayTime = SERVER_KNOBS->FASTRESTORE_RATE_UPDATE_SECONDS; - if (loopCount == 0) { // First loop: Need to update writeRate quicker - delayTime = 0.2; - } - loopCount++; - wait(delay(delayTime)); - } -} - -// Ask each applier to apply its received mutations to DB -// NOTE: Controller cannot start applying mutations at batchIndex until all appliers have applied for (batchIndex - 1) -// because appliers at different batchIndex may have overlapped key ranges. -ACTOR static Future notifyApplierToApplyMutations(Reference batchData, - Reference batchStatus, - std::map appliersInterf, - int batchIndex, - NotifiedVersion* finishedBatch) { - TraceEvent("FastRestoreControllerPhaseApplyToDBStart") - .detail("BatchIndex", batchIndex) - .detail("FinishedBatch", finishedBatch->get()); - - wait(finishedBatch->whenAtLeast(batchIndex - 1)); - - state Future updateRate; - - if (finishedBatch->get() == batchIndex - 1) { - // Prepare the applyToDB requests - std::vector> requests; - - TraceEvent("FastRestoreControllerPhaseApplyToDBRunning") - .detail("BatchIndex", batchIndex) - .detail("Appliers", appliersInterf.size()); - for (auto& applier : appliersInterf) { - ASSERT(batchStatus->applyStatus.find(applier.first) == batchStatus->applyStatus.end()); - requests.emplace_back(applier.first, RestoreVersionBatchRequest(batchIndex)); - batchStatus->applyStatus[applier.first] = RestoreApplyStatus::Applying; - } - state std::vector replies; - // The actor at each batchIndex should only occur once. - // Use batchData->applyToDB just in case the actor at a batchIndex is executed more than once. - if (!batchData->applyToDB.present()) { - batchData->applyToDB = Never(); - batchData->applyToDB = getBatchReplies(&RestoreApplierInterface::applyToDB, - appliersInterf, - requests, - &replies, - TaskPriority::RestoreApplierWriteDB); - updateRate = updateApplierWriteBW(batchData, appliersInterf, batchIndex); - } else { - TraceEvent(SevError, "FastRestoreControllerPhaseApplyToDB") - .detail("BatchIndex", batchIndex) - .detail("Attention", "Actor should not be invoked twice for the same batch index"); - } - - ASSERT(batchData->applyToDB.present()); - ASSERT(!batchData->applyToDB.get().isError()); - wait(batchData->applyToDB.get()); - - // Sanity check all appliers have applied data to destination DB - for (auto& reply : replies) { - if (batchStatus->applyStatus[reply.id] == RestoreApplyStatus::Applying) { - batchStatus->applyStatus[reply.id] = RestoreApplyStatus::Applied; - if (reply.isDuplicated) { - TraceEvent(SevWarn, "FastRestoreControllerPhaseApplyToDB") - .detail("Applier", reply.id) - .detail("DuplicateRequestReturnEarlier", "Apply db request should have been processed"); - } - } - } - for (auto& applier : appliersInterf) { - if (batchStatus->applyStatus[applier.first] != RestoreApplyStatus::Applied) { - TraceEvent(SevError, "FastRestoreControllerPhaseApplyToDB") - .detail("Applier", applier.first) - .detail("ApplyStatus", batchStatus->applyStatus[applier.first]); - } - } - finishedBatch->set(batchIndex); - } - - TraceEvent("FastRestoreControllerPhaseApplyToDBDone") - .detail("BatchIndex", batchIndex) - .detail("FinishedBatch", finishedBatch->get()); - - return Void(); -} - -// Notify loaders that all data in the version batch has been applied to DB. -ACTOR static Future notifyLoadersVersionBatchFinished(std::map loadersInterf, - int batchIndex) { - TraceEvent("FastRestoreControllerPhaseNotifyLoadersVersionBatchFinishedStart").detail("BatchIndex", batchIndex); - std::vector> requestsToLoaders; - requestsToLoaders.reserve(loadersInterf.size()); - for (auto& loader : loadersInterf) { - requestsToLoaders.emplace_back(loader.first, RestoreVersionBatchRequest(batchIndex)); - } - wait(sendBatchRequests(&RestoreLoaderInterface::finishVersionBatch, loadersInterf, requestsToLoaders)); - TraceEvent("FastRestoreControllerPhaseNotifyLoadersVersionBatchFinishedDone").detail("BatchIndex", batchIndex); - - return Void(); -} - -// Ask all loaders and appliers to perform housecleaning at the end of a restore request -// Terminate those roles if terminate = true -ACTOR static Future notifyRestoreCompleted(Reference self, bool terminate = false) { - std::vector> requests; - TraceEvent("FastRestoreControllerPhaseNotifyRestoreCompletedStart").log(); - for (auto& loader : self->loadersInterf) { - requests.emplace_back(loader.first, RestoreFinishRequest(terminate)); - } - - Future endLoaders = sendBatchRequests(&RestoreLoaderInterface::finishRestore, self->loadersInterf, requests); - - requests.clear(); - for (auto& applier : self->appliersInterf) { - requests.emplace_back(applier.first, RestoreFinishRequest(terminate)); - } - Future endAppliers = - sendBatchRequests(&RestoreApplierInterface::finishRestore, self->appliersInterf, requests); - - // If terminate = true, loaders and appliers exits immediately after it receives the request. Controller may not - // receive acks. - if (!terminate) { - wait(endLoaders && endAppliers); - } - - TraceEvent("FastRestoreControllerPhaseNotifyRestoreCompletedDone").log(); - - return Void(); -} - -// Register the restoreRequestDoneKey to signal the end of restore -ACTOR static Future signalRestoreCompleted(Reference self, Database cx) { - state Reference tr(new ReadYourWritesTransaction(cx)); - - wait(notifyRestoreCompleted(self, true)); // notify workers the restore has completed - - wait(delay(5.0)); // Give some time for loaders and appliers to exit - - // Notify tester that the restore has finished - loop { - try { - tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - tr->setOption(FDBTransactionOptions::LOCK_AWARE); - tr->clear(restoreRequestTriggerKey); - tr->clear(restoreRequestKeys); - Version readVersion = wait(tr->getReadVersion()); - tr->set(restoreRequestDoneKey, restoreRequestDoneVersionValue(readVersion)); - wait(tr->commit()); - break; - } catch (Error& e) { - wait(tr->onError(e)); - } - } - - TraceEvent("FastRestoreControllerAllRestoreCompleted").log(); - - return Void(); -} - -/* -// Update the most recent time when controller receives heartbeat from each loader and applier -// TODO: Replace the heartbeat mechanism with FDB failure monitoring mechanism -ACTOR static Future updateHeartbeatTime(Reference self) { - wait(self->recruitedRoles.getFuture()); - - int numRoles = self->loadersInterf.size() + self->appliersInterf.size(); - state std::map::iterator loader = self->loadersInterf.begin(); - state std::map::iterator applier = self->appliersInterf.begin(); - state std::vector> fReplies(numRoles, Never()); // TODO: Reserve memory for this vector - state std::vector nodes; - state int index = 0; - state Future fTimeout = Void(); - - // Initialize nodes only once - std::transform(self->loadersInterf.begin(), - self->loadersInterf.end(), - std::back_inserter(nodes), - [](const std::pair& in) { return in.first; }); - std::transform(self->appliersInterf.begin(), - self->appliersInterf.end(), - std::back_inserter(nodes), - [](const std::pair& in) { return in.first; }); - - loop { - loader = self->loadersInterf.begin(); - applier = self->appliersInterf.begin(); - index = 0; - std::fill(fReplies.begin(), fReplies.end(), Never()); - // ping loaders and appliers - while (loader != self->loadersInterf.end()) { - fReplies[index] = loader->second.heartbeat.getReply(RestoreSimpleRequest()); - loader++; - index++; - } - while (applier != self->appliersInterf.end()) { - fReplies[index] = applier->second.heartbeat.getReply(RestoreSimpleRequest()); - applier++; - index++; - } - - fTimeout = delay(SERVER_KNOBS->FASTRESTORE_HEARTBEAT_DELAY); - - // Here we have to handle error, otherwise controller worker will fail and exit. - try { - wait(waitForAll(fReplies) || fTimeout); - } catch (Error& e) { - // This should be an ignorable error. - TraceEvent(g_network->isSimulated() ? SevWarnAlways : SevError, "FastRestoreUpdateHeartbeatError").error(e); - } - - // Update the most recent heart beat time for each role - for (int i = 0; i < fReplies.size(); ++i) { - if (!fReplies[i].isError() && fReplies[i].isReady()) { - double currentTime = now(); - auto item = self->rolesHeartBeatTime.emplace(nodes[i], currentTime); - item.first->second = currentTime; - } - } - wait(fTimeout); // Ensure not updating heartbeat too quickly - } -} -*/ - -// Check if a restore role dies or disconnected -ACTOR static Future checkRolesLiveness(Reference self) { - loop { - wait(delay(SERVER_KNOBS->FASTRESTORE_HEARTBEAT_MAX_DELAY)); - for (auto& role : self->rolesHeartBeatTime) { - if (now() - role.second > SERVER_KNOBS->FASTRESTORE_HEARTBEAT_MAX_DELAY) { - TraceEvent(SevWarnAlways, "FastRestoreUnavailableRole", role.first) - .detail("Delta", now() - role.second) - .detail("LastAliveTime", role.second); - } - } - } -} diff --git a/fdbserver/RestoreLoader.actor.cpp b/fdbserver/RestoreLoader.actor.cpp deleted file mode 100644 index dce213e8083..00000000000 --- a/fdbserver/RestoreLoader.actor.cpp +++ /dev/null @@ -1,1605 +0,0 @@ -/* - * RestoreLoader.actor.cpp - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// This file implements the functions and actors used by the RestoreLoader role. -// The RestoreLoader role starts with the restoreLoaderCore actor - -#include "fdbclient/BlobCipher.h" -#include "fdbclient/CommitProxyInterface.h" -#include "flow/UnitTest.h" -#include "fdbclient/BackupContainer.h" -#include "fdbclient/BackupAgent.actor.h" -#include "fdbclient/GetEncryptCipherKeys.h" -#include "fdbclient/DatabaseContext.h" -#include "fdbserver/RestoreLoader.actor.h" -#include "fdbserver/RestoreRoleCommon.actor.h" -#include "fdbserver/MutationTracking.h" -#include "fdbserver/StorageMetrics.actor.h" - -#include "flow/actorcompiler.h" // This must be the last #include. - -// SerializedMutationListMap: Buffered mutation lists from data blocks in log files -// Key is the signature/version of the mutation list; Value.first is the mutation list which may come from multiple -// data blocks of log file; Value.second is the largest part number of the mutation list, which is used to sanity check -// the data blocks for the same mutation list are concatenated in increasing order of part number. -typedef std::map, std::pair, uint32_t>> SerializedMutationListMap; - -std::vector getApplierIDs(std::map& rangeToApplier); -void splitMutation(const KeyRangeMap& krMap, - MutationRef m, - Arena& mvector_arena, - VectorRef& mvector, - Arena& nodeIDs_arena, - VectorRef& nodeIDs); -ACTOR Future _parseSerializedMutation(KeyRangeMap* pRangeVersions, - std::map::iterator kvOpsIter, - SerializedMutationListMap* mutationMap, - std::map::iterator samplesIter, - LoaderCounters* cc, - RestoreAsset asset, - Database cx); - -void handleRestoreSysInfoRequest(const RestoreSysInfoRequest& req, Reference self); -ACTOR Future handleLoadFileRequest(RestoreLoadFileRequest req, Reference self, Database cx); -ACTOR Future handleSendMutationsRequest(RestoreSendMutationsToAppliersRequest req, - Reference self, - Database cx); -ACTOR Future sendMutationsToApplier( - std::priority_queue* sendLoadParamQueue, - std::map* inflightSendLoadParamReqs, - NotifiedVersion* finishedBatch, - VersionedMutationsMap* pkvOps, - int batchIndex, - RestoreAsset asset, - bool isRangeFile, - std::map* pRangeToApplier, - std::map* pApplierInterfaces, - Database cx); -ACTOR static Future _parseLogFileToMutationsOnLoader(NotifiedVersion* pProcessedFileOffset, - SerializedMutationListMap* mutationMap, - Reference bc, - RestoreAsset asset); -ACTOR static Future parseLogFileToMutationsOnLoader(NotifiedVersion* pProcessedFileOffset, - SerializedMutationListMap* mutationMap, - Reference bc, - RestoreAsset asset); -ACTOR static Future _parseRangeFileToMutationsOnLoader( - std::map::iterator kvOpsIter, - std::map::iterator samplesIter, - LoaderCounters* cc, - Reference bc, - Version version, - RestoreAsset asset, - Database cx); -ACTOR Future handleFinishVersionBatchRequest(RestoreVersionBatchRequest req, Reference self); - -// Dispatch requests based on node's business (i.e, cpu usage for now) and requests' priorities -// Requests for earlier version batches are preferred; which is equivalent to -// sendMuttionsRequests are preferred than loadingFileRequests -ACTOR Future dispatchRequests(Reference self, Database cx) { - try { - state int curVBInflightReqs = 0; - state int sendLoadParams = 0; - state int lastLoadReqs = 0; - loop { - TraceEvent(SevVerbose, "FastRestoreLoaderDispatchRequests", self->id()) - .detail("SendingQueue", self->sendingQueue.size()) - .detail("LoadingQueue", self->loadingQueue.size()) - .detail("SendingLoadParamQueue", self->sendLoadParamQueue.size()) - .detail("InflightSendingReqs", self->inflightSendingReqs) - .detail("InflightSendingReqsThreshold", SERVER_KNOBS->FASTRESTORE_SCHED_INFLIGHT_SEND_REQS) - .detail("InflightLoadingReqs", self->inflightLoadingReqs) - .detail("InflightLoadingReqsThreshold", SERVER_KNOBS->FASTRESTORE_SCHED_INFLIGHT_LOAD_REQS) - .detail("LastLoadFileRequests", lastLoadReqs) - .detail("LoadFileRequestsBatchThreshold", SERVER_KNOBS->FASTRESTORE_SCHED_LOAD_REQ_BATCHSIZE) - .detail("LastDispatchSendLoadParamReqsForCurrentVB", curVBInflightReqs) - .detail("LastDispatchSendLoadParamReqsForFutureVB", sendLoadParams) - .detail("CpuUsage", self->cpuUsage) - .detail("TargetCpuUsage", SERVER_KNOBS->FASTRESTORE_SCHED_TARGET_CPU_PERCENT) - .detail("MaxCpuUsage", SERVER_KNOBS->FASTRESTORE_SCHED_MAX_CPU_PERCENT); - - // TODO: Pop old requests whose version batch <= finishedBatch.get() - // TODO2: Simulate delayed request can be too old by introducing artificial delay - if (SERVER_KNOBS->FASTRESTORE_EXPENSIVE_VALIDATION) { - // Sanity check: All requests before and in finishedBatch must have been processed; otherwise, - // those requests may cause segmentation fault after applier remove the batch data - if (!self->loadingQueue.empty() && self->loadingQueue.top().batchIndex <= self->finishedBatch.get()) { - // Still has pending requests from earlier batchIndex and current batchIndex, which should not - // happen - TraceEvent(SevError, "FastRestoreLoaderSchedulerHasOldLoadFileRequests") - .detail("FinishedBatchIndex", self->finishedBatch.get()) - .detail("PendingRequest", self->loadingQueue.top().toString()); - } - if (!self->sendingQueue.empty() && self->sendingQueue.top().batchIndex <= self->finishedBatch.get()) { - TraceEvent(SevError, "FastRestoreLoaderSchedulerHasOldSendRequests") - .detail("FinishedBatchIndex", self->finishedBatch.get()) - .detail("PendingRequest", self->sendingQueue.top().toString()); - } - if (!self->sendLoadParamQueue.empty() && - self->sendLoadParamQueue.top().batchIndex <= self->finishedBatch.get()) { - TraceEvent(SevError, "FastRestoreLoaderSchedulerHasOldSendLoadParamRequests") - .detail("FinishedBatchIndex", self->finishedBatch.get()) - .detail("PendingRequest", self->sendLoadParamQueue.top().toString()); - } - } - - if (!self->sendingQueue.empty()) { - // Only release one sendMutationRequest at a time because it sends all data for a version batch - // and it takes large amount of resource - const RestoreSendMutationsToAppliersRequest& req = self->sendingQueue.top(); - // Dispatch the request if it is the next version batch to process or if cpu usage is low - if (req.batchIndex - 1 == self->finishedSendingVB || - self->cpuUsage < SERVER_KNOBS->FASTRESTORE_SCHED_TARGET_CPU_PERCENT) { - self->addActor.send(handleSendMutationsRequest(req, self, cx)); - self->sendingQueue.pop(); - } - } - // When shall the node pause the process of other requests, e.g., load file requests - // TODO: Revisit if we should have (self->inflightSendingReqs > 0 && self->inflightLoadingReqs > 0) - if ((self->inflightSendingReqs > 0 && self->inflightLoadingReqs > 0) && - (self->inflightSendingReqs >= SERVER_KNOBS->FASTRESTORE_SCHED_INFLIGHT_SEND_REQS || - self->inflightLoadingReqs >= SERVER_KNOBS->FASTRESTORE_SCHED_INFLIGHT_LOAD_REQS || - (self->inflightSendingReqs >= 1 && - self->cpuUsage >= SERVER_KNOBS->FASTRESTORE_SCHED_TARGET_CPU_PERCENT) || - self->cpuUsage >= SERVER_KNOBS->FASTRESTORE_SCHED_MAX_CPU_PERCENT)) { - if (self->inflightSendingReqs >= SERVER_KNOBS->FASTRESTORE_SCHED_INFLIGHT_SEND_REQS) { - TraceEvent(SevWarn, "FastRestoreLoaderTooManyInflightRequests") - .detail("VersionBatchesBlockedAtSendingMutationsToAppliers", self->inflightSendingReqs) - .detail("CpuUsage", self->cpuUsage) - .detail("InflightSendingReq", self->inflightSendingReqs) - .detail("InflightSendingReqThreshold", SERVER_KNOBS->FASTRESTORE_SCHED_INFLIGHT_SEND_REQS) - .detail("InflightLoadingReq", self->inflightLoadingReqs) - .detail("InflightLoadingReqThreshold", SERVER_KNOBS->FASTRESTORE_SCHED_INFLIGHT_LOAD_REQS); - } - wait(delay(SERVER_KNOBS->FASTRESTORE_SCHED_UPDATE_DELAY)); - updateProcessStats(self); - continue; - } - // Dispatch queued requests of sending mutations per loading param - while (!self->sendLoadParamQueue.empty()) { // dispatch current VB first - const RestoreLoaderSchedSendLoadParamRequest& req = self->sendLoadParamQueue.top(); - if (req.batchIndex - 1 > self->finishedSendingVB) { // future VB - break; - } else { - req.toSched.send(Void()); - self->sendLoadParamQueue.pop(); - } - } - sendLoadParams = 0; - curVBInflightReqs = self->inflightSendLoadParamReqs[self->finishedSendingVB + 1]; - while (!self->sendLoadParamQueue.empty()) { - const RestoreLoaderSchedSendLoadParamRequest& req = self->sendLoadParamQueue.top(); - if (curVBInflightReqs >= SERVER_KNOBS->FASTRESTORE_SCHED_INFLIGHT_SENDPARAM_THRESHOLD || - sendLoadParams >= SERVER_KNOBS->FASTRESTORE_SCHED_SEND_FUTURE_VB_REQS_BATCH) { - // Too many future VB requests are released - break; - } else { - req.toSched.send(Void()); - self->sendLoadParamQueue.pop(); - sendLoadParams++; - } - } - - // Dispatch loading backup file requests - lastLoadReqs = 0; - while (!self->loadingQueue.empty()) { - if (lastLoadReqs >= SERVER_KNOBS->FASTRESTORE_SCHED_LOAD_REQ_BATCHSIZE) { - break; - } - const RestoreLoadFileRequest& req = self->loadingQueue.top(); - if (req.batchIndex <= self->finishedBatch.get()) { - TraceEvent(SevError, "FastRestoreLoaderDispatchRestoreLoadFileRequestTooOld") - .detail("FinishedBatchIndex", self->finishedBatch.get()) - .detail("RequestBatchIndex", req.batchIndex); - req.reply.send(RestoreLoadFileReply(req.param, true)); - self->loadingQueue.pop(); - ASSERT(false); // Check if this ever happens easily - } else { - self->addActor.send(handleLoadFileRequest(req, self, cx)); - self->loadingQueue.pop(); - lastLoadReqs++; - } - } - - if (self->cpuUsage >= SERVER_KNOBS->FASTRESTORE_SCHED_TARGET_CPU_PERCENT) { - wait(delay(SERVER_KNOBS->FASTRESTORE_SCHED_UPDATE_DELAY)); - } - updateProcessStats(self); - - if (self->loadingQueue.empty() && self->sendingQueue.empty() && self->sendLoadParamQueue.empty()) { - TraceEvent(SevVerbose, "FastRestoreLoaderDispatchRequestsWaitOnRequests", self->id()) - .detail("HasPendingRequests", self->hasPendingRequests->get()); - self->hasPendingRequests->set(false); - wait(self->hasPendingRequests->onChange()); // CAREFUL:Improper req release may cause restore stuck here - } - } - } catch (Error& e) { - if (e.code() != error_code_actor_cancelled) { - TraceEvent(SevError, "FastRestoreLoaderDispatchRequests").errorUnsuppressed(e); - throw e; - } - } - return Void(); -} - -ACTOR Future restoreLoaderCore(RestoreLoaderInterface loaderInterf, - int nodeIndex, - Database cx, - RestoreControllerInterface ci) { - state Reference self = makeReference(loaderInterf.id(), nodeIndex, ci); - state Future error = actorCollection(self->addActor.getFuture()); - state ActorCollection actors(false); // actors whose errors can be ignored - state Future exitRole = Never(); - state bool hasQueuedRequests = false; - - actors.add(updateProcessMetrics(self)); - actors.add(traceProcessMetrics(self, "RestoreLoader")); - - self->addActor.send(dispatchRequests(self, cx)); - - loop { - state std::string requestTypeStr = "[Init]"; - - try { - choose { - when(RestoreSimpleRequest req = waitNext(loaderInterf.heartbeat.getFuture())) { - requestTypeStr = "heartbeat"; - actors.add(handleHeartbeat(req, loaderInterf.id())); - } - when(RestoreSysInfoRequest req = waitNext(loaderInterf.updateRestoreSysInfo.getFuture())) { - requestTypeStr = "updateRestoreSysInfo"; - handleRestoreSysInfoRequest(req, self); - } - when(RestoreLoadFileRequest req = waitNext(loaderInterf.loadFile.getFuture())) { - requestTypeStr = "loadFile"; - hasQueuedRequests = !self->loadingQueue.empty() || !self->sendingQueue.empty(); - self->initBackupContainer(req.param.url, req.param.proxy); - self->loadingQueue.push(req); - if (!hasQueuedRequests) { - self->hasPendingRequests->set(true); - } - } - when(RestoreSendMutationsToAppliersRequest req = waitNext(loaderInterf.sendMutations.getFuture())) { - requestTypeStr = "sendMutations"; - hasQueuedRequests = !self->loadingQueue.empty() || !self->sendingQueue.empty(); - self->sendingQueue.push(req); - if (!hasQueuedRequests) { - self->hasPendingRequests->set(true); - } - } - when(RestoreVersionBatchRequest req = waitNext(loaderInterf.initVersionBatch.getFuture())) { - requestTypeStr = "initVersionBatch"; - actors.add(handleInitVersionBatchRequest(req, self)); - } - when(RestoreVersionBatchRequest req = waitNext(loaderInterf.finishVersionBatch.getFuture())) { - requestTypeStr = "finishVersionBatch"; - actors.add(handleFinishVersionBatchRequest(req, self)); - } - when(RestoreFinishRequest req = waitNext(loaderInterf.finishRestore.getFuture())) { - requestTypeStr = "finishRestore"; - handleFinishRestoreRequest(req, self); - if (req.terminate) { - exitRole = Void(); - } - } - when(wait(actors.getResult())) {} - when(wait(exitRole)) { - TraceEvent("FastRestoreLoaderCoreExitRole", self->id()); - break; - } - when(wait(error)) { - TraceEvent("FastRestoreLoaderActorCollectionError", self->id()); - } - } - } catch (Error& e) { - bool isError = e.code() != error_code_operation_cancelled; // == error_code_broken_promise - TraceEvent(isError ? SevError : SevWarnAlways, "FastRestoreLoaderError", self->id()) - .errorUnsuppressed(e) - .detail("RequestType", requestTypeStr); - actors.clear(false); - break; - } - } - - return Void(); -} - -static inline bool _logMutationTooOld(KeyRangeMap* pRangeVersions, KeyRangeRef keyRange, Version v) { - ASSERT(pRangeVersions != nullptr); - auto ranges = pRangeVersions->intersectingRanges(keyRange); - Version minVersion = MAX_VERSION; - for (auto r = ranges.begin(); r != ranges.end(); ++r) { - minVersion = std::min(minVersion, r->value()); - } - ASSERT(minVersion != MAX_VERSION); // pRangeVersions is initialized as entire keyspace, ranges cannot be empty - return minVersion >= v; -} - -static inline bool logMutationTooOld(KeyRangeMap* pRangeVersions, MutationRef mutation, Version v) { - return isRangeMutation(mutation) - ? _logMutationTooOld(pRangeVersions, KeyRangeRef(mutation.param1, mutation.param2), v) - : _logMutationTooOld(pRangeVersions, KeyRangeRef(singleKeyRange(mutation.param1)), v); -} - -// Assume: Only update the local data if it (applierInterf) has not been set -void handleRestoreSysInfoRequest(const RestoreSysInfoRequest& req, Reference self) { - TraceEvent("FastRestoreLoader", self->id()).detail("HandleRestoreSysInfoRequest", self->id()); - ASSERT(self.isValid()); - - // The loader has received the appliers interfaces - if (!self->appliersInterf.empty()) { - req.reply.send(RestoreCommonReply(self->id())); - return; - } - - self->appliersInterf = req.sysInfo.appliers; - // Update rangeVersions - ASSERT(req.rangeVersions.size() > 0); // At least the min version of range files will be used - ASSERT(self->rangeVersions.size() == 1); // rangeVersions has not been set - for (auto rv = req.rangeVersions.begin(); rv != req.rangeVersions.end(); ++rv) { - self->rangeVersions.insert(rv->first, rv->second); - } - - // Debug message for range version in each loader - auto ranges = self->rangeVersions.ranges(); - int i = 0; - for (auto r = ranges.begin(); r != ranges.end(); ++r) { - TraceEvent("FastRestoreLoader", self->id()) - .detail("RangeIndex", i++) - .detail("RangeBegin", r->begin()) - .detail("RangeEnd", r->end()) - .detail("Version", r->value()); - } - - req.reply.send(RestoreCommonReply(self->id())); -} - -ACTOR static Future _decryptMutation(MutationRef mutation, Database cx, Arena* arena) { - ASSERT(mutation.isEncrypted()); - - Reference const> dbInfo = cx->clientInfo; - std::unordered_set cipherDetails; - mutation.updateEncryptCipherDetails(cipherDetails); - std::unordered_map> getCipherKeysResult = wait( - GetEncryptCipherKeys::getEncryptCipherKeys(dbInfo, cipherDetails, BlobCipherMetrics::BACKUP)); - return mutation.decrypt(getCipherKeysResult, *arena, BlobCipherMetrics::BACKUP); -} - -// Parse a data block in a partitioned mutation log file and store mutations -// into "kvOpsIter" and samples into "samplesIter". -ACTOR static Future _parsePartitionedLogFileOnLoader( - KeyRangeMap* pRangeVersions, - NotifiedVersion* processedFileOffset, - std::map::iterator kvOpsIter, - std::map::iterator samplesIter, - LoaderCounters* cc, - Reference bc, - RestoreAsset asset, - Database cx) { - state Standalone buf = makeString(asset.len); - state Reference file = wait(bc->readFile(asset.filename)); - int rLen = wait(file->read(mutateString(buf), asset.len, asset.offset)); - if (rLen != asset.len) - throw restore_bad_read(); - - simulateBlobFailure(); - - TraceEvent("FastRestoreLoaderDecodingLogFile") - .detail("BatchIndex", asset.batchIndex) - .detail("Filename", asset.filename) - .detail("Offset", asset.offset) - .detail("Length", asset.len); - - state Arena tempArena; - state StringRefReader reader(buf, restore_corrupted_data()); - try { - // Read block header - if (reader.consume() != PARTITIONED_MLOG_VERSION) - throw restore_unsupported_file_version(); - - state VersionedMutationsMap* kvOps = &kvOpsIter->second; - while (1) { - // If eof reached or first key len bytes is 0xFF then end of block was reached. - if (reader.eof() || *reader.rptr == 0xFF) - break; - - // Deserialize messages written in saveMutationsToFile(). - state LogMessageVersion msgVersion; - msgVersion.version = reader.consumeNetworkUInt64(); - msgVersion.sub = reader.consumeNetworkUInt32(); - state int msgSize = reader.consumeNetworkInt32(); - state const uint8_t* message = reader.consume(msgSize); - - // Skip mutations out of the version range - if (!asset.isInVersionRange(msgVersion.version)) { - wait(yield()); // avoid potential stack overflows - continue; - } - - state VersionedMutationsMap::iterator it; - bool inserted; - std::tie(it, inserted) = kvOps->emplace(msgVersion, MutationsVec()); - // A clear mutation can be split into multiple mutations with the same (version, sub). - // See saveMutationsToFile(). Current tests only use one key range per backup, thus - // only one clear mutation is generated (i.e., always inserted). - ASSERT(inserted); - - ArenaReader rd(buf.arena(), StringRef(message, msgSize), AssumeVersion(g_network->protocolVersion())); - state MutationRef mutation; - rd >> mutation; - if (mutation.isEncrypted()) { - MutationRef decryptedMutation = wait(_decryptMutation(mutation, cx, &tempArena)); - mutation = decryptedMutation; - } - - // Skip mutation whose commitVesion < range kv's version - if (logMutationTooOld(pRangeVersions, mutation, msgVersion.version)) { - cc->oldLogMutations += 1; - wait(yield()); // avoid potential stack overflows - continue; - } - - // Should this mutation be skipped? - if (mutation.param1 >= asset.range.end || - (isRangeMutation(mutation) && mutation.param2 < asset.range.begin) || - (!isRangeMutation(mutation) && mutation.param1 < asset.range.begin)) { - wait(yield()); // avoid potential stack overflows - continue; - } - - // Only apply mutation within the asset.range - ASSERT(asset.removePrefix.size() == 0); - if (isRangeMutation(mutation)) { - mutation.param1 = mutation.param1 >= asset.range.begin ? mutation.param1 : asset.range.begin; - mutation.param2 = mutation.param2 < asset.range.end ? mutation.param2 : asset.range.end; - // Remove prefix or add prefix when we restore to a new key space - if (asset.hasPrefix()) { // Avoid creating new Key - mutation.param1 = - mutation.param1.removePrefix(asset.removePrefix).withPrefix(asset.addPrefix, tempArena); - mutation.param2 = - mutation.param2.removePrefix(asset.removePrefix).withPrefix(asset.addPrefix, tempArena); - } - } else { - if (asset.hasPrefix()) { // Avoid creating new Key - mutation.param1 = - mutation.param1.removePrefix(asset.removePrefix).withPrefix(asset.addPrefix, tempArena); - } - } - - TraceEvent(SevFRMutationInfo, "FastRestoreDecodePartitionedLogFile") - .detail("CommitVersion", msgVersion.toString()) - .detail("ParsedMutation", mutation.toString()); - it->second.push_back_deep(it->second.arena(), mutation); - cc->loadedLogBytes += mutation.totalSize(); - // Sampling data similar to SS sample kvs - ByteSampleInfo sampleInfo = isKeyValueInSample(KeyValueRef(mutation.param1, mutation.param2)); - if (sampleInfo.inSample) { - cc->sampledLogBytes += sampleInfo.sampledSize; - samplesIter->second.push_back_deep(samplesIter->second.arena(), - SampledMutation(mutation.param1, sampleInfo.sampledSize)); - } - } - - // Make sure any remaining bytes in the block are 0xFF - for (auto b : reader.remainder()) { - if (b != 0xFF) - throw restore_corrupted_data_padding(); - } - } catch (Error& e) { - TraceEvent(SevWarn, "FileRestoreCorruptLogFileBlock") - .error(e) - .detail("BatchIndex", asset.batchIndex) - .detail("Filename", file->getFilename()) - .detail("BlockOffset", asset.offset) - .detail("BlockLen", asset.len); - throw; - } - return Void(); -} - -// wrapper of _parsePartitionedLogFileOnLoader to retry on blob error -ACTOR static Future parsePartitionedLogFileOnLoader( - KeyRangeMap* pRangeVersions, - NotifiedVersion* processedFileOffset, - std::map::iterator kvOpsIter, - std::map::iterator samplesIter, - LoaderCounters* cc, - Reference bc, - RestoreAsset asset, - Database cx) { - state int readFileRetries = 0; - loop { - try { - // Ensure data blocks in the same file are processed in order - wait(processedFileOffset->whenAtLeast(asset.offset)); - ASSERT(processedFileOffset->get() == asset.offset); - - wait(_parsePartitionedLogFileOnLoader( - pRangeVersions, processedFileOffset, kvOpsIter, samplesIter, cc, bc, asset, cx)); - processedFileOffset->set(asset.offset + asset.len); - - TraceEvent("FastRestoreLoaderDecodingLogFileDone") - .detail("BatchIndex", asset.batchIndex) - .detail("Filename", asset.filename) - .detail("Offset", asset.offset) - .detail("Length", asset.len); - break; - } catch (Error& e) { - if (e.code() == error_code_restore_bad_read || e.code() == error_code_restore_unsupported_file_version || - e.code() == error_code_restore_corrupted_data_padding) { // no retriable error - TraceEvent(SevError, "FastRestoreFileRestoreCorruptedPartitionedLogFileBlock").error(e); - throw; - } else if (e.code() == error_code_http_request_failed || e.code() == error_code_connection_failed || - e.code() == error_code_timed_out || e.code() == error_code_lookup_failed) { - // blob http request failure, retry - TraceEvent(SevWarnAlways, "FastRestoreDecodedPartitionedLogFileConnectionFailure") - .error(e) - .detail("Retries", ++readFileRetries); - wait(delayJittered(0.1)); - } else { - TraceEvent(SevError, "FastRestoreParsePartitionedLogFileOnLoaderUnexpectedError").error(e); - throw; - } - } - } - return Void(); -} - -ACTOR Future _processLoadingParam(KeyRangeMap* pRangeVersions, - LoadingParam param, - Reference batchData, - UID loaderID, - Reference bc, - Database cx) { - // Temporary data structure for parsing log files into (version, ) - // Must use StandAlone to save mutations, otherwise, the mutationref memory will be corrupted - // mutationMap: Key is the unique identifier for a batch of mutation logs at the same version - state SerializedMutationListMap mutationMap; - state NotifiedVersion processedFileOffset(0); - state std::vector> fileParserFutures; - state std::map::iterator kvOpsPerLPIter = batchData->kvOpsPerLP.end(); - state std::map::iterator samplesIter = batchData->sampleMutations.end(); - - TraceEvent("FastRestoreLoaderProcessLoadingParam", loaderID) - .detail("BatchIndex", param.asset.batchIndex) - .detail("LoadingParam", param.toString()); - ASSERT(param.blockSize > 0); - ASSERT(param.asset.offset % param.blockSize == 0); // Parse file must be at block boundary. - ASSERT(batchData->kvOpsPerLP.find(param) == batchData->kvOpsPerLP.end()); - - // NOTE: map's iterator is guaranteed to be stable, but pointer may not. - bool inserted; - std::tie(kvOpsPerLPIter, inserted) = batchData->kvOpsPerLP.emplace(param, VersionedMutationsMap()); - ASSERT(inserted); - std::tie(samplesIter, inserted) = batchData->sampleMutations.emplace(param, SampledMutationsVec()); - ASSERT(inserted); - - for (int64_t j = param.asset.offset; j < param.asset.len; j += param.blockSize) { - RestoreAsset subAsset = param.asset; - subAsset.offset = j; - subAsset.len = std::min(param.blockSize, param.asset.len - j); - if (param.isRangeFile) { - fileParserFutures.push_back(_parseRangeFileToMutationsOnLoader( - kvOpsPerLPIter, samplesIter, &batchData->counters, bc, param.rangeVersion.get(), subAsset, cx)); - } else { - // TODO: Sanity check the log file's range is overlapped with the restored version range - if (param.isPartitionedLog()) { - fileParserFutures.push_back(parsePartitionedLogFileOnLoader(pRangeVersions, - &processedFileOffset, - kvOpsPerLPIter, - samplesIter, - &batchData->counters, - bc, - subAsset, - cx)); - } else { - fileParserFutures.push_back( - parseLogFileToMutationsOnLoader(&processedFileOffset, &mutationMap, bc, subAsset)); - } - } - } - wait(waitForAll(fileParserFutures)); - - if (!param.isRangeFile && !param.isPartitionedLog()) { - wait(_parseSerializedMutation( - pRangeVersions, kvOpsPerLPIter, &mutationMap, samplesIter, &batchData->counters, param.asset, cx)); - } - - TraceEvent("FastRestoreLoaderProcessLoadingParamDone", loaderID) - .detail("BatchIndex", param.asset.batchIndex) - .detail("LoadingParam", param.toString()); - - return Void(); -} - -// A loader can process multiple RestoreLoadFileRequest in parallel. -ACTOR Future handleLoadFileRequest(RestoreLoadFileRequest req, Reference self, Database cx) { - state Reference batchData = self->batch[req.batchIndex]; - state bool isDuplicated = true; - state bool printTrace = false; - ASSERT(batchData.isValid()); - ASSERT(req.batchIndex > self->finishedBatch.get()); - bool paramExist = batchData->processedFileParams.find(req.param) != batchData->processedFileParams.end(); - bool isReady = paramExist ? batchData->processedFileParams[req.param].isReady() : false; - - batchData->loadFileReqs += 1; - printTrace = (batchData->loadFileReqs % 10 == 1); - // TODO: Make the actor priority lower than sendMutation priority. (Unsure it will help performance though) - TraceEvent(printTrace ? SevInfo : SevFRDebugInfo, "FastRestoreLoaderPhaseLoadFile", self->id()) - .detail("BatchIndex", req.batchIndex) - .detail("ProcessLoadParam", req.param.toString()) - .detail("NotProcessed", !paramExist) - .detail("Processed", isReady) - .detail("CurrentMemory", getSystemStatistics().processMemory); - // Loader destroy batchData once the batch finishes and self->finishedBatch.set(req.batchIndex); - ASSERT(self->finishedBatch.get() < req.batchIndex); - - wait(isSchedulable(self, req.batchIndex, __FUNCTION__)); - - if (batchData->processedFileParams.find(req.param) == batchData->processedFileParams.end()) { - TraceEvent(SevFRDebugInfo, "FastRestoreLoadFile", self->id()) - .detail("BatchIndex", req.batchIndex) - .detail("ProcessLoadParam", req.param.toString()); - ASSERT(batchData->sampleMutations.find(req.param) == batchData->sampleMutations.end()); - batchData->processedFileParams[req.param] = - _processLoadingParam(&self->rangeVersions, req.param, batchData, self->id(), self->bc, cx); - self->inflightLoadingReqs++; - isDuplicated = false; - } else { - TraceEvent(SevFRDebugInfo, "FastRestoreLoadFile", self->id()) - .detail("BatchIndex", req.batchIndex) - .detail("WaitOnProcessLoadParam", req.param.toString()); - } - auto it = batchData->processedFileParams.find(req.param); - ASSERT(it != batchData->processedFileParams.end()); - wait(it->second); // wait on the processing of the req.param. - - // Send sampled mutations back to controller: batchData->sampleMutations[req.param] - std::vector> fSendSamples; - SampledMutationsVec& samples = batchData->sampleMutations[req.param]; - SampledMutationsVec sampleBatch = SampledMutationsVec(); // sampleBatch: Standalone pointer to the created object - long sampleBatchSize = 0; - for (int i = 0; i < samples.size(); ++i) { - sampleBatchSize += samples[i].totalSize(); - sampleBatch.push_back_deep(sampleBatch.arena(), samples[i]); // TODO: may not need deep copy - if (sampleBatchSize >= SERVER_KNOBS->FASTRESTORE_SAMPLE_MSG_BYTES) { - fSendSamples.push_back(self->ci.samples.getReply( - RestoreSamplesRequest(deterministicRandom()->randomUniqueID(), req.batchIndex, sampleBatch))); - sampleBatchSize = 0; - sampleBatch = SampledMutationsVec(); - } - } - if (sampleBatchSize > 0) { - fSendSamples.push_back(self->ci.samples.getReply( - RestoreSamplesRequest(deterministicRandom()->randomUniqueID(), req.batchIndex, sampleBatch))); - sampleBatchSize = 0; - } - - state int samplesMessages = fSendSamples.size(); - try { - wait(waitForAll(fSendSamples)); - } catch (Error& e) { // In case ci.samples throws broken_promise due to unstable network - if (e.code() == error_code_broken_promise || e.code() == error_code_operation_cancelled) { - TraceEvent(SevWarnAlways, "FastRestoreLoaderPhaseLoadFileSendSamples") - .errorUnsuppressed(e) - .detail("SamplesMessages", samplesMessages); - } else { - TraceEvent(SevError, "FastRestoreLoaderPhaseLoadFileSendSamplesUnexpectedError").errorUnsuppressed(e); - } - } - - // Ack restore controller the param is processed - self->inflightLoadingReqs--; - req.reply.send(RestoreLoadFileReply(req.param, isDuplicated)); - TraceEvent(printTrace ? SevInfo : SevFRDebugInfo, "FastRestoreLoaderPhaseLoadFileDone", self->id()) - .detail("BatchIndex", req.batchIndex) - .detail("ProcessLoadParam", req.param.toString()); - - return Void(); -} - -// Send buffered mutations to appliers. -// Do not need to block on low memory usage because this actor should not increase memory usage. -ACTOR Future handleSendMutationsRequest(RestoreSendMutationsToAppliersRequest req, - Reference self, - Database cx) { - state Reference batchData; - state Reference batchStatus; - state bool isDuplicated = true; - - if (req.batchIndex <= self->finishedBatch.get()) { - TraceEvent(SevWarn, "FastRestoreLoaderRestoreSendMutationsToAppliersRequestTooOld") - .detail("FinishedBatchIndex", self->finishedBatch.get()) - .detail("RequestBatchIndex", req.batchIndex); - req.reply.send(RestoreCommonReply(self->id(), isDuplicated)); - return Void(); - } - - batchData = self->batch[req.batchIndex]; - batchStatus = self->status[req.batchIndex]; - ASSERT(batchData.isValid() && batchStatus.isValid()); - // Loader destroy batchData once the batch finishes and self->finishedBatch.set(req.batchIndex); - ASSERT(req.batchIndex > self->finishedBatch.get()); - TraceEvent("FastRestoreLoaderPhaseSendMutations", self->id()) - .detail("BatchIndex", req.batchIndex) - .detail("UseRangeFile", req.useRangeFile) - .detail("LoaderSendStatus", batchStatus->toString()); - // The VB must finish loading phase before it can send mutations; update finishedLoadingVB for scheduler - self->finishedLoadingVB = std::max(self->finishedLoadingVB, req.batchIndex); - - // Ensure each file is sent exactly once by using batchStatus->sendAllLogs and batchStatus->sendAllRanges - if (!req.useRangeFile) { - if (!batchStatus->sendAllLogs.present()) { // Has not sent - batchStatus->sendAllLogs = Never(); - isDuplicated = false; - TraceEvent(SevInfo, "FastRestoreSendMutationsProcessLogRequest", self->id()) - .detail("BatchIndex", req.batchIndex) - .detail("UseRangeFile", req.useRangeFile); - } else if (!batchStatus->sendAllLogs.get().isReady()) { // In the process of sending - TraceEvent(SevDebug, "FastRestoreSendMutationsWaitDuplicateLogRequest", self->id()) - .detail("BatchIndex", req.batchIndex) - .detail("UseRangeFile", req.useRangeFile); - wait(batchStatus->sendAllLogs.get()); - } else { // Already sent - TraceEvent(SevDebug, "FastRestoreSendMutationsSkipDuplicateLogRequest", self->id()) - .detail("BatchIndex", req.batchIndex) - .detail("UseRangeFile", req.useRangeFile); - } - } else { - if (!batchStatus->sendAllRanges.present()) { - batchStatus->sendAllRanges = Never(); - isDuplicated = false; - TraceEvent(SevInfo, "FastRestoreSendMutationsProcessRangeRequest", self->id()) - .detail("BatchIndex", req.batchIndex) - .detail("UseRangeFile", req.useRangeFile); - } else if (!batchStatus->sendAllRanges.get().isReady()) { - TraceEvent(SevDebug, "FastRestoreSendMutationsWaitDuplicateRangeRequest", self->id()) - .detail("BatchIndex", req.batchIndex) - .detail("UseRangeFile", req.useRangeFile); - wait(batchStatus->sendAllRanges.get()); - } else { - TraceEvent(SevDebug, "FastRestoreSendMutationsSkipDuplicateRangeRequest", self->id()) - .detail("BatchIndex", req.batchIndex) - .detail("UseRangeFile", req.useRangeFile); - } - } - - if (!isDuplicated) { - self->inflightSendingReqs++; - std::vector> fSendMutations; - batchData->rangeToApplier = req.rangeToApplier; - for (auto& [loadParam, kvOps] : batchData->kvOpsPerLP) { - if (loadParam.isRangeFile == req.useRangeFile) { - // Send the parsed mutation to applier who will apply the mutation to DB - fSendMutations.push_back(sendMutationsToApplier(&self->sendLoadParamQueue, - &self->inflightSendLoadParamReqs, - &self->finishedBatch, - &kvOps, - req.batchIndex, - loadParam.asset, - loadParam.isRangeFile, - &batchData->rangeToApplier, - &self->appliersInterf, - cx)); - } - } - wait(waitForAll(fSendMutations)); - self->inflightSendingReqs--; - if (req.useRangeFile) { - batchStatus->sendAllRanges = Void(); // Finish sending kvs parsed from range files - } else { - batchStatus->sendAllLogs = Void(); - } - if ((batchStatus->sendAllRanges.present() && batchStatus->sendAllRanges.get().isReady()) && - (batchStatus->sendAllLogs.present() && batchStatus->sendAllLogs.get().isReady())) { - // Both log and range files have been sent. - self->finishedSendingVB = std::max(self->finishedSendingVB, req.batchIndex); - batchData->kvOpsPerLP.clear(); - } - } - - TraceEvent("FastRestoreLoaderPhaseSendMutationsDone", self->id()) - .detail("BatchIndex", req.batchIndex) - .detail("UseRangeFile", req.useRangeFile) - .detail("LoaderSendStatus", batchStatus->toString()); - req.reply.send(RestoreCommonReply(self->id(), isDuplicated)); - return Void(); -} - -void buildApplierRangeMap(KeyRangeMap* krMap, std::map* pRangeToApplier) { - std::map::iterator beginKey = pRangeToApplier->begin(); - std::map::iterator endKey = std::next(beginKey, 1); - while (endKey != pRangeToApplier->end()) { - krMap->insert(KeyRangeRef(beginKey->first, endKey->first), beginKey->second); - beginKey = endKey; - endKey++; - } - if (beginKey != pRangeToApplier->end()) { - krMap->insert(KeyRangeRef(beginKey->first, allKeys.end), beginKey->second); - } -} - -// Assume: kvOps data are from the same RestoreAsset. -// Input: pkvOps: versioned kv mutation for the asset in the version batch (batchIndex) -// isRangeFile: is pkvOps from range file? Let receiver (applier) know if the mutation is log mutation; -// pRangeToApplier: range to applierID mapping, deciding which applier is responsible for which range -// pApplierInterfaces: applier interfaces to send the mutations to -ACTOR Future sendMutationsToApplier( - std::priority_queue* sendLoadParamQueue, - std::map* inflightSendLoadParamReqs, - NotifiedVersion* finishedBatch, - VersionedMutationsMap* pkvOps, - int batchIndex, - RestoreAsset asset, - bool isRangeFile, - std::map* pRangeToApplier, - std::map* pApplierInterfaces, - Database cx) { - state VersionedMutationsMap& kvOps = *pkvOps; - state VersionedMutationsMap::iterator kvOp = kvOps.begin(); - state int kvCount = 0; - state int splitMutationIndex = 0; - state Version msgIndex = 1; // Monotonically increased index for send message, must start at 1 - state std::vector applierIDs = getApplierIDs(*pRangeToApplier); - state double msgSize = 0; // size of mutations in the message - state Arena arena; - - // Wait for scheduler to kick it off - Promise toSched; - sendLoadParamQueue->push(RestoreLoaderSchedSendLoadParamRequest(batchIndex, toSched, now())); - wait(toSched.getFuture()); - if (finishedBatch->get() >= batchIndex) { - TraceEvent(SevError, "FastRestoreLoaderSendMutationToApplierLateRequest") - .detail("FinishedBatchIndex", finishedBatch->get()) - .detail("RequestBatchIndex", batchIndex); - ASSERT(false); - return Void(); - } - - (*inflightSendLoadParamReqs)[batchIndex]++; - - TraceEvent("FastRestoreLoaderSendMutationToApplier") - .detail("IsRangeFile", isRangeFile) - .detail("EndVersion", asset.endVersion) - .detail("RestoreAsset", asset.toString()); - - // There should be no mutation at asset.endVersion version because it is exclusive - if (kvOps.lower_bound(LogMessageVersion(asset.endVersion)) != kvOps.end()) { - TraceEvent(SevError, "FastRestoreLoaderSendMutationToApplier") - .detail("BatchIndex", batchIndex) - .detail("RestoreAsset", asset.toString()) - .detail("IsRangeFile", isRangeFile) - .detail("Data loss at version", asset.endVersion); - } else { - // Ensure there is a mutation request sent at endVersion, so that applier can advance its notifiedVersion - kvOps[LogMessageVersion(asset.endVersion)] = MutationsVec(); // Empty mutation vector will be handled by applier - } - - splitMutationIndex = 0; - kvCount = 0; - - // applierVersionedMutationsBuffer is the mutation-and-its-version vector to be sent to each applier - state std::map applierVersionedMutationsBuffer; - state int mIndex = 0; - state LogMessageVersion commitVersion; - state std::vector> fSends; - for (auto& applierID : applierIDs) { - applierVersionedMutationsBuffer[applierID] = VersionedMutationsVec(); - } - state KeyRangeMap krMap; - buildApplierRangeMap(&krMap, pRangeToApplier); - for (kvOp = kvOps.begin(); kvOp != kvOps.end(); kvOp++) { - commitVersion = kvOp->first; - ASSERT(commitVersion.version >= asset.beginVersion); - ASSERT(commitVersion.version <= asset.endVersion); // endVersion is an empty commit to ensure progress - for (mIndex = 0; mIndex < kvOp->second.size(); mIndex++) { - state MutationRef kvm = kvOp->second[mIndex]; - if (kvm.isEncrypted()) { - MutationRef decryptedMutation = wait(_decryptMutation(kvm, cx, &arena)); - kvm = decryptedMutation; - } - // Send the mutation to applier - if (isRangeMutation(kvm)) { - MutationsVec mvector; - Standalone> nodeIDs; - // Because using a vector of mutations causes overhead, and the range mutation should happen rarely; - // We handle the range mutation and key mutation differently for the benefit of avoiding memory copy - splitMutation(krMap, kvm, mvector.arena(), mvector.contents(), nodeIDs.arena(), nodeIDs.contents()); - ASSERT(mvector.size() == nodeIDs.size()); - - if (MUTATION_TRACKING_ENABLED) { - TraceEvent&& e = debugMutation("RestoreLoaderDebugSplit", commitVersion.version, kvm); - if (e.isEnabled()) { - int i = 0; - for (auto& [key, uid] : *pRangeToApplier) { - e.detail(format("Range%d", i).c_str(), printable(key)) - .detail(format("UID%d", i).c_str(), uid.toString()); - i++; - } - } - } - for (splitMutationIndex = 0; splitMutationIndex < mvector.size(); splitMutationIndex++) { - MutationRef mutation = mvector[splitMutationIndex]; - UID applierID = nodeIDs[splitMutationIndex]; - DEBUG_MUTATION("RestoreLoaderSplitMutation", commitVersion.version, mutation) - .detail("CommitVersion", commitVersion.toString()); - // CAREFUL: The split mutations' lifetime is shorter than the for-loop - // Must use deep copy for split mutations - applierVersionedMutationsBuffer[applierID].push_back_deep( - applierVersionedMutationsBuffer[applierID].arena(), - VersionedMutationSerialized(mutation, commitVersion)); - msgSize += mutation.expectedSize(); - - kvCount++; - } - } else { // mutation operates on a particular key - std::map::iterator itlow = pRangeToApplier->upper_bound(kvm.param1); - --itlow; // make sure itlow->first <= m.param1 - ASSERT(itlow->first <= kvm.param1); - UID applierID = itlow->second; - kvCount++; - - DEBUG_MUTATION("RestoreLoaderSendMutation", commitVersion.version, kvm) - .detail("Applier", applierID) - .detail("SubVersion", commitVersion.toString()); - // kvm data is saved in pkvOps in batchData, so shallow copy is ok here. - applierVersionedMutationsBuffer[applierID].push_back(applierVersionedMutationsBuffer[applierID].arena(), - VersionedMutationSerialized(kvm, commitVersion)); - msgSize += kvm.expectedSize(); - } - - // Batch mutations at multiple versions up to FASTRESTORE_LOADER_SEND_MUTATION_MSG_BYTES size - // to improve bandwidth from a loader to appliers - if (msgSize >= SERVER_KNOBS->FASTRESTORE_LOADER_SEND_MUTATION_MSG_BYTES) { - std::vector> requests; - requests.reserve(applierIDs.size()); - for (const UID& applierID : applierIDs) { - requests.emplace_back( - applierID, - RestoreSendVersionedMutationsRequest( - batchIndex, asset, msgIndex, isRangeFile, applierVersionedMutationsBuffer[applierID])); - } - TraceEvent(SevInfo, "FastRestoreLoaderSendMutationToApplier") - .detail("MessageIndex", msgIndex) - .detail("RestoreAsset", asset.toString()) - .detail("Requests", requests.size()); - fSends.push_back(sendBatchRequests(&RestoreApplierInterface::sendMutationVector, - *pApplierInterfaces, - requests, - TaskPriority::RestoreLoaderSendMutations)); - msgIndex++; - msgSize = 0; - for (auto& applierID : applierIDs) { - applierVersionedMutationsBuffer[applierID] = VersionedMutationsVec(); - } - } - } // Mutations at the same LogMessageVersion - } // all versions of mutations in the same file - - // Send the remaining mutations in the applierMutationsBuffer - if (msgSize > 0) { - // TODO: Sanity check each asset has been received exactly once! - std::vector> requests; - requests.reserve(applierIDs.size()); - for (const UID& applierID : applierIDs) { - requests.emplace_back( - applierID, - RestoreSendVersionedMutationsRequest( - batchIndex, asset, msgIndex, isRangeFile, applierVersionedMutationsBuffer[applierID])); - } - TraceEvent(SevInfo, "FastRestoreLoaderSendMutationToApplier") - .detail("MessageIndex", msgIndex) - .detail("RestoreAsset", asset.toString()) - .detail("Requests", requests.size()); - fSends.push_back(sendBatchRequests(&RestoreApplierInterface::sendMutationVector, - *pApplierInterfaces, - requests, - TaskPriority::RestoreLoaderSendMutations)); - } - wait(waitForAll(fSends)); - - (*inflightSendLoadParamReqs)[batchIndex]--; - - if (finishedBatch->get() < batchIndex) { - kvOps = VersionedMutationsMap(); // Free memory for parsed mutations at the restore asset. - TraceEvent("FastRestoreLoaderSendMutationToApplierDone") - .detail("BatchIndex", batchIndex) - .detail("RestoreAsset", asset.toString()) - .detail("Mutations", kvCount); - } else { - TraceEvent(SevWarnAlways, "FastRestoreLoaderSendMutationToApplierDoneTooLate") - .detail("BatchIndex", batchIndex) - .detail("FinishedBatchIndex", finishedBatch->get()) - .detail("RestoreAsset", asset.toString()) - .detail("Mutations", kvCount); - } - - return Void(); -} - -// Splits a clear range mutation for Appliers and puts results of split mutations and -// Applier IDs into "mvector" and "nodeIDs" on return. -void splitMutation(const KeyRangeMap& krMap, - MutationRef m, - Arena& mvector_arena, - VectorRef& mvector, - Arena& nodeIDs_arena, - VectorRef& nodeIDs) { - TraceEvent(SevVerbose, "FastRestoreSplitMutation").detail("Mutation", m); - ASSERT(mvector.empty()); - ASSERT(nodeIDs.empty()); - auto r = krMap.intersectingRanges(KeyRangeRef(m.param1, m.param2)); - for (auto i = r.begin(); i != r.end(); ++i) { - // Calculate the overlap range - KeyRef rangeBegin = m.param1 > i->range().begin ? m.param1 : i->range().begin; - KeyRef rangeEnd = m.param2 < i->range().end ? m.param2 : i->range().end; - KeyRange krange1(KeyRangeRef(rangeBegin, rangeEnd)); - mvector.push_back_deep(mvector_arena, MutationRef(MutationRef::ClearRange, rangeBegin, rangeEnd)); - nodeIDs.push_back(nodeIDs_arena, i->cvalue()); - } -} - -// key_input format: -// [logRangeMutation.first][hash_value_of_commit_version:1B][bigEndian64(commitVersion)][bigEndian32(part)] -// value_input: serialized binary of mutations at the same version -bool concatenateBackupMutationForLogFile(SerializedMutationListMap* pMutationMap, - Standalone key_input, - Standalone val_input, - const RestoreAsset& asset) { - SerializedMutationListMap& mutationMap = *pMutationMap; - const int key_prefix_len = sizeof(uint8_t) + sizeof(Version) + sizeof(uint32_t); - - StringRefReader readerKey(key_input, restore_corrupted_data()); // read key_input! - int logRangeMutationFirstLength = key_input.size() - key_prefix_len; - bool concatenated = false; - - ASSERT_WE_THINK(key_input.size() >= key_prefix_len); - - if (logRangeMutationFirstLength > 0) { - // Strip out the [logRangeMutation.first]; otherwise, the following readerKey.consume will produce wrong value - readerKey.consume(logRangeMutationFirstLength); - } - - readerKey.consume(); // uint8_t hashValue = readerKey.consume() - Version commitVersion = readerKey.consumeNetworkUInt64(); - // Skip mutations not in [asset.beginVersion, asset.endVersion), which is what we are only processing right now - if (!asset.isInVersionRange(commitVersion)) { - return false; - } - - uint32_t part = readerKey.consumeNetworkUInt32(); - // Use commitVersion as id - Standalone id = StringRef((uint8_t*)&commitVersion, sizeof(Version)); - - auto it = mutationMap.find(id); - if (it == mutationMap.end()) { - mutationMap.emplace(id, std::make_pair(val_input, 0)); - if (part != 0) { - TraceEvent(SevError, "FastRestoreLoader") - .detail("FirstPartNotZero", part) - .detail("KeyInput", getHexString(key_input)); - } - } else { // Concatenate the val string with the same commitVersion - it->second.first = - it->second.first.contents().withSuffix(val_input.contents()); // Assign the new Areana to the map's value - auto& currentPart = it->second.second; - if (part != (currentPart + 1)) { - // Check if the same range or log file has been processed more than once! - TraceEvent(SevError, "FastRestoreLoader") - .detail("CurrentPart1", currentPart) - .detail("CurrentPart2", part) - .detail("KeyInput", getHexString(key_input)) - .detail("Hint", "Check if the same range or log file has been processed more than once"); - } - currentPart = part; - concatenated = true; - } - - return concatenated; -} - -// Parse the kv pair (version, serialized_mutation), which are the results parsed from log file, into -// (version, ) pair; -// Put the parsed versioned mutations into *pkvOps. -// -// Input key: [commitVersion_of_the_mutation_batch:uint64_t]; -// Input value: [includeVersion:uint64_t][val_length:uint32_t][encoded_list_of_mutations], where -// includeVersion is the serialized version in the batch commit. It is not the commitVersion in Input key. -// -// val_length is always equal to (val.size() - 12); otherwise, -// we may not get the entire mutation list for the version encoded_list_of_mutations: -// [mutation1][mutation2]...[mutationk], where -// a mutation is encoded as [type:uint32_t][keyLength:uint32_t][valueLength:uint32_t][keyContent][valueContent] -ACTOR Future _parseSerializedMutation(KeyRangeMap* pRangeVersions, - std::map::iterator kvOpsIter, - SerializedMutationListMap* pmutationMap, - std::map::iterator samplesIter, - LoaderCounters* cc, - RestoreAsset asset, - Database cx) { - state VersionedMutationsMap* kvOps = &kvOpsIter->second; - state SampledMutationsVec* samples = &samplesIter->second; - state SerializedMutationListMap::iterator mutationMapIterator = pmutationMap->begin(); - - TraceEvent(SevFRMutationInfo, "FastRestoreLoaderParseSerializedLogMutation") - .detail("BatchIndex", asset.batchIndex) - .detail("RestoreAsset", asset.toString()); - - state Arena tempArena; - loop { - if (mutationMapIterator == pmutationMap->end()) { - break; - } - StringRef k = mutationMapIterator->first.contents(); - state StringRef val = mutationMapIterator->second.first.contents(); - - StringRefReader kReader(k, restore_corrupted_data()); - state uint64_t commitVersion = kReader.consume(); // Consume little Endian data - // We have already filter the commit not in [beginVersion, endVersion) when we concatenate kv pair in log file - ASSERT_WE_THINK(asset.isInVersionRange(commitVersion)); - - state StringRefReader vReader(val, restore_corrupted_data()); - vReader.consume(); // Consume the includeVersion - // TODO(xumengpanda): verify the protocol version is compatible and raise error if needed - - // Parse little endian value, confirmed it is correct! - uint32_t val_length_decoded = vReader.consume(); - ASSERT(val_length_decoded == val.size() - sizeof(uint64_t) - sizeof(uint32_t)); - - state int sub = 0; - loop { - // stop when reach the end of the string - if (vReader.eof()) { //|| *reader.rptr == 0xFF - break; - } - - state uint32_t type = vReader.consume(); - state uint32_t kLen = vReader.consume(); - state uint32_t vLen = vReader.consume(); - state const uint8_t* k = vReader.consume(kLen); - state const uint8_t* v = vReader.consume(vLen); - - state MutationRef mutation((MutationRef::Type)type, KeyRef(k, kLen), KeyRef(v, vLen)); - if (mutation.isEncrypted()) { - MutationRef decryptedMutation = wait(_decryptMutation(mutation, cx, &tempArena)); - mutation = decryptedMutation; - } - // Should this mutation be skipped? - // Skip mutation whose commitVesion < range kv's version - if (logMutationTooOld(pRangeVersions, mutation, commitVersion)) { - cc->oldLogMutations += 1; - } else { - if (mutation.param1 >= asset.range.end || - (isRangeMutation(mutation) && mutation.param2 < asset.range.begin) || - (!isRangeMutation(mutation) && mutation.param1 < asset.range.begin)) { - } else { - // Only apply mutation within the asset.range and apply removePrefix and addPrefix - ASSERT(asset.removePrefix.size() == 0); - if (isRangeMutation(mutation)) { - mutation.param1 = mutation.param1 >= asset.range.begin ? mutation.param1 : asset.range.begin; - mutation.param2 = mutation.param2 < asset.range.end ? mutation.param2 : asset.range.end; - // Remove prefix or add prefix if we restore data to a new key space - if (asset.hasPrefix()) { // Avoid creating new Key - mutation.param1 = - mutation.param1.removePrefix(asset.removePrefix).withPrefix(asset.addPrefix, tempArena); - mutation.param2 = - mutation.param2.removePrefix(asset.removePrefix).withPrefix(asset.addPrefix, tempArena); - } - } else { - if (asset.hasPrefix()) { // Avoid creating new Key - mutation.param1 = - mutation.param1.removePrefix(asset.removePrefix).withPrefix(asset.addPrefix, tempArena); - } - } - - cc->loadedLogBytes += mutation.totalSize(); - - TraceEvent(SevFRMutationInfo, "FastRestoreDecodeLogFile") - .detail("CommitVersion", commitVersion) - .detail("ParsedMutation", mutation.toString()); - - auto it = kvOps->insert(std::make_pair(LogMessageVersion(commitVersion, sub++), MutationsVec())); - ASSERT(it.second); // inserted is true - ASSERT(sub < - std::numeric_limits::max()); // range file mutation uses int32_max as subversion - it.first->second.push_back_deep(it.first->second.arena(), mutation); - - // Sampling data similar to how SS sample bytes - ByteSampleInfo sampleInfo = isKeyValueInSample(KeyValueRef(mutation.param1, mutation.param2)); - if (sampleInfo.inSample) { - cc->sampledLogBytes += sampleInfo.sampledSize; - samples->push_back_deep(samples->arena(), - SampledMutation(mutation.param1, sampleInfo.sampledSize)); - } - ASSERT_WE_THINK(kLen >= 0 && kLen < val.size()); - ASSERT_WE_THINK(vLen >= 0 && vLen < val.size()); - } - } - } - mutationMapIterator++; - } - return Void(); -} - -// Parsing the data blocks in a range file -// kvOpsIter: saves the parsed versioned-mutations for the specific LoadingParam; -// samplesIter: saves the sampled mutations from the parsed versioned-mutations; -// bc: backup container to read the backup file -// version: the version the parsed mutations should be at -// asset: RestoreAsset about which backup data should be parsed -ACTOR static Future _parseRangeFileToMutationsOnLoader( - std::map::iterator kvOpsIter, - std::map::iterator samplesIter, - LoaderCounters* cc, - Reference bc, - Version version, - RestoreAsset asset, - Database cx) { - state VersionedMutationsMap& kvOps = kvOpsIter->second; - state SampledMutationsVec& sampleMutations = samplesIter->second; - - TraceEvent(SevFRDebugInfo, "FastRestoreDecodedRangeFile") - .detail("BatchIndex", asset.batchIndex) - .detail("Filename", asset.filename) - .detail("Version", version) - .detail("BeginVersion", asset.beginVersion) - .detail("EndVersion", asset.endVersion) - .detail("RestoreAsset", asset.toString()); - // Sanity check the range file is within the restored version range - ASSERT_WE_THINK(asset.isInVersionRange(version)); - - state Standalone> blockData; - // should retry here - state int readFileRetries = 0; - loop { - try { - // The set of key value version is rangeFile.version. the key-value set in the same range file has the same - // version - Reference inFile = wait(bc->readFile(asset.filename)); - Standalone> kvs = - wait(fileBackup::decodeRangeFileBlock(inFile, asset.offset, asset.len, cx)); - TraceEvent("FastRestoreLoaderDecodedRangeFile") - .detail("BatchIndex", asset.batchIndex) - .detail("Filename", asset.filename) - .detail("DataSize", kvs.contents().size()); - blockData = kvs; - break; - } catch (Error& e) { - if (e.code() == error_code_restore_bad_read || e.code() == error_code_restore_unsupported_file_version || - e.code() == error_code_restore_corrupted_data_padding) { // no retriable error - TraceEvent(SevError, "FastRestoreFileRestoreCorruptedRangeFileBlock").error(e); - throw; - } else if (e.code() == error_code_http_request_failed || e.code() == error_code_connection_failed || - e.code() == error_code_timed_out || e.code() == error_code_lookup_failed) { - // blob http request failure, retry - TraceEvent(SevWarnAlways, "FastRestoreDecodedRangeFileConnectionFailure") - .error(e) - .detail("Retries", ++readFileRetries); - wait(delayJittered(0.1)); - } else { - TraceEvent(SevError, "FastRestoreParseRangeFileOnLoaderUnexpectedError").error(e); - throw; - } - } - } - - // First and last key are the range for this file - KeyRange fileRange = KeyRangeRef(blockData.front().key, blockData.back().key); - - // If fileRange doesn't intersect restore range then we're done. - if (!fileRange.intersects(asset.range)) { - return Void(); - } - - // We know the file range intersects the restore range but there could still be keys outside the restore range. - // Find the subvector of kv pairs that intersect the restore range. - // Note that the first and last keys are just the range endpoints for this file. - // They are metadata, not the real data. - int rangeStart = 1; - int rangeEnd = blockData.size() - 1; // The rangeStart and rangeEnd is [,) - - // Slide start from beginning, stop if something in range is found - // Move rangeStart and rangeEnd until they is within restoreRange - while (rangeStart < rangeEnd && !asset.range.contains(blockData[rangeStart].key)) { - ++rangeStart; - } - // Side end from back, stop if something at (rangeEnd-1) is found in range - while (rangeEnd > rangeStart && !asset.range.contains(blockData[rangeEnd - 1].key)) { - --rangeEnd; - } - - // Now data only contains the kv mutation within restoreRange - VectorRef data = blockData.slice(rangeStart, rangeEnd); - - // Note we give INT_MAX as the sub sequence number to override any log mutations. - const LogMessageVersion msgVersion(version, std::numeric_limits::max()); - - // Convert KV in data into SET mutations of different keys in kvOps - Arena tempArena; - for (const KeyValueRef& kv : data) { - // NOTE: The KV pairs in range files are the real KV pairs in original DB. - MutationRef m(MutationRef::Type::SetValue, kv.key, kv.value); - // Remove prefix or add prefix in case we restore data to a different sub keyspace - if (asset.hasPrefix()) { // Avoid creating new Key - ASSERT(asset.removePrefix.size() == 0); - m.param1 = m.param1.removePrefix(asset.removePrefix).withPrefix(asset.addPrefix, tempArena); - } - - cc->loadedRangeBytes += m.totalSize(); - - // We cache all kv operations into kvOps, and apply all kv operations later in one place - auto it = kvOps.insert(std::make_pair(msgVersion, MutationsVec())); - TraceEvent(SevFRMutationInfo, "FastRestoreDecodeRangeFile") - .detail("BatchIndex", asset.batchIndex) - .detail("CommitVersion", version) - .detail("ParsedMutationKV", m.toString()); - - it.first->second.push_back_deep(it.first->second.arena(), m); - // Sampling (FASTRESTORE_SAMPLING_PERCENT%) data - ByteSampleInfo sampleInfo = isKeyValueInSample(KeyValueRef(m.param1, m.param2)); - if (sampleInfo.inSample) { - cc->sampledRangeBytes += sampleInfo.sampledSize; - sampleMutations.push_back_deep(sampleMutations.arena(), SampledMutation(m.param1, sampleInfo.sampledSize)); - } - } - - return Void(); -} - -// Parse data blocks in a log file into a vector of pairs. -// Each pair.second contains the mutations at a version encoded in pair.first; -// Step 1: decodeLogFileBlock into pairs; -// Step 2: Concatenate the second of pairs with the same pair.first. -// pProcessedFileOffset: ensure each data block is processed in order exactly once; -// pMutationMap: concatenated mutation list string at the mutation's commit version -ACTOR static Future _parseLogFileToMutationsOnLoader(NotifiedVersion* pProcessedFileOffset, - SerializedMutationListMap* pMutationMap, - Reference bc, - RestoreAsset asset) { - Reference inFile = wait(bc->readFile(asset.filename)); - // decodeLogFileBlock() must read block by block! - state Standalone> data = - wait(parallelFileRestore::decodeLogFileBlock(inFile, asset.offset, asset.len)); - TraceEvent("FastRestoreLoaderDecodeLogFile") - .detail("BatchIndex", asset.batchIndex) - .detail("RestoreAsset", asset.toString()) - .detail("DataSize", data.contents().size()); - - // Ensure data blocks in the same file are processed in order - wait(pProcessedFileOffset->whenAtLeast(asset.offset)); - - if (pProcessedFileOffset->get() == asset.offset) { - for (const KeyValueRef& kv : data) { - // Concatenate the backup param1 and param2 (KV) at the same version. - concatenateBackupMutationForLogFile(pMutationMap, kv.key, kv.value, asset); - } - pProcessedFileOffset->set(asset.offset + asset.len); - } - - return Void(); -} - -// retry on _parseLogFileToMutationsOnLoader -ACTOR static Future parseLogFileToMutationsOnLoader(NotifiedVersion* pProcessedFileOffset, - SerializedMutationListMap* pMutationMap, - Reference bc, - RestoreAsset asset) { - state int readFileRetries = 0; - loop { - try { - wait(_parseLogFileToMutationsOnLoader(pProcessedFileOffset, pMutationMap, bc, asset)); - break; - } catch (Error& e) { - if (e.code() == error_code_restore_bad_read || e.code() == error_code_restore_unsupported_file_version || - e.code() == error_code_restore_corrupted_data_padding) { // non retriable error - TraceEvent(SevError, "FastRestoreFileRestoreCorruptedLogFileBlock").error(e); - throw; - } else if (e.code() == error_code_http_request_failed || e.code() == error_code_connection_failed || - e.code() == error_code_timed_out || e.code() == error_code_lookup_failed) { - // blob http request failure, retry - TraceEvent(SevWarnAlways, "FastRestoreDecodedLogFileConnectionFailure") - .error(e) - .detail("Retries", ++readFileRetries); - wait(delayJittered(0.1)); - } else { - TraceEvent(SevError, "FastRestoreParseLogFileToMutationsOnLoaderUnexpectedError").error(e); - throw; - } - } - } - return Void(); -} - -// Return applier IDs that are used to apply key-values -std::vector getApplierIDs(std::map& rangeToApplier) { - std::vector applierIDs; - applierIDs.reserve(rangeToApplier.size()); - for (auto& applier : rangeToApplier) { - applierIDs.push_back(applier.second); - } - - ASSERT(!applierIDs.empty()); - return applierIDs; -} - -// Notify loaders that the version batch (index) has been applied. -// This affects which version batch each loader can release actors even when the worker has low memory -ACTOR Future handleFinishVersionBatchRequest(RestoreVersionBatchRequest req, Reference self) { - // Ensure batch (i-1) is applied before batch i - TraceEvent("FastRestoreLoaderHandleFinishVersionBatch", self->id()) - .detail("FinishedBatchIndex", self->finishedBatch.get()) - .detail("RequestedBatchIndex", req.batchIndex); - wait(self->finishedBatch.whenAtLeast(req.batchIndex - 1)); - if (self->finishedBatch.get() == req.batchIndex - 1) { - // Sanity check: All requests before and in this batchIndex must have been processed; otherwise, - // those requests may cause segmentation fault after applier remove the batch data - while (!self->loadingQueue.empty() && self->loadingQueue.top().batchIndex <= req.batchIndex) { - // Still has pending requests from earlier batchIndex and current batchIndex, which should not happen - TraceEvent(SevWarn, "FastRestoreLoaderHasPendingLoadFileRequests") - .detail("PendingRequest", self->loadingQueue.top().toString()); - self->loadingQueue.pop(); - } - while (!self->sendingQueue.empty() && self->sendingQueue.top().batchIndex <= req.batchIndex) { - TraceEvent(SevWarn, "FastRestoreLoaderHasPendingSendRequests") - .detail("PendingRequest", self->sendingQueue.top().toString()); - self->sendingQueue.pop(); - } - while (!self->sendLoadParamQueue.empty() && self->sendLoadParamQueue.top().batchIndex <= req.batchIndex) { - TraceEvent(SevWarn, "FastRestoreLoaderHasPendingSendLoadParamRequests") - .detail("PendingRequest", self->sendLoadParamQueue.top().toString()); - self->sendLoadParamQueue.pop(); - } - - self->finishedBatch.set(req.batchIndex); - // Clean up batchData - self->batch.erase(req.batchIndex); - self->status.erase(req.batchIndex); - } - if (self->delayedActors > 0) { - self->checkMemory.trigger(); - } - req.reply.send(RestoreCommonReply(self->id(), false)); - return Void(); -} - -namespace { - -void oldSplitMutation(std::map* pRangeToApplier, - MutationRef m, - Arena& mvector_arena, - VectorRef& mvector, - Arena& nodeIDs_arena, - VectorRef& nodeIDs) { - // mvector[i] should be mapped to nodeID[i] - ASSERT(mvector.empty()); - ASSERT(nodeIDs.empty()); - // key range [m->param1, m->param2) - std::map::iterator itlow, itup; // we will return [itlow, itup) - itlow = pRangeToApplier->lower_bound(m.param1); // lower_bound returns the iterator that is >= m.param1 - if (itlow == pRangeToApplier->end()) { - --itlow; - mvector.push_back_deep(mvector_arena, m); - nodeIDs.push_back(nodeIDs_arena, itlow->second); - return; - } - if (itlow->first > m.param1) { - if (itlow != pRangeToApplier->begin()) { - --itlow; - } - } - - itup = pRangeToApplier->upper_bound(m.param2); // return rmap::end if no key is after m.param2. - ASSERT(itup == pRangeToApplier->end() || itup->first > m.param2); - - std::map::iterator itApplier; - while (itlow != itup) { - Standalone curm; // current mutation - curm.type = m.type; - // The first split mutation should starts with m.first. - // The later ones should start with the rangeToApplier boundary. - if (m.param1 > itlow->first) { - curm.param1 = m.param1; - } else { - curm.param1 = itlow->first; - } - itApplier = itlow; - itlow++; - if (itlow == itup) { - ASSERT(m.param2 <= allKeys.end); - curm.param2 = m.param2; - } else if (m.param2 < itlow->first) { - UNREACHABLE(); - curm.param2 = m.param2; - } else { - curm.param2 = itlow->first; - } - ASSERT(curm.param1 <= curm.param2); - // itup > m.param2: (itup-1) may be out of mutation m's range - // Ensure the added mutations have overlap with mutation m - if (m.param1 < curm.param2 && m.param2 > curm.param1) { - mvector.push_back_deep(mvector_arena, curm); - nodeIDs.push_back(nodeIDs_arena, itApplier->second); - } - } -} - -// Test splitMutation -TEST_CASE("/FastRestore/RestoreLoader/splitMutation") { - std::map rangeToApplier; - MutationsVec mvector; - Standalone> nodeIDs; - - // Prepare RangeToApplier - rangeToApplier.emplace(allKeys.begin, deterministicRandom()->randomUniqueID()); - int numAppliers = deterministicRandom()->randomInt(1, 50); - for (int i = 0; i < numAppliers; ++i) { - Key k = Key(deterministicRandom()->randomAlphaNumeric(deterministicRandom()->randomInt(1, 1000))); - UID node = deterministicRandom()->randomUniqueID(); - rangeToApplier.emplace(k, node); - TraceEvent("RangeToApplier").detail("Key", k).detail("Node", node); - } - Key k1 = Key(deterministicRandom()->randomAlphaNumeric(deterministicRandom()->randomInt(1, 500))); - Key k2 = Key(deterministicRandom()->randomAlphaNumeric(deterministicRandom()->randomInt(1, 1000))); - Key beginK = k1 < k2 ? k1 : k2; - Key endK = k1 < k2 ? k2 : k1; - Standalone mutation(MutationRef(MutationRef::ClearRange, beginK.contents(), endK.contents())); - - // Method 1: Use old splitMutation - oldSplitMutation( - &rangeToApplier, mutation, mvector.arena(), mvector.contents(), nodeIDs.arena(), nodeIDs.contents()); - ASSERT(mvector.size() == nodeIDs.size()); - - // Method 2: Use new intersection based method - KeyRangeMap krMap; - buildApplierRangeMap(&krMap, &rangeToApplier); - - MutationsVec mvector2; - Standalone> nodeIDs2; - splitMutation(krMap, mutation, mvector2.arena(), mvector2.contents(), nodeIDs2.arena(), nodeIDs2.contents()); - ASSERT(mvector2.size() == nodeIDs2.size()); - - ASSERT(mvector.size() == mvector2.size()); - int splitMutationIndex = 0; - for (; splitMutationIndex < mvector.size(); splitMutationIndex++) { - MutationRef result = mvector[splitMutationIndex]; - MutationRef result2 = mvector2[splitMutationIndex]; - UID applierID = nodeIDs[splitMutationIndex]; - UID applierID2 = nodeIDs2[splitMutationIndex]; - KeyRange krange(KeyRangeRef(result.param1, result.param2)); - KeyRange krange2(KeyRangeRef(result2.param1, result2.param2)); - TraceEvent("Result") - .detail("KeyRange1", krange) - .detail("KeyRange2", krange2) - .detail("ApplierID1", applierID) - .detail("ApplierID2", applierID2); - if (krange != krange2 || applierID != applierID2) { - TraceEvent(SevError, "IncorrectResult") - .detail("Mutation", mutation) - .detail("KeyRange1", krange) - .detail("KeyRange2", krange2) - .detail("ApplierID1", applierID) - .detail("ApplierID2", applierID2); - } - } - - return Void(); -} - -} // namespace diff --git a/fdbserver/RestoreRoleCommon.actor.cpp b/fdbserver/RestoreRoleCommon.actor.cpp deleted file mode 100644 index b8de8f4689e..00000000000 --- a/fdbserver/RestoreRoleCommon.actor.cpp +++ /dev/null @@ -1,219 +0,0 @@ -/* - * RestoreRoleCommon.actor.cpp - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "fdbclient/NativeAPI.actor.h" -#include "fdbclient/MutationList.h" -#include "fdbclient/ReadYourWrites.h" -#include "fdbclient/RunRYWTransaction.actor.h" - -#include "fdbserver/RestoreUtil.h" -#include "fdbserver/RestoreRoleCommon.actor.h" -#include "fdbserver/RestoreLoader.actor.h" -#include "fdbserver/RestoreApplier.actor.h" -#include "fdbserver/RestoreController.actor.h" - -#include "flow/actorcompiler.h" // This must be the last #include. - -class Database; -struct RestoreWorkerData; - -// id is the id of the worker to be monitored -// This actor is used for both restore loader and restore applier -ACTOR Future handleHeartbeat(RestoreSimpleRequest req, UID id) { - wait(delayJittered(5.0)); // Random jitter reduces heat beat monitor's pressure - req.reply.send(RestoreCommonReply(id)); - return Void(); -} - -void handleFinishRestoreRequest(const RestoreFinishRequest& req, Reference self) { - self->resetPerRestoreRequest(); - TraceEvent("FastRestoreRolePhaseFinishRestoreRequest", self->id()) - .detail("FinishRestoreRequest", req.terminate) - .detail("Role", getRoleStr(self->role)); - - req.reply.send(RestoreCommonReply(self->id())); -} - -// Multiple version batches may execute in parallel and init their version batches -ACTOR Future handleInitVersionBatchRequest(RestoreVersionBatchRequest req, Reference self) { - TraceEvent("FastRestoreRolePhaseInitVersionBatch", self->id()) - .detail("BatchIndex", req.batchIndex) - .detail("Role", getRoleStr(self->role)) - .detail("VersionBatchNotifiedVersion", self->versionBatchId.get()); - // Loader destroy batchData once the batch finishes and self->finishedBatch.set(req.batchIndex); - ASSERT(self->finishedBatch.get() < req.batchIndex); - - // batchId is continuous. (req.batchIndex-1) is the id of the just finished batch. - wait(self->versionBatchId.whenAtLeast(req.batchIndex - 1)); - - if (self->versionBatchId.get() == req.batchIndex - 1) { - self->initVersionBatch(req.batchIndex); - self->setVersionBatchState(req.batchIndex, ApplierVersionBatchState::INIT); - TraceEvent("FastRestoreInitVersionBatch") - .detail("BatchIndex", req.batchIndex) - .detail("Role", getRoleStr(self->role)) - .detail("Node", self->id()); - self->versionBatchId.set(req.batchIndex); - } - - req.reply.send(RestoreCommonReply(self->id())); - return Void(); -} - -void updateProcessStats(Reference self) { - if (g_network->isSimulated()) { - // memUsage and cpuUsage are not relevant in the simulator, - // and relying on the actual values could break seed determinism - if (deterministicRandom()->random01() < 0.2) { // not fully utilized cpu - self->cpuUsage = deterministicRandom()->random01() * SERVER_KNOBS->FASTRESTORE_SCHED_TARGET_CPU_PERCENT; - } else if (deterministicRandom()->random01() < 0.6) { // achieved target cpu but cpu is not busy - self->cpuUsage = SERVER_KNOBS->FASTRESTORE_SCHED_TARGET_CPU_PERCENT + - deterministicRandom()->random01() * (SERVER_KNOBS->FASTRESTORE_SCHED_MAX_CPU_PERCENT - - SERVER_KNOBS->FASTRESTORE_SCHED_TARGET_CPU_PERCENT); - } else { // reach desired max cpu usage; use max cpu as 200 to simulate incorrect cpu profiling - self->cpuUsage = - SERVER_KNOBS->FASTRESTORE_SCHED_MAX_CPU_PERCENT + - deterministicRandom()->random01() * (200 - SERVER_KNOBS->FASTRESTORE_SCHED_MAX_CPU_PERCENT); - } - self->memory = 100.0; - self->residentMemory = 100.0; - return; - } - - SystemStatistics sysStats = getSystemStatistics(); - if (sysStats.initialized) { - self->cpuUsage = 100 * sysStats.processCPUSeconds / sysStats.elapsed; - self->memory = sysStats.processMemory; - self->residentMemory = sysStats.processResidentMemory; - } -} - -// An actor is schedulable to run if the current worker has enough resources, i.e., -// the worker's memory usage is below the threshold; -// Exception: If the actor is working on the current version batch, we have to schedule -// the actor to run to avoid dead-lock. -// Future: When we release the actors that are blocked by memory usage, we should release them -// in increasing order of their version batch. -ACTOR Future isSchedulable(Reference self, int actorBatchIndex, std::string name) { - self->delayedActors++; - state double memoryThresholdBytes = SERVER_KNOBS->FASTRESTORE_MEMORY_THRESHOLD_MB_SOFT * 1024 * 1024; - loop { - double memory = getSystemStatistics().processMemory; - if (g_network->isSimulated() && BUGGIFY) { - // Intentionally randomly block actors for low memory reason. - // memory will be larger than threshold when deterministicRandom()->random01() > 1/2 - if (deterministicRandom()->random01() < 0.4) { // enough memory - memory = SERVER_KNOBS->FASTRESTORE_MEMORY_THRESHOLD_MB_SOFT * deterministicRandom()->random01(); - } else { // used too much memory, needs throttling - memory = SERVER_KNOBS->FASTRESTORE_MEMORY_THRESHOLD_MB_SOFT + - deterministicRandom()->random01() * SERVER_KNOBS->FASTRESTORE_MEMORY_THRESHOLD_MB_SOFT; - } - } - if (memory < memoryThresholdBytes || self->finishedBatch.get() + 1 == actorBatchIndex) { - if (memory >= memoryThresholdBytes) { - TraceEvent(SevWarn, "FastRestoreMemoryUsageAboveThreshold", self->id()) - .suppressFor(5.0) - .detail("Role", getRoleStr(self->role)) - .detail("BatchIndex", actorBatchIndex) - .detail("FinishedBatch", self->finishedBatch.get()) - .detail("Actor", name) - .detail("Memory", memory); - } - self->delayedActors--; - break; - } else { - TraceEvent(SevInfo, "FastRestoreMemoryUsageAboveThresholdWait", self->id()) - .suppressFor(5.0) - .detail("Role", getRoleStr(self->role)) - .detail("BatchIndex", actorBatchIndex) - .detail("Actor", name) - .detail("CurrentMemory", memory); - // TODO: Set FASTRESTORE_WAIT_FOR_MEMORY_LATENCY to a large value. It should be able to avoided - wait(delay(SERVER_KNOBS->FASTRESTORE_WAIT_FOR_MEMORY_LATENCY) || self->checkMemory.onTrigger()); - } - } - return Void(); -} - -// Updated process metrics will be used by scheduler for throttling as well -ACTOR Future updateProcessMetrics(Reference self) { - loop { - updateProcessStats(self); - wait(delay(SERVER_KNOBS->FASTRESTORE_UPDATE_PROCESS_STATS_INTERVAL)); - } -} - -ACTOR Future traceProcessMetrics(Reference self, std::string role) { - loop { - TraceEvent("FastRestoreTraceProcessMetrics", self->nodeID) - .detail("Role", role) - .detail("PipelinedMaxVersionBatchIndex", self->versionBatchId.get()) - .detail("FinishedVersionBatchIndex", self->finishedBatch.get()) - .detail("CurrentVersionBatchPhase", self->getVersionBatchState(self->finishedBatch.get() + 1)) - .detail("CpuUsage", self->cpuUsage) - .detail("UsedMemory", self->memory) - .detail("ResidentMemory", self->residentMemory); - wait(delay(SERVER_KNOBS->FASTRESTORE_ROLE_LOGGING_DELAY)); - } -} - -ACTOR Future traceRoleVersionBatchProgress(Reference self, std::string role) { - loop { - int batchIndex = self->finishedBatch.get(); - int maxBatchIndex = self->versionBatchId.get(); - int maxPrintBatchIndex = batchIndex + SERVER_KNOBS->FASTRESTORE_VB_PARALLELISM; - - TraceEvent ev("FastRestoreVersionBatchProgressState", self->nodeID); - ev.detail("Role", role) - .detail("Node", self->nodeID) - .detail("FinishedBatch", batchIndex) - .detail("InitializedBatch", maxBatchIndex); - while (batchIndex <= maxBatchIndex) { - if (batchIndex > maxPrintBatchIndex) { - ev.detail("SkipVersionBatches", maxBatchIndex - batchIndex + 1); - break; - } - std::stringstream typeName; - typeName << "VersionBatch" << batchIndex; - ev.detail(typeName.str(), self->getVersionBatchState(batchIndex)); - batchIndex++; - } - - wait(delay(SERVER_KNOBS->FASTRESTORE_ROLE_LOGGING_DELAY)); - } -} - -//-------Helper functions -std::string getHexString(StringRef input) { - std::stringstream ss; - for (int i = 0; i < input.size(); i++) { - if (i % 4 == 0) - ss << " "; - if (i == 12) { // The end of 12bytes, which is the version size for value - ss << "|"; - } - if (i == (12 + 12)) { // The end of version + header - ss << "@"; - } - ss << std::setfill('0') << std::setw(2) << std::hex - << (int)input[i]; // [] operator moves the pointer in step of unit8 - } - return ss.str(); -} diff --git a/fdbserver/RestoreUtil.actor.cpp b/fdbserver/RestoreUtil.actor.cpp deleted file mode 100644 index 8e3ed005a8b..00000000000 --- a/fdbserver/RestoreUtil.actor.cpp +++ /dev/null @@ -1,79 +0,0 @@ -/* - * RestoreUtil.cpp - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// This file implements the functions defined in RestoreUtil.h - -#include "fdbserver/RestoreUtil.h" - -#include "flow/actorcompiler.h" // This must be the last #include. - -const std::vector RestoreRoleStr = { "Invalid", "Controller", "Loader", "Applier" }; -int numRoles = RestoreRoleStr.size(); - -// Similar to debugMutation(), we use debugFRMutation to track mutations for fast restore systems only. -#if CENABLED(0, NOT_IN_CLEAN) -StringRef debugFRKey = "\xff\xff\xff\xff"_sr; - -// Track any mutation in fast restore that has overlap with debugFRKey -bool debugFRMutation(const char* context, Version version, MutationRef const& mutation) { - if (mutation.type != mutation.ClearRange && mutation.param1 == debugFRKey) { // Single key mutation - TraceEvent("FastRestoreMutationTracking") - .detail("At", context) - .detail("Version", version) - .detail("MutationType", getTypeString((MutationRef::Type)mutation.type)) - .detail("Key", mutation.param1) - .detail("Value", mutation.param2); - } else if (mutation.type == mutation.ClearRange && debugFRKey >= mutation.param1 && - debugFRKey < mutation.param2) { // debugFRKey is in the range mutation - TraceEvent("FastRestoreMutationTracking") - .detail("At", context) - .detail("Version", version) - .detail("MutationType", getTypeString((MutationRef::Type)mutation.type)) - .detail("Begin", mutation.param1) - .detail("End", mutation.param2); - } else - return false; - - return true; -} -#else -// Default implementation. -bool debugFRMutation(const char* context, Version version, MutationRef const& mutation) { - return false; -} -#endif - -std::string getRoleStr(RestoreRole role) { - if ((int)role >= numRoles || (int)role < 0) { - printf("[ERROR] role:%d is out of scope\n", (int)role); - return "[Unset]"; - } - return RestoreRoleStr[(int)role]; -} - -bool isRangeMutation(MutationRef m) { - if (m.type == MutationRef::Type::ClearRange) { - ASSERT(m.type != MutationRef::Type::DebugKeyRange); - return true; - } else { - ASSERT(m.type == MutationRef::Type::SetValue || isAtomicOp((MutationRef::Type)m.type)); - return false; - } -} diff --git a/fdbserver/RestoreWorker.actor.cpp b/fdbserver/RestoreWorker.actor.cpp deleted file mode 100644 index 915fe225f60..00000000000 --- a/fdbserver/RestoreWorker.actor.cpp +++ /dev/null @@ -1,423 +0,0 @@ -/* - * RestoreWorker.actor.cpp - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include - -#include "fdbclient/NativeAPI.actor.h" -#include "fdbclient/SystemData.h" -#include "fdbclient/BackupAgent.actor.h" -#include "fdbclient/ManagementAPI.actor.h" -#include "fdbclient/MutationList.h" -#include "fdbclient/BackupContainer.h" -#include "flow/ApiVersion.h" -#include "flow/IAsyncFile.h" -#include "fdbrpc/simulator.h" -#include "flow/genericactors.actor.h" -#include "flow/Hash3.h" -#include "flow/ActorCollection.h" -#include "fdbserver/RestoreWorker.actor.h" -#include "fdbserver/RestoreController.actor.h" -#include "fdbrpc/SimulatorProcessInfo.h" - -#include "flow/actorcompiler.h" // This must be the last #include. - -class RestoreConfigFR; -struct RestoreWorkerData; // Only declare the struct exist but we cannot use its field - -ACTOR Future handlerTerminateWorkerRequest(RestoreSimpleRequest req, - Reference self, - RestoreWorkerInterface workerInterf, - Database cx); -ACTOR Future monitorWorkerLiveness(Reference self); -void handleRecruitRoleRequest(RestoreRecruitRoleRequest req, - Reference self, - ActorCollection* actors, - Database cx); -ACTOR Future collectRestoreWorkerInterface(Reference self, - Database cx, - int min_num_workers = 2); -ACTOR Future monitorleader(Reference> leader, - Database cx, - RestoreWorkerInterface myWorkerInterf); -ACTOR Future startRestoreWorkerLeader(Reference self, - RestoreWorkerInterface workerInterf, - Database cx); - -// Remove the worker interface from restoreWorkerKey and remove its roles interfaces from their keys. -ACTOR Future handlerTerminateWorkerRequest(RestoreSimpleRequest req, - Reference self, - RestoreWorkerInterface workerInterf, - Database cx) { - wait(runRYWTransaction(cx, [=](Reference tr) -> Future { - tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - tr->setOption(FDBTransactionOptions::LOCK_AWARE); - tr->clear(restoreWorkerKeyFor(workerInterf.id())); - return Void(); - })); - - TraceEvent("FastRestoreWorker").detail("HandleTerminateWorkerReq", self->id()); - - return Void(); -} - -// Assume only 1 role on a restore worker. -// Future: Multiple roles in a restore worker -void handleRecruitRoleRequest(RestoreRecruitRoleRequest req, - Reference self, - ActorCollection* actors, - Database cx) { - // Future: Allow multiple restore roles on a restore worker. The design should easily allow this. - ASSERT(!self->loaderInterf.present() || !self->applierInterf.present()); // Only one role per worker for now - // Already recruited a role - if (self->loaderInterf.present()) { - ASSERT(req.role == RestoreRole::Loader); - req.reply.send(RestoreRecruitRoleReply(self->id(), RestoreRole::Loader, self->loaderInterf.get())); - return; - } else if (self->applierInterf.present()) { - req.reply.send(RestoreRecruitRoleReply(self->id(), RestoreRole::Applier, self->applierInterf.get())); - return; - } - - if (req.role == RestoreRole::Loader) { - ASSERT(!self->loaderInterf.present()); - self->controllerInterf = req.ci; - self->loaderInterf = RestoreLoaderInterface(); - self->loaderInterf.get().initEndpoints(); - RestoreLoaderInterface& recruited = self->loaderInterf.get(); - DUMPTOKEN(recruited.heartbeat); - DUMPTOKEN(recruited.updateRestoreSysInfo); - DUMPTOKEN(recruited.initVersionBatch); - DUMPTOKEN(recruited.loadFile); - DUMPTOKEN(recruited.sendMutations); - DUMPTOKEN(recruited.initVersionBatch); - DUMPTOKEN(recruited.finishVersionBatch); - DUMPTOKEN(recruited.collectRestoreRoleInterfaces); - DUMPTOKEN(recruited.finishRestore); - actors->add(restoreLoaderCore(self->loaderInterf.get(), req.nodeIndex, cx, req.ci)); - TraceEvent("FastRestoreWorker").detail("RecruitedLoaderNodeIndex", req.nodeIndex); - req.reply.send( - RestoreRecruitRoleReply(self->loaderInterf.get().id(), RestoreRole::Loader, self->loaderInterf.get())); - } else if (req.role == RestoreRole::Applier) { - ASSERT(!self->applierInterf.present()); - self->controllerInterf = req.ci; - self->applierInterf = RestoreApplierInterface(); - self->applierInterf.get().initEndpoints(); - RestoreApplierInterface& recruited = self->applierInterf.get(); - DUMPTOKEN(recruited.heartbeat); - DUMPTOKEN(recruited.sendMutationVector); - DUMPTOKEN(recruited.applyToDB); - DUMPTOKEN(recruited.initVersionBatch); - DUMPTOKEN(recruited.collectRestoreRoleInterfaces); - DUMPTOKEN(recruited.finishRestore); - actors->add(restoreApplierCore(self->applierInterf.get(), req.nodeIndex, cx)); - TraceEvent("FastRestoreWorker").detail("RecruitedApplierNodeIndex", req.nodeIndex); - req.reply.send( - RestoreRecruitRoleReply(self->applierInterf.get().id(), RestoreRole::Applier, self->applierInterf.get())); - } else { - TraceEvent(SevError, "FastRestoreWorkerHandleRecruitRoleRequestUnknownRole").detail("Request", req.toString()); - } - - return; -} - -// Read restoreWorkersKeys from DB to get each restore worker's workerInterface and set it to self->workerInterfaces; -// This is done before we assign restore roles for restore workers. -ACTOR Future collectRestoreWorkerInterface(Reference self, Database cx, int min_num_workers) { - state Transaction tr(cx); - state std::vector agents; // agents is cmdsInterf - - loop { - try { - self->workerInterfaces.clear(); - agents.clear(); - tr.reset(); - tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - tr.setOption(FDBTransactionOptions::LOCK_AWARE); - RangeResult agentValues = wait(tr.getRange(restoreWorkersKeys, CLIENT_KNOBS->TOO_MANY)); - ASSERT(!agentValues.more); - // If agentValues.size() < min_num_workers, we should wait for coming workers to register their - // workerInterface before we read them once for all - if (agentValues.size() >= min_num_workers) { - for (auto& it : agentValues) { - agents.push_back(BinaryReader::fromStringRef(it.value, IncludeVersion())); - // Save the RestoreWorkerInterface for the later operations - self->workerInterfaces.insert(std::make_pair(agents.back().id(), agents.back())); - } - break; - } - TraceEvent("FastRestoreWorker") - .suppressFor(10.0) - .detail("NotEnoughWorkers", agentValues.size()) - .detail("MinWorkers", min_num_workers); - wait(delay(5.0)); - } catch (Error& e) { - wait(tr.onError(e)); - } - } - ASSERT(agents.size() >= min_num_workers); // ASSUMPTION: We must have at least 1 loader and 1 applier - - TraceEvent("FastRestoreWorker").detail("CollectWorkerInterfaceNumWorkers", self->workerInterfaces.size()); - - return Void(); -} - -// Periodically send worker heartbeat to -ACTOR Future monitorWorkerLiveness(Reference self) { - ASSERT(!self->workerInterfaces.empty()); - - state std::map::iterator workerInterf; - loop { - std::vector> requests; - for (auto& worker : self->workerInterfaces) { - requests.emplace_back(worker.first, RestoreSimpleRequest()); - } - wait(sendBatchRequests(&RestoreWorkerInterface::heartbeat, self->workerInterfaces, requests)); - wait(delay(60.0)); - } -} - -// RestoreWorkerLeader is the worker that runs RestoreController role -ACTOR Future startRestoreWorkerLeader(Reference self, - RestoreWorkerInterface workerInterf, - Database cx) { - // We must wait for enough time to make sure all restore workers have registered their workerInterfaces into the DB - TraceEvent("FastRestoreWorker") - .detail("Controller", workerInterf.id()) - .detail("WaitForRestoreWorkerInterfaces", - SERVER_KNOBS->FASTRESTORE_NUM_LOADERS + SERVER_KNOBS->FASTRESTORE_NUM_APPLIERS); - wait(delay(10.0)); - TraceEvent("FastRestoreWorker") - .detail("Controller", workerInterf.id()) - .detail("CollectRestoreWorkerInterfaces", - SERVER_KNOBS->FASTRESTORE_NUM_LOADERS + SERVER_KNOBS->FASTRESTORE_NUM_APPLIERS); - - wait(collectRestoreWorkerInterface( - self, cx, SERVER_KNOBS->FASTRESTORE_NUM_LOADERS + SERVER_KNOBS->FASTRESTORE_NUM_APPLIERS)); - - // TODO: Needs to keep this monitor's future. May use actorCollection - state Future workersFailureMonitor = monitorWorkerLiveness(self); - - RestoreControllerInterface recruited; - DUMPTOKEN(recruited.samples); - - self->controllerInterf = recruited; - wait(startRestoreController(self, cx) || workersFailureMonitor); - - return Void(); -} - -ACTOR Future startRestoreWorker(Reference self, RestoreWorkerInterface interf, Database cx) { - state double lastLoopTopTime = now(); - state ActorCollection actors(false); // Collect the main actor for each role - state Future exitRole = Never(); - - loop { - double loopTopTime = now(); - double elapsedTime = loopTopTime - lastLoopTopTime; - if (elapsedTime > 0.050) { - if (deterministicRandom()->random01() < 0.01) - TraceEvent(SevWarn, "SlowRestoreWorkerLoopx100") - .detail("NodeDesc", self->describeNode()) - .detail("Elapsed", elapsedTime); - } - lastLoopTopTime = loopTopTime; - state std::string requestTypeStr = "[Init]"; - - try { - choose { - when(RestoreSimpleRequest req = waitNext(interf.heartbeat.getFuture())) { - requestTypeStr = "heartbeat"; - actors.add(handleHeartbeat(req, interf.id())); - } - when(RestoreRecruitRoleRequest req = waitNext(interf.recruitRole.getFuture())) { - requestTypeStr = "recruitRole"; - handleRecruitRoleRequest(req, self, &actors, cx); - } - when(RestoreSimpleRequest req = waitNext(interf.terminateWorker.getFuture())) { - // Destroy the worker at the end of the restore - requestTypeStr = "terminateWorker"; - exitRole = handlerTerminateWorkerRequest(req, self, interf, cx); - } - when(wait(exitRole)) { - TraceEvent("FastRestoreWorkerCoreExitRole", self->id()); - break; - } - } - } catch (Error& e) { - TraceEvent(SevWarn, "FastRestoreWorkerError").errorUnsuppressed(e).detail("RequestType", requestTypeStr); - break; - } - } - - return Void(); -} - -ACTOR static Future waitOnRestoreRequests(Database cx, UID nodeID = UID()) { - state ReadYourWritesTransaction tr(cx); - state Optional numRequests; - - // wait for the restoreRequestTriggerKey to be set by the client/test workload - TraceEvent("FastRestoreWaitOnRestoreRequest", nodeID).log(); - loop { - try { - tr.reset(); - tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - tr.setOption(FDBTransactionOptions::LOCK_AWARE); - Optional _numRequests = wait(tr.get(restoreRequestTriggerKey)); - numRequests = _numRequests; - if (!numRequests.present()) { - state Future watchForRestoreRequest = tr.watch(restoreRequestTriggerKey); - wait(tr.commit()); - TraceEvent(SevInfo, "FastRestoreWaitOnRestoreRequestTriggerKey", nodeID).log(); - wait(watchForRestoreRequest); - TraceEvent(SevInfo, "FastRestoreDetectRestoreRequestTriggerKeyChanged", nodeID).log(); - } else { - TraceEvent(SevInfo, "FastRestoreRestoreRequestTriggerKey", nodeID) - .detail("TriggerKey", numRequests.get().toString()); - break; - } - } catch (Error& e) { - wait(tr.onError(e)); - } - } - - return Void(); -} - -// RestoreController is the leader -ACTOR Future monitorleader(Reference> leader, - Database cx, - RestoreWorkerInterface myWorkerInterf) { - wait(delay(SERVER_KNOBS->FASTRESTORE_MONITOR_LEADER_DELAY)); - TraceEvent("FastRestoreWorker", myWorkerInterf.id()).detail("MonitorLeader", "StartLeaderElection"); - state int count = 0; - state RestoreWorkerInterface leaderInterf; - state ReadYourWritesTransaction tr(cx); // MX: Somewhere here program gets stuck - loop { - try { - count++; - tr.reset(); - tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - tr.setOption(FDBTransactionOptions::LOCK_AWARE); - Optional leaderValue = wait(tr.get(restoreLeaderKey)); - TraceEvent(SevInfo, "FastRestoreLeaderElection") - .detail("Round", count) - .detail("LeaderExisted", leaderValue.present()); - if (leaderValue.present()) { - leaderInterf = BinaryReader::fromStringRef(leaderValue.get(), IncludeVersion()); - // Register my interface as an worker if I am not the leader - if (leaderInterf != myWorkerInterf) { - tr.set(restoreWorkerKeyFor(myWorkerInterf.id()), restoreWorkerInterfaceValue(myWorkerInterf)); - } - } else { - // Workers compete to be the leader - tr.set(restoreLeaderKey, - BinaryWriter::toValue(myWorkerInterf, - IncludeVersion(ProtocolVersion::withRestoreWorkerInterfaceValue()))); - leaderInterf = myWorkerInterf; - } - wait(tr.commit()); - leader->set(leaderInterf); - break; - } catch (Error& e) { - TraceEvent(SevInfo, "FastRestoreLeaderElection").detail("ErrorCode", e.code()).detail("Error", e.what()); - wait(tr.onError(e)); - } - } - - TraceEvent("FastRestoreWorker", myWorkerInterf.id()) - .detail("MonitorLeader", "FinishLeaderElection") - .detail("Leader", leaderInterf.id()) - .detail("IamLeader", leaderInterf == myWorkerInterf); - return Void(); -} - -ACTOR Future _restoreWorker(Database cx, LocalityData locality) { - state ActorCollection actors(false); - state Future myWork = Never(); - state Reference> leader = makeReference>(); - state RestoreWorkerInterface myWorkerInterf; - state Reference self = makeReference(); - - myWorkerInterf.initEndpoints(); - self->workerID = myWorkerInterf.id(); - - // Protect restore worker from being killed in simulation; - // Future: Remove the protection once restore can tolerate failure - if (g_network->isSimulated()) { - auto addresses = g_simulator->getProcessByAddress(myWorkerInterf.address())->addresses; - - g_simulator->protectedAddresses.insert(addresses.address); - if (addresses.secondaryAddress.present()) { - g_simulator->protectedAddresses.insert(addresses.secondaryAddress.get()); - } - ISimulator::ProcessInfo* p = g_simulator->getProcessByAddress(myWorkerInterf.address()); - TraceEvent("ProtectRestoreWorker") - .detail("Address", addresses.toString()) - .detail("IsReliable", p->isReliable()) - .detail("ReliableInfo", p->getReliableInfo()) - .backtrace(); - ASSERT(p->isReliable()); - } - - TraceEvent("FastRestoreWorkerKnobs", myWorkerInterf.id()) - .detail("FailureTimeout", SERVER_KNOBS->FASTRESTORE_FAILURE_TIMEOUT) - .detail("HeartBeat", SERVER_KNOBS->FASTRESTORE_HEARTBEAT_INTERVAL) - .detail("SamplePercentage", SERVER_KNOBS->FASTRESTORE_SAMPLING_PERCENT) - .detail("NumLoaders", SERVER_KNOBS->FASTRESTORE_NUM_LOADERS) - .detail("NumAppliers", SERVER_KNOBS->FASTRESTORE_NUM_APPLIERS) - .detail("TxnBatchSize", SERVER_KNOBS->FASTRESTORE_TXN_BATCH_MAX_BYTES) - .detail("VersionBatchSize", SERVER_KNOBS->FASTRESTORE_VERSIONBATCH_MAX_BYTES); - - wait(waitOnRestoreRequests(cx, myWorkerInterf.id())); - - wait(monitorleader(leader, cx, myWorkerInterf)); - - TraceEvent("FastRestoreWorker", myWorkerInterf.id()).detail("LeaderElection", "WaitForLeader"); - if (leader->get() == myWorkerInterf) { - // Restore controller worker: doLeaderThings(); - myWork = startRestoreWorkerLeader(self, myWorkerInterf, cx); - } else { - // Restore normal worker (for RestoreLoader and RestoreApplier roles): doWorkerThings(); - myWork = startRestoreWorker(self, myWorkerInterf, cx); - } - - wait(myWork); - return Void(); -} - -ACTOR Future restoreWorker(Reference connRecord, - LocalityData locality, - std::string coordFolder) { - try { - Database cx = Database::createDatabase(connRecord, ApiVersion::LATEST_VERSION, IsInternal::True, locality); - wait(reportErrors(_restoreWorker(cx, locality), "RestoreWorker")); - } catch (Error& e) { - TraceEvent("FastRestoreWorker").detail("Error", e.what()); - throw e; - } - - return Void(); -} diff --git a/fdbserver/RestoreWorkerInterface.actor.cpp b/fdbserver/RestoreWorkerInterface.actor.cpp deleted file mode 100644 index d20be77f158..00000000000 --- a/fdbserver/RestoreWorkerInterface.actor.cpp +++ /dev/null @@ -1,102 +0,0 @@ -/* - * RestoreWorkerInterface.actor.cpp - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "fdbserver/RestoreWorkerInterface.actor.h" -#include "flow/actorcompiler.h" // must be last include - -const KeyRef restoreLeaderKey = "\xff\x02/restoreLeader"_sr; -const KeyRangeRef restoreWorkersKeys("\xff\x02/restoreWorkers/"_sr, "\xff\x02/restoreWorkers0"_sr); -const KeyRef restoreStatusKey = "\xff\x02/restoreStatus/"_sr; -const KeyRangeRef restoreApplierKeys("\xff\x02/restoreApplier/"_sr, "\xff\x02/restoreApplier0"_sr); -const KeyRef restoreApplierTxnValue = "1"_sr; - -// restoreApplierKeys: track atomic transaction progress to ensure applying atomicOp exactly once -// Version and batchIndex are passed in as LittleEndian, -// they must be converted to BigEndian to maintain ordering in lexical order -const Key restoreApplierKeyFor(UID const& applierID, int64_t batchIndex, Version version) { - BinaryWriter wr(Unversioned()); - wr.serializeBytes(restoreApplierKeys.begin); - wr << applierID << bigEndian64(batchIndex) << bigEndian64(version); - return wr.toValue(); -} - -std::tuple decodeRestoreApplierKey(ValueRef const& key) { - BinaryReader rd(key, Unversioned()); - UID applierID; - int64_t batchIndex; - Version version; - rd >> applierID >> batchIndex >> version; - return std::make_tuple(applierID, bigEndian64(batchIndex), bigEndian64(version)); -} - -// Encode restore worker key for workerID -const Key restoreWorkerKeyFor(UID const& workerID) { - BinaryWriter wr(Unversioned()); - wr.serializeBytes(restoreWorkersKeys.begin); - wr << workerID; - return wr.toValue(); -} - -// Encode restore agent value -const Value restoreWorkerInterfaceValue(RestoreWorkerInterface const& cmdInterf) { - BinaryWriter wr(IncludeVersion(ProtocolVersion::withRestoreWorkerInterfaceValue())); - wr << cmdInterf; - return wr.toValue(); -} - -RestoreWorkerInterface decodeRestoreWorkerInterfaceValue(ValueRef const& value) { - RestoreWorkerInterface s; - BinaryReader reader(value, IncludeVersion()); - reader >> s; - return s; -} - -Value restoreRequestDoneVersionValue(Version readVersion) { - BinaryWriter wr(IncludeVersion(ProtocolVersion::withRestoreRequestDoneVersionValue())); - wr << readVersion; - return wr.toValue(); -} -Version decodeRestoreRequestDoneVersionValue(ValueRef const& value) { - Version v; - BinaryReader reader(value, IncludeVersion()); - reader >> v; - return v; -} - -RestoreRequest decodeRestoreRequestValue(ValueRef const& value) { - RestoreRequest s; - BinaryReader reader(value, IncludeVersion()); - reader >> s; - return s; -} - -// TODO: Register restore performance data to restoreStatus key -const Key restoreStatusKeyFor(StringRef statusType) { - BinaryWriter wr(Unversioned()); - wr.serializeBytes(restoreStatusKey); - wr << statusType; - return wr.toValue(); -} - -const Value restoreStatusValue(double val) { - BinaryWriter wr(IncludeVersion(ProtocolVersion::withRestoreStatusValue())); - wr << StringRef(std::to_string(val)); - return wr.toValue(); -} diff --git a/fdbserver/fdbserver.actor.cpp b/fdbserver/fdbserver.actor.cpp index 36b03924ecb..5d869143def 100644 --- a/fdbserver/fdbserver.actor.cpp +++ b/fdbserver/fdbserver.actor.cpp @@ -65,7 +65,6 @@ #include "fdbserver/MoveKeys.actor.h" #include "fdbserver/NetworkTest.h" #include "fdbserver/RemoteIKeyValueStore.actor.h" -#include "fdbserver/RestoreWorkerInterface.actor.h" #include "fdbserver/ServerDBInfo.h" #include "fdbserver/SimulatedCluster.h" #include "fdbserver/Status.actor.h" @@ -1323,8 +1322,6 @@ struct CLIOptions { role = ServerRole::NetworkTestClient; else if (!strcmp(sRole, "networktestserver")) role = ServerRole::NetworkTestServer; - else if (!strcmp(sRole, "restore")) - role = ServerRole::Restore; else if (!strcmp(sRole, "kvfileintegritycheck")) role = ServerRole::KVFileIntegrityCheck; else if (!strcmp(sRole, "kvfilegeneratesums")) @@ -2111,8 +2108,8 @@ int main(int argc, char* argv[]) { FlowTransport::createInstance(false, 1, WLTOKEN_RESERVED_COUNT, &opts.allowList); opts.buildNetwork(argv[0]); - const bool expectsPublicAddress = (role == ServerRole::FDBD || role == ServerRole::NetworkTestServer || - role == ServerRole::Restore || role == ServerRole::FlowProcess); + const bool expectsPublicAddress = + (role == ServerRole::FDBD || role == ServerRole::NetworkTestServer || role == ServerRole::FlowProcess); if (opts.publicAddressStrs.empty()) { if (expectsPublicAddress) { fprintf(stderr, "ERROR: The -p or --public-address option is required\n"); @@ -2376,55 +2373,40 @@ int main(int argc, char* argv[]) { } } - // Call fast restore for the class FastRestoreClass. This is a short-cut to run fast restore in circus - if (opts.processClass == ProcessClass::FastRestoreClass) { - printf("Run as fast restore worker\n"); - ASSERT(opts.connectionFile); - auto dataFolder = opts.dataFolder; - if (!dataFolder.size()) - dataFolder = format("fdb/%d/", opts.publicAddresses.address.port); // SOMEDAY: Better default - - std::vector> actors(listenErrors.begin(), listenErrors.end()); - actors.push_back(restoreWorker(opts.connectionFile, opts.localities, dataFolder)); - f = stopAfter(waitForAll(actors)); - printf("Fast restore worker started\n"); - g_network->run(); - printf("g_network->run() done\n"); - } else { // Call fdbd roles in conventional way - ASSERT(opts.connectionFile); - - setupRunLoopProfiler(); - - auto dataFolder = opts.dataFolder; - if (!dataFolder.size()) - dataFolder = format("fdb/%d/", opts.publicAddresses.address.port); // SOMEDAY: Better default - - std::vector> actors(listenErrors.begin(), listenErrors.end()); - actors.push_back(fdbd(opts.connectionFile, - opts.localities, - opts.processClass, - dataFolder, - dataFolder, - opts.storageMemLimit, - opts.metricsConnFile, - opts.metricsPrefix, - opts.rsssize, - opts.whitelistBinPaths, - opts.configPath, - opts.manualKnobOverrides, - opts.configDBType, - opts.consistencyCheckUrgentMode)); - actors.push_back(histogramReport()); - // actors.push_back( recurring( []{}, .001 ) ); // for ASIO latency measurement + ASSERT(opts.connectionFile); + + setupRunLoopProfiler(); + + auto dataFolder = opts.dataFolder; + if (!dataFolder.size()) + dataFolder = format("fdb/%d/", opts.publicAddresses.address.port); // SOMEDAY: Better default + + std::vector> actors(listenErrors.begin(), listenErrors.end()); + actors.push_back(fdbd(opts.connectionFile, + opts.localities, + opts.processClass, + dataFolder, + dataFolder, + opts.storageMemLimit, + opts.metricsConnFile, + opts.metricsPrefix, + opts.rsssize, + opts.whitelistBinPaths, + opts.configPath, + opts.manualKnobOverrides, + opts.configDBType, + opts.consistencyCheckUrgentMode)); + actors.push_back(histogramReport()); + // actors.push_back( recurring( []{}, .001 ) ); // for ASIO latency measurement #ifdef FLOW_GRPC_ENABLED - if (opts.grpcAddressStrs.size() > 0) { - FlowGrpc::init(&opts.tlsConfig, NetworkAddress::parse(opts.grpcAddressStrs[0])); - actors.push_back(GrpcServer::instance()->run()); - } -#endif - f = stopAfter(waitForAll(actors)); - g_network->run(); + if (opts.grpcAddressStrs.size() > 0) { + FlowGrpc::init(&opts.tlsConfig, NetworkAddress::parse(opts.grpcAddressStrs[0])); + actors.push_back(GrpcServer::instance()->run()); } +#endif + f = stopAfter(waitForAll(actors)); + g_network->run(); + } else if (role == ServerRole::MultiTester) { setupRunLoopProfiler(); f = stopAfter(runTests(opts.connectionFile, @@ -2489,9 +2471,6 @@ int main(int argc, char* argv[]) { } else if (role == ServerRole::NetworkTestServer) { f = stopAfter(networkTestServer()); g_network->run(); - } else if (role == ServerRole::Restore) { - f = stopAfter(restoreWorker(opts.connectionFile, opts.localities, opts.dataFolder)); - g_network->run(); } else if (role == ServerRole::KVFileIntegrityCheck) { f = stopAfter(KVFileCheck(opts.kvFile, true)); g_network->run(); diff --git a/fdbserver/include/fdbserver/RestoreApplier.actor.h b/fdbserver/include/fdbserver/RestoreApplier.actor.h deleted file mode 100644 index 64cd7bfd934..00000000000 --- a/fdbserver/include/fdbserver/RestoreApplier.actor.h +++ /dev/null @@ -1,417 +0,0 @@ -/* - * RestoreApplier.actor.h - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// This file declears RestoreApplier interface and actors - -#pragma once -#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_RESTORE_APPLIER_G_H) -#define FDBSERVER_RESTORE_APPLIER_G_H -#include "fdbserver/RestoreApplier.actor.g.h" -#elif !defined(FDBSERVER_RESTORE_APPLIER_H) -#define FDBSERVER_RESTORE_APPLIER_H - -#include -#include "fdbclient/Atomic.h" -#include "fdbclient/FDBTypes.h" -#include "fdbclient/CommitTransaction.h" -#include "fdbrpc/fdbrpc.h" -#include "fdbrpc/Locality.h" -#include "fdbrpc/Stats.h" -#include "fdbserver/CoordinationInterface.h" -#include "fdbserver/MutationTracking.h" -#include "fdbserver/RestoreUtil.h" -#include "fdbserver/RestoreRoleCommon.actor.h" -#include "fdbserver/RestoreWorkerInterface.actor.h" - -#include "flow/actorcompiler.h" // has to be last include - -Value applyAtomicOp(Optional existingValue, Value value, MutationRef::Type type); - -// Key whose mutations are buffered on applier. -// key, value, type and version defines the parsed mutation at version. -// pendingMutations has all versioned mutations to be applied. -// Mutations in pendingMutations whose version is below the version in StagingKey can be ignored in applying phase. -struct StagingKey { - Key key; // TODO: Maybe not needed? - Value val; - MutationRef::Type type; // set or clear - LogMessageVersion version; // largest version of set or clear for the key - std::map> pendingMutations; // mutations not set or clear type - - explicit StagingKey(Key key) : key(key), type(MutationRef::MAX_ATOMIC_OP), version(0) {} - - // Add mutation m at newVersion to stagingKey - // Assume: SetVersionstampedKey and SetVersionstampedValue have been converted to set - void add(const MutationRef& m, LogMessageVersion newVersion) { - ASSERT(m.type != MutationRef::SetVersionstampedKey && m.type != MutationRef::SetVersionstampedValue); - DEBUG_MUTATION("StagingKeyAdd", newVersion.version, m) - .detail("SubVersion", version.toString()) - .detail("NewSubVersion", newVersion.toString()); - if (version == newVersion) { - // This could happen because the same mutation can be present in - // overlapping mutation logs, because new TLogs can copy mutations - // from old generation TLogs (or backup worker is recruited without - // knowning previously saved progress). - ASSERT(type == m.type && key == m.param1 && val == m.param2); - TraceEvent("SameVersion").detail("Version", version.toString()).detail("Mutation", m); - return; - } - - // newVersion can be smaller than version as different loaders can send - // mutations out of order. - if (m.type == MutationRef::SetValue || m.type == MutationRef::ClearRange) { - if (m.type == MutationRef::ClearRange) { - // We should only clear this key! Otherwise, it causes side effect to other keys - ASSERT(m.param1 == m.param2); - } - if (version < newVersion) { - DEBUG_MUTATION("StagingKeyAdd", newVersion.version, m) - .detail("SubVersion", version.toString()) - .detail("NewSubVersion", newVersion.toString()) - .detail("MType", getTypeString(type)) - .detail("Key", key) - .detail("Val", val) - .detail("NewMutation", m.toString()); - key = m.param1; - val = m.param2; - type = (MutationRef::Type)m.type; - version = newVersion; - } - } else { - auto it = pendingMutations.find(newVersion); - if (it == pendingMutations.end()) { - pendingMutations.emplace(newVersion, m); - } else { - // Duplicated mutation ignored. - // TODO: Add SevError here - TraceEvent("SameVersion") - .detail("Version", version.toString()) - .detail("NewVersion", newVersion.toString()) - .detail("OldMutation", it->second) - .detail("NewMutation", m); - ASSERT(it->second.type == m.type && it->second.param1 == m.param1 && it->second.param2 == m.param2); - } - } - } - - // Precompute the final value of the key. - // TODO: Look at the last LogMessageVersion, if it set or clear, we can ignore the rest of versions. - void precomputeResult(const char* context, UID applierID, int batchIndex) { - TraceEvent(SevFRMutationInfo, "FastRestoreApplierPrecomputeResult", applierID) - .detail("BatchIndex", batchIndex) - .detail("Context", context) - .detail("Version", version.toString()) - .detail("Key", key) - .detail("Value", val) - .detail("MType", type < MutationRef::MAX_ATOMIC_OP ? getTypeString(type) : "[Unset]") - .detail("LargestPendingVersion", - (pendingMutations.empty() ? "[none]" : pendingMutations.rbegin()->first.toString())) - .detail("PendingMutations", pendingMutations.size()); - std::map>::iterator lb = pendingMutations.lower_bound(version); - if (lb == pendingMutations.end()) { - return; - } - ASSERT(!pendingMutations.empty()); - if (lb->first == version) { - // Sanity check mutations at version are either atomicOps which can be ignored or the same value as buffered - MutationRef m = lb->second; - if (m.type == MutationRef::SetValue || m.type == MutationRef::ClearRange) { - if (std::tie(type, key, val) != std::tie(m.type, m.param1, m.param2)) { - TraceEvent(SevError, "FastRestoreApplierPrecomputeResultUnhandledSituation", applierID) - .detail("BatchIndex", batchIndex) - .detail("Context", context) - .detail("BufferedType", getTypeString(type)) - .detail("PendingType", getTypeString(m.type)) - .detail("BufferedVal", val.toString()) - .detail("PendingVal", m.param2.toString()); - } - } - lb++; - } - for (; lb != pendingMutations.end(); lb++) { - MutationRef mutation = lb->second; - if (mutation.type == MutationRef::CompareAndClear) { // Special atomicOp - Arena arena; - Optional inputVal; - if (hasBaseValue()) { - inputVal = val; - } - Optional retVal = doCompareAndClear(inputVal, mutation.param2, arena); - if (!retVal.present()) { - val = key; - type = MutationRef::ClearRange; - } // else no-op - } else if (isAtomicOp((MutationRef::Type)mutation.type)) { - Optional inputVal; - if (hasBaseValue()) { - inputVal = val; - } - val = applyAtomicOp(inputVal, mutation.param2, (MutationRef::Type)mutation.type); - type = MutationRef::SetValue; // Precomputed result should be set to DB. - } else if (mutation.type == MutationRef::SetValue || mutation.type == MutationRef::ClearRange) { - type = MutationRef::SetValue; - TraceEvent(SevError, "FastRestoreApplierPrecomputeResultUnexpectedSet", applierID) - .detail("BatchIndex", batchIndex) - .detail("Context", context) - .detail("MutationType", getTypeString(mutation.type)) - .detail("Version", lb->first.toString()); - } else { - TraceEvent(SevError, "FastRestoreApplierPrecomputeResultSkipUnexpectedBackupMutation", applierID) - .detail("BatchIndex", batchIndex) - .detail("Context", context) - .detail("MutationType", getTypeString(mutation.type)) - .detail("Version", lb->first.toString()); - } - ASSERT(lb->first > version); - version = lb->first; - } - } - - // Does the key has at least 1 set or clear mutation to get the base value - bool hasBaseValue() const { - if (version.version > 0) { - ASSERT(type == MutationRef::SetValue || type == MutationRef::ClearRange); - } - return version.version > 0; - } - - // Has all pendingMutations been pre-applied to the val? - bool hasPrecomputed() const { - ASSERT(pendingMutations.empty() || pendingMutations.rbegin()->first >= pendingMutations.begin()->first); - return pendingMutations.empty() || version >= pendingMutations.rbegin()->first; - } - - int totalSize() const { return MutationRef::OVERHEAD_BYTES + key.size() + val.size(); } -}; - -// The range mutation received on applier. -// Range mutations should be applied both to the destination DB and to the StagingKeys -struct StagingKeyRange { - Standalone mutation; - LogMessageVersion version; - - explicit StagingKeyRange(MutationRef m, LogMessageVersion newVersion) : mutation(m), version(newVersion) {} - - bool operator<(const StagingKeyRange& rhs) const { - return std::tie(version, mutation.type, mutation.param1, mutation.param2) < - std::tie(rhs.version, rhs.mutation.type, rhs.mutation.param1, rhs.mutation.param2); - } -}; - -// Applier state in each version batch -class ApplierVersionBatchState : RoleVersionBatchState { -public: - static const int NOT_INIT = 0; - static const int INIT = 1; - static const int RECEIVE_MUTATIONS = 2; - static const int WRITE_TO_DB = 3; - static const int DONE = 4; - static const int INVALID = 5; - - explicit ApplierVersionBatchState(int newState) { vbState = newState; } - - ~ApplierVersionBatchState() override = default; - - void operator=(int newState) override { vbState = newState; } - - int get() const override { return vbState; } -}; - -struct ApplierBatchData : public ReferenceCounted { - // processedFileState: key: RestoreAsset; value: largest version of mutation received on the applier - std::map processedFileState; - Optional> dbApplier; - VersionedMutationsMap kvOps; // Mutations at each version - std::map stagingKeys; - std::set stagingKeyRanges; - - Future pollMetrics; - - RoleVersionBatchState vbState; - - long receiveMutationReqs; - - // Stats - double receivedBytes; // received mutation size - double appliedBytes; // after coalesce, how many bytes to write to DB - double targetWriteRateMB; // target amount of data outstanding for DB; - double totalBytesToWrite; // total amount of data in bytes to write - double applyingDataBytes; // amount of data in flight of committing - AsyncTrigger releaseTxnTrigger; // trigger to release more txns - Future rateTracer; // trace transaction rate control info - - // Status counters - struct Counters { - CounterCollection cc; - Counter receivedBytes, receivedWeightedBytes, receivedMutations, receivedAtomicOps; - Counter appliedBytes, appliedWeightedBytes, appliedMutations, appliedAtomicOps; - Counter appliedTxns, appliedTxnRetries; - Counter fetchKeys, fetchTxns, fetchTxnRetries; // number of keys to fetch from dest. FDB cluster. - Counter clearOps, clearTxns; - - Counters(ApplierBatchData* self, UID applierInterfID, int batchIndex) - : cc("ApplierBatch", applierInterfID.toString() + ":" + std::to_string(batchIndex)), - receivedBytes("ReceivedBytes", cc), receivedWeightedBytes("ReceivedWeightedMutations", cc), - receivedMutations("ReceivedMutations", cc), receivedAtomicOps("ReceivedAtomicOps", cc), - appliedBytes("AppliedBytes", cc), appliedWeightedBytes("AppliedWeightedBytes", cc), - appliedMutations("AppliedMutations", cc), appliedAtomicOps("AppliedAtomicOps", cc), - appliedTxns("AppliedTxns", cc), appliedTxnRetries("AppliedTxnRetries", cc), fetchKeys("FetchKeys", cc), - fetchTxns("FetchTxns", cc), fetchTxnRetries("FetchTxnRetries", cc), clearOps("ClearOps", cc), - clearTxns("ClearTxns", cc) {} - } counters; - - void addref() { return ReferenceCounted::addref(); } - void delref() { return ReferenceCounted::delref(); } - - explicit ApplierBatchData(UID nodeID, int batchIndex) - : vbState(ApplierVersionBatchState::NOT_INIT), receiveMutationReqs(0), receivedBytes(0), appliedBytes(0), - targetWriteRateMB(SERVER_KNOBS->FASTRESTORE_WRITE_BW_MB / SERVER_KNOBS->FASTRESTORE_NUM_APPLIERS), - totalBytesToWrite(-1), applyingDataBytes(0), counters(this, nodeID, batchIndex) { - pollMetrics = - counters.cc.traceCounters(format("FastRestoreApplierMetrics%d", batchIndex), - nodeID, - SERVER_KNOBS->FASTRESTORE_ROLE_LOGGING_DELAY, - nodeID.toString() + "/RestoreApplierMetrics/" + std::to_string(batchIndex)); - TraceEvent("FastRestoreApplierMetricsCreated").detail("Node", nodeID); - } - ~ApplierBatchData() { - rateTracer = Void(); // cancel actor - } - - void addMutation(MutationRef m, LogMessageVersion ver) { - if (!isRangeMutation(m)) { - auto item = stagingKeys.emplace(m.param1, StagingKey(m.param1)); - item.first->second.add(m, ver); - } else { - stagingKeyRanges.insert(StagingKeyRange(m, ver)); - } - } - - // Return true if all staging keys have been precomputed - bool allKeysPrecomputed() { - for (auto& stagingKey : stagingKeys) { - if (!stagingKey.second.hasPrecomputed()) { - TraceEvent("FastRestoreApplierAllKeysPrecomputedFalse") - .detail("Key", stagingKey.first) - .detail("BufferedVersion", stagingKey.second.version.toString()) - .detail("MaxPendingVersion", stagingKey.second.pendingMutations.rbegin()->first.toString()); - return false; - } - } - TraceEvent("FastRestoreApplierAllKeysPrecomputed").log(); - return true; - } - - void reset() { - kvOps.clear(); - dbApplier = Optional>(); - } - - void sanityCheckMutationOps() const { - if (kvOps.empty()) - return; - - ASSERT_WE_THINK(isKVOpsSorted()); - ASSERT_WE_THINK(allOpsAreKnown()); - } - - bool isKVOpsSorted() const { - auto prev = kvOps.begin(); - for (auto it = kvOps.begin(); it != kvOps.end(); ++it) { - if (prev->first > it->first) { - return false; - } - prev = it; - } - return true; - } - - bool allOpsAreKnown() const { - for (auto it = kvOps.begin(); it != kvOps.end(); ++it) { - for (auto m = it->second.begin(); m != it->second.end(); ++m) { - if (m->type == MutationRef::SetValue || m->type == MutationRef::ClearRange || - isAtomicOp((MutationRef::Type)m->type)) - continue; - else { - TraceEvent(SevError, "FastRestoreApplier").detail("UnknownMutationType", m->type); - return false; - } - } - } - return true; - } -}; - -struct RestoreApplierData : RestoreRoleData, public ReferenceCounted { - // Buffer for uncommitted data at ongoing version batches - std::map> batch; - - void addref() { return ReferenceCounted::addref(); } - void delref() { return ReferenceCounted::delref(); } - - explicit RestoreApplierData(UID applierInterfID, int assignedIndex) { - nodeID = applierInterfID; - nodeIndex = assignedIndex; - - // Q: Why do we need to initMetric? - // version.initMetric("RestoreApplier.Version"_sr, cc.id); - - role = RestoreRole::Applier; - } - - ~RestoreApplierData() override = default; - - // getVersionBatchState may be called periodically to dump version batch state, - // even when no version batch has been started. - int getVersionBatchState(int batchIndex) const final { - auto item = batch.find(batchIndex); - if (item == batch.end()) { // Batch has not been initialized when we blindly profile the state - return ApplierVersionBatchState::INVALID; - } else { - return item->second->vbState.get(); - } - } - void setVersionBatchState(int batchIndex, int vbState) final { - std::map>::iterator item = batch.find(batchIndex); - ASSERT(item != batch.end()); - item->second->vbState = vbState; - } - - void initVersionBatch(int batchIndex) override { - TraceEvent("FastRestoreApplierInitVersionBatch", id()).detail("BatchIndex", batchIndex); - batch[batchIndex] = Reference(new ApplierBatchData(nodeID, batchIndex)); - } - - void resetPerRestoreRequest() override { - batch.clear(); - finishedBatch = NotifiedVersion(0); - } - - std::string describeNode() const override { - std::stringstream ss; - ss << "NodeID:" << nodeID.toString() << " nodeIndex:" << nodeIndex; - return ss.str(); - } -}; - -ACTOR Future restoreApplierCore(RestoreApplierInterface applierInterf, int nodeIndex, Database cx); - -#include "flow/unactorcompiler.h" -#endif diff --git a/fdbserver/include/fdbserver/RestoreCommon.actor.h b/fdbserver/include/fdbserver/RestoreCommon.actor.h deleted file mode 100644 index 4504abf2f9a..00000000000 --- a/fdbserver/include/fdbserver/RestoreCommon.actor.h +++ /dev/null @@ -1,391 +0,0 @@ -/* - * RestoreCommon.actor.h - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// This file includes the code copied from the old restore in FDB 5.2 -// The functions and structure declared in this file can be shared by -// the old restore and the new performant restore systems - -#pragma once -#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_RESTORECOMMON_ACTOR_G_H) -#define FDBSERVER_RESTORECOMMON_ACTOR_G_H -#include "fdbserver/RestoreCommon.actor.g.h" -#elif !defined(FDBSERVER_RESTORECOMMON_ACTOR_H) -#define FDBSERVER_RESTORECOMMON_ACTOR_H - -#include "flow/flow.h" -#include "flow/genericactors.actor.h" -#include "fdbclient/Tuple.h" -#include "fdbclient/NativeAPI.actor.h" -#include "flow/IAsyncFile.h" -#include "fdbclient/BackupAgent.actor.h" -#include "fdbserver/Knobs.h" - -#include "flow/actorcompiler.h" // has to be last include - -// RestoreConfig copied from FileBackupAgent.actor.cpp -// We copy RestoreConfig instead of using (and potentially changing) it in place -// to avoid conflict with the existing code. -// We also made minor changes to allow RestoreConfig to be ReferenceCounted -// TODO: Merge this RestoreConfig with the original RestoreConfig in FileBackupAgent.actor.cpp -// For convenience -typedef FileBackupAgent::ERestoreState ERestoreState; - -struct RestoreFileFR; - -// We copy RestoreConfig copied from FileBackupAgent.actor.cpp instead of using (and potentially changing) it in place -// to avoid conflict with the existing code Split RestoreConfig defined in FileBackupAgent.actor.cpp to declaration in -// Restore.actor.h and implementation in RestoreCommon.actor.cpp, so that we can use in both the existing restore and -// the new fast restore subsystems. We use RestoreConfig as a Reference, which leads to some -// non-functional changes in RestoreConfig -class RestoreConfigFR : public KeyBackedTaskConfig, public ReferenceCounted { -public: - RestoreConfigFR(UID uid = UID()) : KeyBackedTaskConfig(fileRestorePrefixRange.begin, uid) {} - RestoreConfigFR(Reference task) : KeyBackedTaskConfig(fileRestorePrefixRange.begin, task) {} - - KeyBackedProperty stateEnum(); - - Future stateText(Reference tr); - - KeyBackedProperty addPrefix(); - - KeyBackedProperty removePrefix(); - - // XXX: Remove restoreRange() once it is safe to remove. It has been changed to restoreRanges - KeyBackedProperty restoreRange(); - - KeyBackedProperty> restoreRanges(); - - KeyBackedProperty batchFuture(); - - KeyBackedProperty restoreVersion(); - - KeyBackedProperty> sourceContainer(); - - // Get the source container as a bare URL, without creating a container instance - KeyBackedProperty sourceContainerURL(); - - // Total bytes written by all log and range restore tasks. - KeyBackedBinaryValue bytesWritten(); - - // File blocks that have had tasks created for them by the Dispatch task - KeyBackedBinaryValue filesBlocksDispatched(); - - // File blocks whose tasks have finished - KeyBackedBinaryValue fileBlocksFinished(); - - // Total number of files in the fileMap - KeyBackedBinaryValue fileCount(); - - // Total number of file blocks in the fileMap - KeyBackedBinaryValue fileBlockCount(); - - Future> getRestoreRangesOrDefault(Reference tr); - ACTOR static Future> getRestoreRangesOrDefault_impl(RestoreConfigFR* self, - Reference tr); - - // Describes a file to load blocks from during restore. Ordered by version and then fileName to enable - // incrementally advancing through the map, saving the version and path of the next starting point. - struct RestoreFile { - Version version; - std::string fileName; - bool isRange; // false for log file - int64_t blockSize; - int64_t fileSize; - Version endVersion; // not meaningful for range files - - Tuple pack() const { - // fprintf(stderr, "Filename:%s\n", fileName.c_str()); - return Tuple::makeTuple(version, fileName, (int)isRange, fileSize, blockSize, endVersion); - } - static RestoreFile unpack(Tuple const& t) { - RestoreFile r; - int i = 0; - r.version = t.getInt(i++); - r.fileName = t.getString(i++).toString(); - r.isRange = t.getInt(i++) != 0; - r.fileSize = t.getInt(i++); - r.blockSize = t.getInt(i++); - r.endVersion = t.getInt(i++); - return r; - } - }; - - // typedef KeyBackedSet FileSetT; - KeyBackedSet fileSet(); - - Future isRunnable(Reference tr); - - Future logError(Database cx, Error e, std::string const& details, void* taskInstance = nullptr); - - Key mutationLogPrefix(); - - Key applyMutationsMapPrefix(); - - ACTOR Future getApplyVersionLag_impl(Reference tr, UID uid); - - Future getApplyVersionLag(Reference tr); - - void initApplyMutations(Reference tr, Key addPrefix, Key removePrefix); - - void clearApplyMutationsKeys(Reference tr); - - void setApplyBeginVersion(Reference tr, Version ver); - - void setApplyEndVersion(Reference tr, Version ver); - - Future getApplyEndVersion(Reference tr); - - ACTOR static Future getProgress_impl(Reference restore, - Reference tr); - Future getProgress(Reference tr); - - ACTOR static Future getFullStatus_impl(Reference restore, - Reference tr); - Future getFullStatus(Reference tr); - - std::string toString(); // Added by Meng -}; - -// typedef RestoreConfigFR::RestoreFile RestoreFile; - -// Describes a file to load blocks from during restore. Ordered by version and then fileName to enable -// incrementally advancing through the map, saving the version and path of the next starting point. -// NOTE: The struct RestoreFileFR can NOT be named RestoreFile, because compiler will get confused in linking which -// RestoreFile should be used. If we use RestoreFile, compilation succeeds, but weird segmentation fault will happen. -struct RestoreFileFR { - Version version; - std::string fileName; - bool isRange; // false for log file - int64_t blockSize; - int64_t fileSize; - Version endVersion; // not meaningful for range files - Version beginVersion; // range file's beginVersion == endVersion; log file contains mutations in version - // [beginVersion, endVersion) - int64_t cursor; // The start block location to be restored. All blocks before cursor have been scheduled to load and - // restore - int fileIndex; // index of backup file. Must be identical per file. - int partitionId = -1; // Partition ID (Log Router Tag ID) for mutation files. - - Tuple pack() const { - return Tuple::makeTuple(version, - fileName, - (int)isRange, - fileSize, - blockSize, - endVersion, - beginVersion, - cursor, - fileIndex, - partitionId); - } - static RestoreFileFR unpack(Tuple const& t) { - RestoreFileFR r; - int i = 0; - r.version = t.getInt(i++); - r.fileName = t.getString(i++).toString(); - r.isRange = t.getInt(i++) != 0; - r.fileSize = t.getInt(i++); - r.blockSize = t.getInt(i++); - r.endVersion = t.getInt(i++); - r.beginVersion = t.getInt(i++); - r.cursor = t.getInt(i++); - r.fileIndex = t.getInt(i++); - r.partitionId = t.getInt(i++); - return r; - } - - bool operator<(const RestoreFileFR& rhs) const { - return std::tie(beginVersion, endVersion, fileIndex, fileName) < - std::tie(rhs.beginVersion, rhs.endVersion, rhs.fileIndex, rhs.fileName); - } - - RestoreFileFR() - : version(invalidVersion), isRange(false), blockSize(0), fileSize(0), endVersion(invalidVersion), - beginVersion(invalidVersion), cursor(0), fileIndex(0) {} - - explicit RestoreFileFR(const RangeFile& f) - : version(f.version), fileName(f.fileName), isRange(true), blockSize(f.blockSize), fileSize(f.fileSize), - endVersion(f.version), beginVersion(f.version), cursor(0), fileIndex(0) {} - - explicit RestoreFileFR(const LogFile& f) - : version(f.beginVersion), fileName(f.fileName), isRange(false), blockSize(f.blockSize), fileSize(f.fileSize), - endVersion(f.endVersion), beginVersion(f.beginVersion), cursor(0), fileIndex(0), partitionId(f.tagId) {} - - std::string toString() const { - std::stringstream ss; - ss << "version:" << version << " fileName:" << fileName << " isRange:" << isRange << " blockSize:" << blockSize - << " fileSize:" << fileSize << " endVersion:" << endVersion << " beginVersion:" << beginVersion - << " cursor:" << cursor << " fileIndex:" << fileIndex << " partitionId:" << partitionId; - return ss.str(); - } -}; - -namespace parallelFileRestore { -ACTOR Future>> decodeLogFileBlock(Reference file, - int64_t offset, - int len); -} // namespace parallelFileRestore - -// Send each request in requests via channel of the request's interface. -// Save replies to replies if replies != nullptr -// The UID in a request is the UID of the interface to handle the request -ACTOR template -Future getBatchReplies(RequestStream Interface::* channel, - std::map interfaces, - std::vector> requests, - std::vector* replies, - TaskPriority taskID = TaskPriority::Low, - bool trackRequestLatency = true) { - if (requests.empty()) { - return Void(); - } - - state double start = now(); - state int oustandingReplies = requests.size(); - loop { - try { - state std::vector> cmdReplies; - state std::vector> replyDurations; // double is end time of the request - for (auto& request : requests) { - RequestStream const* stream = &(interfaces[request.first].*channel); - cmdReplies.push_back(stream->getReply(request.second, taskID)); - replyDurations.emplace_back(request.first, request.second, 0); - } - - state std::vector> ongoingReplies; - state std::vector ongoingRepliesIndex; - loop { - ongoingReplies.clear(); - ongoingRepliesIndex.clear(); - for (int i = 0; i < cmdReplies.size(); ++i) { - if (SERVER_KNOBS->FASTRESTORE_REQBATCH_LOG) { - TraceEvent(SevInfo, "FastRestoreGetBatchReplies") - .suppressFor(1.0) - .detail("Requests", requests.size()) - .detail("OutstandingReplies", oustandingReplies) - .detail("ReplyIndex", i) - .detail("ReplyIsReady", cmdReplies[i].isReady()) - .detail("ReplyIsError", cmdReplies[i].isError()) - .detail("RequestNode", requests[i].first) - .detail("Request", requests[i].second.toString()); - } - if (!cmdReplies[i].isReady()) { // still wait for reply - ongoingReplies.push_back(cmdReplies[i]); - ongoingRepliesIndex.push_back(i); - } - } - ASSERT(ongoingReplies.size() == oustandingReplies); - if (ongoingReplies.empty()) { - break; - } else { - wait( - quorum(ongoingReplies, - std::min((int)SERVER_KNOBS->FASTRESTORE_REQBATCH_PARALLEL, (int)ongoingReplies.size()))); - } - // At least one reply is received; Calculate the reply duration - for (int j = 0; j < ongoingReplies.size(); ++j) { - if (ongoingReplies[j].isReady()) { - std::get<2>(replyDurations[ongoingRepliesIndex[j]]) = now(); - --oustandingReplies; - } else if (ongoingReplies[j].isError()) { - // When this happens, - // the above assertion ASSERT(ongoingReplies.size() == oustandingReplies) will fail - TraceEvent(SevError, "FastRestoreGetBatchRepliesReplyError") - .detail("OngoingReplyIndex", j) - .detail("FutureError", ongoingReplies[j].getError().what()); - } - } - } - ASSERT(oustandingReplies == 0); - if (trackRequestLatency && SERVER_KNOBS->FASTRESTORE_TRACK_REQUEST_LATENCY) { - // Calculate the latest end time for each interface - std::map maxEndTime; - UID bathcID = deterministicRandom()->randomUniqueID(); - for (int i = 0; i < replyDurations.size(); ++i) { - double endTime = std::get<2>(replyDurations[i]); - TraceEvent(SevInfo, "ProfileSendRequestBatchLatency", bathcID) - .detail("Node", std::get<0>(replyDurations[i])) - .detail("Request", std::get<1>(replyDurations[i]).toString()) - .detail("Duration", endTime - start); - auto item = maxEndTime.emplace(std::get<0>(replyDurations[i]), endTime); - item.first->second = std::max(item.first->second, endTime); - } - // Check the time gap between the earliest and latest node - double earliest = std::numeric_limits::max(); - double latest = std::numeric_limits::min(); - UID earliestNode, latestNode; - - for (auto& endTime : maxEndTime) { - if (earliest > endTime.second) { - earliest = endTime.second; - earliestNode = endTime.first; - } - if (latest < endTime.second) { - latest = endTime.second; - latestNode = endTime.first; - } - } - if (latest - earliest > SERVER_KNOBS->FASTRESTORE_STRAGGLER_THRESHOLD_SECONDS) { - TraceEvent(SevWarn, "ProfileSendRequestBatchLatencyFoundStraggler", bathcID) - .detail("SlowestNode", latestNode) - .detail("FatestNode", earliestNode) - .detail("EarliestEndtime", earliest) - .detail("LagTime", latest - earliest); - } - } - // Update replies - if (replies != nullptr) { - for (int i = 0; i < cmdReplies.size(); ++i) { - replies->emplace_back(cmdReplies[i].get()); - } - } - break; - } catch (Error& e) { - if (e.code() == error_code_operation_cancelled) - break; - // fprintf(stdout, "sendBatchRequests Error code:%d, error message:%s\n", e.code(), e.what()); - TraceEvent(SevWarn, "FastRestoreSendBatchRequests").error(e); - for (auto& request : requests) { - TraceEvent(SevWarn, "FastRestoreSendBatchRequests") - .detail("SendBatchRequests", requests.size()) - .detail("RequestID", request.first) - .detail("Request", request.second.toString()); - resetReply(request.second); - } - } - } - - return Void(); -} - -// Similar to getBatchReplies except that the caller does not expect to process the reply info. -ACTOR template -Future sendBatchRequests(RequestStream Interface::* channel, - std::map interfaces, - std::vector> requests, - TaskPriority taskID = TaskPriority::Low, - bool trackRequestLatency = true) { - wait(getBatchReplies(channel, interfaces, requests, nullptr, taskID, trackRequestLatency)); - - return Void(); -} - -#include "flow/unactorcompiler.h" -#endif // FDBSERVER_RESTORECOMMON_ACTOR_H diff --git a/fdbserver/include/fdbserver/RestoreController.actor.h b/fdbserver/include/fdbserver/RestoreController.actor.h deleted file mode 100644 index 7e1066e332f..00000000000 --- a/fdbserver/include/fdbserver/RestoreController.actor.h +++ /dev/null @@ -1,464 +0,0 @@ -/* - * RestoreController.actor.h - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// This file declear RestoreController interface and actors - -#pragma once -#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_RESTORE_CONTROLLER_G_H) -#define FDBSERVER_RESTORE_CONTROLLER_G_H -#include "fdbserver/RestoreController.actor.g.h" -#elif !defined(FDBSERVER_RESTORE_CONTROLLER_H) -#define FDBSERVER_RESTORE_CONTROLLER_H - -#include -#include "flow/Platform.h" -#include "fdbclient/FDBTypes.h" -#include "fdbclient/CommitTransaction.h" -#include "fdbrpc/fdbrpc.h" -#include "fdbrpc/Locality.h" -#include "fdbrpc/Stats.h" -#include "fdbserver/CoordinationInterface.h" -#include "fdbserver/RestoreUtil.h" -#include "fdbserver/RestoreRoleCommon.actor.h" -#include "fdbserver/RestoreWorker.actor.h" - -#include "flow/actorcompiler.h" // has to be last include - -struct VersionBatch { - Version beginVersion; // Inclusive - Version endVersion; // exclusive - std::set logFiles; - std::set rangeFiles; - double size; // size of data in range and log files - int batchIndex; // Never reset - - VersionBatch() : beginVersion(0), endVersion(0), size(0) {}; - - bool operator<(const VersionBatch& rhs) const { - return std::tie(batchIndex, beginVersion, endVersion, logFiles, rangeFiles, size) < - std::tie(rhs.batchIndex, rhs.beginVersion, rhs.endVersion, rhs.logFiles, rhs.rangeFiles, rhs.size); - } - - bool isEmpty() const { return logFiles.empty() && rangeFiles.empty(); } - void reset() { - beginVersion = 0; - endVersion = 0; - logFiles.clear(); - rangeFiles.clear(); - size = 0; - } - - // RestoreAsset and VersionBatch both use endVersion as exclusive in version range - bool isInVersionRange(Version version) const { return version >= beginVersion && version < endVersion; } -}; - -struct ControllerBatchData : public ReferenceCounted { - // rangeToApplier is in controller and loader node. Loader uses this to determine which applier a mutation should be - // sent. - // KeyRef is the inclusive lower bound of the key range the applier (UID) is responsible for - std::map rangeToApplier; - Optional> applyToDB; - - IndexedSet samples; // sample of range and log files - double samplesSize; // sum of the metric of all samples - std::set sampleMsgs; // deduplicate sample messages - - ControllerBatchData() = default; - ~ControllerBatchData() = default; - - // Return true if pass the sanity check - bool sanityCheckApplierKeyRange() { - bool ret = true; - // An applier should only appear once in rangeToApplier - std::map applierToRange; - for (auto& applier : rangeToApplier) { - if (applierToRange.find(applier.second) == applierToRange.end()) { - applierToRange[applier.second] = applier.first; - } else { - TraceEvent(SevError, "FastRestoreController") - .detail("SanityCheckApplierKeyRange", applierToRange.size()) - .detail("ApplierID", applier.second) - .detail("Key1", applierToRange[applier.second]) - .detail("Key2", applier.first); - ret = false; - } - } - return ret; - } - - void logApplierKeyRange(int batchIndex) { - TraceEvent("FastRestoreLogApplierKeyRange") - .detail("BatchIndex", batchIndex) - .detail("ApplierKeyRangeNum", rangeToApplier.size()); - for (auto& applier : rangeToApplier) { - TraceEvent("FastRestoreLogApplierKeyRange") - .detail("BatchIndex", batchIndex) - .detail("KeyRangeLowerBound", applier.first) - .detail("Applier", applier.second); - } - } -}; - -enum class RestoreAssetStatus { Loading, Loaded }; - -enum class RestoreSendStatus { SendingLogs, SendedLogs, SendingRanges, SendedRanges }; - -enum class RestoreApplyStatus { Applying, Applied }; - -// Track restore progress of each RestoreAsset (RA) and -// Use status to sanity check restore property, e.g., each RA should be processed exactly once. -struct ControllerBatchStatus : public ReferenceCounted { - std::map raStatus; - std::map loadStatus; - std::map applyStatus; - - void addref() { return ReferenceCounted::addref(); } - void delref() { return ReferenceCounted::delref(); } - - ControllerBatchStatus() = default; - ~ControllerBatchStatus() = default; -}; - -struct RestoreControllerData : RestoreRoleData, public ReferenceCounted { - std::map versionBatches; // key is the beginVersion of the version batch - - Reference bc; // Backup container is used to read backup files - Key bcUrl; // The url used to get the bc - - std::map> batch; - std::map> batchStatus; - - AsyncVar runningVersionBatches; // Currently running version batches - - std::map rolesHeartBeatTime; // Key: role id; Value: most recent time controller receives heart beat - - // addActor: add to actorCollection so that when an actor has error, the ActorCollection can catch the error. - // addActor is used to create the actorCollection when the RestoreController is created - PromiseStream> addActor; - - void addref() { return ReferenceCounted::addref(); } - void delref() { return ReferenceCounted::delref(); } - - RestoreControllerData(UID interfId) { - role = RestoreRole::Controller; - nodeID = interfId; - runningVersionBatches.set(0); - } - - ~RestoreControllerData() override = default; - - int getVersionBatchState(int batchIndex) const final { return RoleVersionBatchState::INVALID; } - void setVersionBatchState(int batchIndex, int vbState) final {} - - void initVersionBatch(int batchIndex) override { - TraceEvent("FastRestoreControllerInitVersionBatch", id()).detail("VersionBatchIndex", batchIndex); - } - - // Reset controller data at the beginning of each restore request - void resetPerRestoreRequest() override { - TraceEvent("FastRestoreControllerReset").detail("OldVersionBatches", versionBatches.size()); - versionBatches.clear(); - batch.clear(); - batchStatus.clear(); - finishedBatch = NotifiedVersion(0); - versionBatchId = NotifiedVersion(0); - ASSERT(runningVersionBatches.get() == 0); - } - - std::string describeNode() const override { - std::stringstream ss; - ss << "Controller"; - return ss.str(); - } - - void dumpVersionBatches(const std::map& versionBatches) const { - int i = 1; - double rangeFiles = 0; - double rangeSize = 0; - double logFiles = 0; - double logSize = 0; - for (auto& vb : versionBatches) { - TraceEvent("FastRestoreVersionBatches") - .detail("BatchIndex", vb.second.batchIndex) - .detail("ExpectedBatchIndex", i) - .detail("BeginVersion", vb.second.beginVersion) - .detail("EndVersion", vb.second.endVersion) - .detail("Size", vb.second.size); - for (auto& f : vb.second.rangeFiles) { - bool invalidVersion = (f.beginVersion != f.endVersion) || (f.beginVersion >= vb.second.endVersion || - f.beginVersion < vb.second.beginVersion); - TraceEvent(invalidVersion ? SevError : SevInfo, "FastRestoreVersionBatches") - .detail("BatchIndex", i) - .detail("RangeFile", f.toString()); - rangeSize += f.fileSize; - rangeFiles++; - } - for (auto& f : vb.second.logFiles) { - bool outOfRange = (f.beginVersion >= vb.second.endVersion || f.endVersion <= vb.second.beginVersion); - TraceEvent(outOfRange ? SevError : SevInfo, "FastRestoreVersionBatches") - .detail("BatchIndex", i) - .detail("LogFile", f.toString()); - logSize += f.fileSize; - logFiles++; - } - ++i; - } - - TraceEvent("FastRestoreVersionBatchesSummary") - .detail("VersionBatches", versionBatches.size()) - .detail("LogFiles", logFiles) - .detail("RangeFiles", rangeFiles) - .detail("LogBytes", logSize) - .detail("RangeBytes", rangeSize); - } - - // Input: Get the size of data in backup files in version range [prevVersion, nextVersion) - // Return: param1: the size of data at nextVersion, param2: the minimum range file index whose version > - // nextVersion, param3: log files with data in [prevVersion, nextVersion) - std::tuple> getVersionSize(Version prevVersion, - Version nextVersion, - const std::vector& rangeFiles, - int rangeIdx, - const std::vector& logFiles) { - double size = 0; - TraceEvent(SevVerbose, "FastRestoreGetVersionSize") - .detail("PreviousVersion", prevVersion) - .detail("NextVersion", nextVersion) - .detail("RangeFiles", rangeFiles.size()) - .detail("RangeIndex", rangeIdx) - .detail("LogFiles", logFiles.size()); - ASSERT(prevVersion <= nextVersion); - while (rangeIdx < rangeFiles.size()) { - TraceEvent(SevVerbose, "FastRestoreGetVersionSize").detail("RangeFile", rangeFiles[rangeIdx].toString()); - if (rangeFiles[rangeIdx].version < nextVersion) { - ASSERT(rangeFiles[rangeIdx].version >= prevVersion); - size += rangeFiles[rangeIdx].fileSize; - } else { - break; - } - ++rangeIdx; - } - std::vector retLogs; - // Scan all logFiles every time to avoid assumption on log files' version ranges. - // For example, we do not assume each version range only exists in one log file - for (const auto& file : logFiles) { - Version begin = std::max(prevVersion, file.beginVersion); - Version end = std::min(nextVersion, file.endVersion); - if (begin < end) { // logIdx file overlap in [prevVersion, nextVersion) - double ratio = (end - begin) * 1.0 / (file.endVersion - file.beginVersion); - size += file.fileSize * ratio; - retLogs.push_back(file); - } - } - return std::make_tuple(size, rangeIdx, retLogs); - } - - // Split backup files into version batches, each of which has similar data size - // Input: sorted range files, sorted log files; - // Output: a set of version batches whose size is less than SERVER_KNOBS->FASTRESTORE_VERSIONBATCH_MAX_BYTES - // and each mutation in backup files is included in the version batches exactly once. - // Assumption 1: input files has no empty files; - // Assumption 2: range files at one version <= FASTRESTORE_VERSIONBATCH_MAX_BYTES. - // Note: We do not allow a versionBatch size larger than the FASTRESTORE_VERSIONBATCH_MAX_BYTES because the range - // file size at a version depends on the number of backupAgents and its upper bound is hard to get. - void buildVersionBatches(const std::vector& rangeFiles, - const std::vector& logFiles, - std::map* versionBatches, - Version targetVersion) { - bool rewriteNextVersion = false; - int rangeIdx = 0; - int logIdx = 0; // Ensure each log file is included in version batch - Version prevEndVersion = 0; - Version nextVersion = 0; // Used to calculate the batch's endVersion - VersionBatch vb; - Version maxVBVersion = 0; - bool lastLogFile = false; - vb.beginVersion = 0; // Version batch range [beginVersion, endVersion) - vb.batchIndex = 1; - - while (rangeIdx < rangeFiles.size() || logIdx < logFiles.size()) { - if (!rewriteNextVersion) { - if (rangeIdx < rangeFiles.size() && logIdx < logFiles.size()) { - // nextVersion as endVersion is exclusive in the version range - nextVersion = std::max(rangeFiles[rangeIdx].version + 1, nextVersion); - } else if (rangeIdx < rangeFiles.size()) { // i.e., logIdx >= logFiles.size() - nextVersion = rangeFiles[rangeIdx].version + 1; - } else if (logIdx < logFiles.size()) { - while (logIdx < logFiles.size() && logFiles[logIdx].endVersion <= nextVersion) { - logIdx++; - } - if (logIdx < logFiles.size()) { - nextVersion = logFiles[logIdx].endVersion; - } else { - TraceEvent(SevFRDebugInfo, "FastRestoreBuildVersionBatch") - .detail("FinishAllLogFiles", logIdx) - .detail("CurBatchIndex", vb.batchIndex) - .detail("CurBatchSize", vb.size); - if (prevEndVersion < nextVersion) { - // Ensure the last log file is included in version batch - lastLogFile = true; - } else { - break; // Finished all log files - } - } - } else { - // TODO: Check why this may happen?! - TraceEvent(SevError, "FastRestoreBuildVersionBatch") - .detail("RangeIndex", rangeIdx) - .detail("RangeFiles", rangeFiles.size()) - .detail("LogIndex", logIdx) - .detail("LogFiles", logFiles.size()); - } - } else { - rewriteNextVersion = false; - } - - double nextVersionSize; - int nextRangeIdx; - std::vector curLogFiles; - std::tie(nextVersionSize, nextRangeIdx, curLogFiles) = - getVersionSize(prevEndVersion, nextVersion, rangeFiles, rangeIdx, logFiles); - - TraceEvent(SevFRDebugInfo, "FastRestoreBuildVersionBatch") - .detail("BatchIndex", vb.batchIndex) - .detail("VersionBatchBeginVersion", vb.beginVersion) - .detail("PreviousEndVersion", prevEndVersion) - .detail("NextVersion", nextVersion) - .detail("TargetVersion", targetVersion) - .detail("RangeIndex", rangeIdx) - .detail("RangeFiles", rangeFiles.size()) - .detail("LogIndex", logIdx) - .detail("LogFiles", logFiles.size()) - .detail("VersionBatchSizeThreshold", SERVER_KNOBS->FASTRESTORE_VERSIONBATCH_MAX_BYTES) - .detail("CurrentBatchSize", vb.size) - .detail("NextVersionIntervalSize", nextVersionSize) - .detail("NextRangeIndex", nextRangeIdx) - .detail("UsedLogFiles", curLogFiles.size()) - .detail("VersionBatchCurRangeFiles", vb.rangeFiles.size()) - .detail("VersionBatchCurLogFiles", vb.logFiles.size()) - .detail("LastLogFile", lastLogFile); - - ASSERT(prevEndVersion < nextVersion); // Ensure progress - if (vb.size + nextVersionSize <= SERVER_KNOBS->FASTRESTORE_VERSIONBATCH_MAX_BYTES || - (vb.size < 1 && prevEndVersion + 1 == nextVersion) || lastLogFile) { - // In case the batch size at a single version > FASTRESTORE_VERSIONBATCH_MAX_BYTES, - // the version batch should include the single version to avoid false positive in simulation. - if (vb.size + nextVersionSize > SERVER_KNOBS->FASTRESTORE_VERSIONBATCH_MAX_BYTES) { - TraceEvent(g_network->isSimulated() ? SevWarnAlways : SevError, "FastRestoreBuildVersionBatch") - .detail("NextVersion", nextVersion) - .detail("PreviousEndVersion", prevEndVersion) - .detail("NextVersionIntervalSize", nextVersionSize) - .detail("VersionBatchSizeThreshold", SERVER_KNOBS->FASTRESTORE_VERSIONBATCH_MAX_BYTES) - .detail("SuggestedMinimumVersionBatchSizeThreshold", nextVersionSize * 2); - } - // nextVersion should be included in this batch - vb.size += nextVersionSize; - while (rangeIdx < nextRangeIdx && rangeIdx < rangeFiles.size()) { - ASSERT(rangeFiles[rangeIdx].fileSize > 0); - vb.rangeFiles.insert(rangeFiles[rangeIdx]); - ++rangeIdx; - } - - for (auto& log : curLogFiles) { - ASSERT(log.beginVersion < nextVersion); - ASSERT(log.endVersion > prevEndVersion); - ASSERT(log.fileSize > 0); - vb.logFiles.insert(log); - } - - vb.endVersion = std::min(nextVersion, targetVersion + 1); - maxVBVersion = std::max(maxVBVersion, vb.endVersion); - prevEndVersion = vb.endVersion; - } else { - if (vb.size < 1) { - // [vb.endVersion, nextVersion) > SERVER_KNOBS->FASTRESTORE_VERSIONBATCH_MAX_BYTES. We should split - // the version range - if (prevEndVersion >= nextVersion) { - // If range files at one version > FASTRESTORE_VERSIONBATCH_MAX_BYTES, DBA should increase - // FASTRESTORE_VERSIONBATCH_MAX_BYTES to some value larger than nextVersion - TraceEvent(SevError, "FastRestoreBuildVersionBatch") - .detail("NextVersion", nextVersion) - .detail("PreviousEndVersion", prevEndVersion) - .detail("NextVersionIntervalSize", nextVersionSize) - .detail("VersionBatchSizeThreshold", SERVER_KNOBS->FASTRESTORE_VERSIONBATCH_MAX_BYTES) - .detail("SuggestedMinimumVersionBatchSizeThreshold", nextVersionSize * 2); - // Exit restore early if it won't succeed - flushAndExit(FDB_EXIT_ERROR); - } - ASSERT(prevEndVersion < nextVersion); // Ensure progress - nextVersion = (prevEndVersion + nextVersion) / 2; - rewriteNextVersion = true; - TraceEvent(SevFRDebugInfo, "FastRestoreBuildVersionBatch") - .detail("NextVersionIntervalSize", nextVersionSize); // Duplicate Trace - continue; - } - // Finalize the current version batch - versionBatches->emplace(vb.beginVersion, vb); // copy vb to versionBatch - TraceEvent(SevFRDebugInfo, "FastRestoreBuildVersionBatch") - .detail("FinishBatchIndex", vb.batchIndex) - .detail("VersionBatchBeginVersion", vb.beginVersion) - .detail("VersionBatchEndVersion", vb.endVersion) - .detail("VersionBatchLogFiles", vb.logFiles.size()) - .detail("VersionBatchRangeFiles", vb.rangeFiles.size()) - .detail("VersionBatchSize", vb.size) - .detail("RangeIndex", rangeIdx) - .detail("LogIndex", logIdx) - .detail("NewVersionBatchBeginVersion", prevEndVersion) - .detail("RewriteNextVersion", rewriteNextVersion); - - // start finding the next version batch - vb.reset(); - vb.size = 0; - vb.beginVersion = prevEndVersion; - vb.batchIndex++; - } - } - // The last wip version batch has some files - if (vb.size > 0) { - vb.endVersion = std::min(nextVersion, targetVersion + 1); - maxVBVersion = std::max(maxVBVersion, vb.endVersion); - versionBatches->emplace(vb.beginVersion, vb); - } - // Invariant: The last vb endverion should be no smaller than targetVersion - if (maxVBVersion < targetVersion) { - // Q: Is the restorable version always less than the maximum version from all backup filenames? - // A: This is true for the raw backup files returned by backup container before we remove the empty files. - TraceEvent(SevWarnAlways, "FastRestoreBuildVersionBatch") - .detail("TargetVersion", targetVersion) - .detail("MaxVersionBatchVersion", maxVBVersion); - } - } - - void initBackupContainer(Key url, Optional proxy) { - if (bcUrl == url && bc.isValid()) { - return; - } - TraceEvent("FastRestoreControllerInitBackupContainer") - .detail("URL", url) - .detail("Proxy", proxy.present() ? proxy.get() : ""); - bcUrl = url; - bc = IBackupContainer::openContainer(url.toString(), proxy, {}); - } -}; - -ACTOR Future startRestoreController(Reference controllerWorker, Database cx); - -#include "flow/unactorcompiler.h" -#endif diff --git a/fdbserver/include/fdbserver/RestoreLoader.actor.h b/fdbserver/include/fdbserver/RestoreLoader.actor.h deleted file mode 100644 index f37023a75de..00000000000 --- a/fdbserver/include/fdbserver/RestoreLoader.actor.h +++ /dev/null @@ -1,244 +0,0 @@ -/* - * RestoreLoader.h - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// This file declares the actors used by the RestoreLoader role - -#pragma once -#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_RESTORE_LOADER_G_H) -#define FDBSERVER_RESTORE_LOADER_G_H -#include "fdbserver/RestoreLoader.actor.g.h" -#elif !defined(FDBSERVER_RESTORE_LOADER_H) -#define FDBSERVER_RESTORE_LOADER_H - -#include -#include "fdbclient/FDBTypes.h" -#include "fdbclient/CommitTransaction.h" -#include "fdbrpc/fdbrpc.h" -#include "fdbrpc/Stats.h" -#include "fdbserver/CoordinationInterface.h" -#include "fdbrpc/Locality.h" -#include "fdbserver/RestoreUtil.h" -#include "fdbserver/RestoreCommon.actor.h" -#include "fdbserver/RestoreRoleCommon.actor.h" -#include "fdbserver/RestoreWorkerInterface.actor.h" -#include "fdbclient/BackupContainer.h" - -#include "flow/actorcompiler.h" // has to be last include - -class LoaderVersionBatchState : RoleVersionBatchState { -public: - static const int NOT_INIT = 0; - static const int INIT = 1; - static const int LOAD_FILE = 2; - static const int SEND_MUTATIONS = 3; - static const int INVALID = 4; - - explicit LoaderVersionBatchState(int newState) { vbState = newState; } - - ~LoaderVersionBatchState() override = default; - - void operator=(int newState) override { vbState = newState; } - - int get() const override { return vbState; } -}; - -struct LoaderBatchData : public ReferenceCounted { - std::map> processedFileParams; - std::map kvOpsPerLP; // Buffered kvOps for each loading param - - // rangeToApplier is in controller and loader. Loader uses this to determine which applier a mutation should be sent - // Key is the inclusive lower bound of the key range the applier (UID) is responsible for - std::map rangeToApplier; - - // Sampled mutations to be sent back to restore controller - std::map sampleMutations; - int numSampledMutations; // The total number of mutations received from sampled data. - - Future pollMetrics; - - LoaderVersionBatchState vbState; - - long loadFileReqs; - - // Status counters - struct Counters { - CounterCollection cc; - Counter loadedRangeBytes, loadedLogBytes, sentBytes; - Counter sampledRangeBytes, sampledLogBytes; - Counter oldLogMutations; - - Counters(LoaderBatchData* self, UID loaderInterfID, int batchIndex) - : cc("LoaderBatch", loaderInterfID.toString() + ":" + std::to_string(batchIndex)), - loadedRangeBytes("LoadedRangeBytes", cc), loadedLogBytes("LoadedLogBytes", cc), sentBytes("SentBytes", cc), - sampledRangeBytes("SampledRangeBytes", cc), sampledLogBytes("SampledLogBytes", cc), - oldLogMutations("OldLogMutations", cc) {} - } counters; - - explicit LoaderBatchData(UID nodeID, int batchIndex) - : vbState(LoaderVersionBatchState::NOT_INIT), loadFileReqs(0), counters(this, nodeID, batchIndex) { - pollMetrics = - counters.cc.traceCounters(format("FastRestoreLoaderMetrics%d", batchIndex), - nodeID, - SERVER_KNOBS->FASTRESTORE_ROLE_LOGGING_DELAY, - nodeID.toString() + "/RestoreLoaderMetrics/" + std::to_string(batchIndex)); - TraceEvent("FastRestoreLoaderMetricsCreated").detail("Node", nodeID); - } - - void reset() { - processedFileParams.clear(); - kvOpsPerLP.clear(); - sampleMutations.clear(); - numSampledMutations = 0; - rangeToApplier.clear(); - } -}; - -using LoaderCounters = LoaderBatchData::Counters; - -struct LoaderBatchStatus : public ReferenceCounted { - Optional> sendAllRanges; - Optional> sendAllLogs; - - void addref() { return ReferenceCounted::addref(); } - void delref() { return ReferenceCounted::delref(); } - - std::string toString() const { - std::stringstream ss; - ss << "sendAllRanges: " - << (!sendAllRanges.present() ? "invalid" : (sendAllRanges.get().isReady() ? "ready" : "notReady")) - << " sendAllLogs: " - << (!sendAllLogs.present() ? "invalid" : (sendAllLogs.get().isReady() ? "ready" : "notReady")); - return ss.str(); - } -}; - -// Each request for each loadingParam, so that scheduler can control which requests in which version batch to send first -struct RestoreLoaderSchedSendLoadParamRequest { - int batchIndex; - Promise toSched; - double start; - - explicit RestoreLoaderSchedSendLoadParamRequest(int batchIndex, Promise toSched, double start) - : batchIndex(batchIndex), toSched(toSched), start(start) {}; - RestoreLoaderSchedSendLoadParamRequest() = default; - - bool operator<(RestoreLoaderSchedSendLoadParamRequest const& rhs) const { - return batchIndex > rhs.batchIndex || (batchIndex == rhs.batchIndex && start > rhs.start); - } - - std::string toString() const { - std::stringstream ss; - ss << "RestoreLoaderSchedSendLoadParamRequest: " - << " batchIndex:" << batchIndex << " toSchedFutureIsReady:" << toSched.getFuture().isReady() - << " start:" << start; - return ss.str(); - } -}; - -struct RestoreLoaderData : RestoreRoleData, public ReferenceCounted { - // buffered data per version batch - std::map> batch; - std::map> status; - RestoreControllerInterface ci; - - KeyRangeMap rangeVersions; - - Reference bc; // Backup container is used to read backup files - Key bcUrl; // The url used to get the bc - - // Request scheduler - std::priority_queue loadingQueue; // request queue of loading files - std::priority_queue - sendingQueue; // request queue of sending mutations to appliers - std::priority_queue sendLoadParamQueue; - int finishedLoadingVB; // the max version batch index that finished loading file phase - int finishedSendingVB; // the max version batch index that finished sending mutations phase - int inflightSendingReqs; // number of sendingMutations requests released - int inflightLoadingReqs; // number of load backup file requests released - std::map inflightSendLoadParamReqs; // key: batchIndex, value: inflightSendLoadParamReqs - - Reference> hasPendingRequests; // are there pending requests for loader - - // addActor: add to actorCollection so that when an actor has error, the ActorCollection can catch the error. - // addActor is used to create the actorCollection when the RestoreController is created - PromiseStream> addActor; - - void addref() { return ReferenceCounted::addref(); } - void delref() { return ReferenceCounted::delref(); } - - explicit RestoreLoaderData(UID loaderInterfID, int assignedIndex, RestoreControllerInterface ci) - : ci(ci), finishedLoadingVB(0), finishedSendingVB(0), inflightSendingReqs(0), inflightLoadingReqs(0) { - nodeID = loaderInterfID; - nodeIndex = assignedIndex; - role = RestoreRole::Loader; - hasPendingRequests = makeReference>(false); - } - - ~RestoreLoaderData() override = default; - - std::string describeNode() const override { - std::stringstream ss; - ss << "[Role: Loader] [NodeID:" << nodeID.toString().c_str() << "] [NodeIndex:" << std::to_string(nodeIndex) - << "]"; - return ss.str(); - } - - int getVersionBatchState(int batchIndex) const final { - auto item = batch.find(batchIndex); - if (item == batch.end()) { // Batch has not been initialized when we blindly profile the state - return LoaderVersionBatchState::INVALID; - } else { - return item->second->vbState.get(); - } - } - void setVersionBatchState(int batchIndex, int vbState) final { - std::map>::iterator item = batch.find(batchIndex); - ASSERT(item != batch.end()); - item->second->vbState = vbState; - } - - void initVersionBatch(int batchIndex) override { - TraceEvent("FastRestoreLoaderInitVersionBatch", nodeID).detail("BatchIndex", batchIndex); - batch[batchIndex] = makeReference(nodeID, batchIndex); - status[batchIndex] = makeReference(); - } - - void resetPerRestoreRequest() override { - batch.clear(); - status.clear(); - finishedBatch = NotifiedVersion(0); - } - - void initBackupContainer(Key url, Optional proxy) { - if (bcUrl == url && bc.isValid()) { - return; - } - bcUrl = url; - bc = IBackupContainer::openContainer(url.toString(), proxy, {}); - } -}; - -ACTOR Future restoreLoaderCore(RestoreLoaderInterface loaderInterf, - int nodeIndex, - Database cx, - RestoreControllerInterface ci); - -#include "flow/unactorcompiler.h" -#endif diff --git a/fdbserver/include/fdbserver/RestoreRoleCommon.actor.h b/fdbserver/include/fdbserver/RestoreRoleCommon.actor.h deleted file mode 100644 index 8fe266d12f9..00000000000 --- a/fdbserver/include/fdbserver/RestoreRoleCommon.actor.h +++ /dev/null @@ -1,130 +0,0 @@ -/* - * RestoreRoleCommon.h - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// This file declares common struct and functions shared by restore roles, i.e., -// RestoreController, RestoreLoader, RestoreApplier - -#pragma once -#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_RestoreRoleCommon_G_H) -#define FDBSERVER_RestoreRoleCommon_G_H -#include "fdbserver/RestoreRoleCommon.actor.g.h" -#elif !defined(FDBSERVER_RestoreRoleCommon_H) -#define FDBSERVER_RestoreRoleCommon_H - -#include -#include "flow/SystemMonitor.h" -#include "fdbclient/FDBTypes.h" -#include "fdbclient/CommitTransaction.h" -#include "fdbclient/Notified.h" -#include "fdbrpc/fdbrpc.h" -#include "fdbrpc/Locality.h" -#include "fdbrpc/Stats.h" -#include "fdbserver/CoordinationInterface.h" -#include "fdbserver/RestoreWorkerInterface.actor.h" -#include "fdbserver/RestoreUtil.h" - -#include "flow/actorcompiler.h" // has to be last include - -struct RestoreRoleInterface; -struct RestoreLoaderInterface; -struct RestoreApplierInterface; - -struct RestoreRoleData; -struct RestoreControllerData; - -struct RestoreSimpleRequest; - -// Key is the (version, subsequence) of parsed backup mutations. -// Value MutationsVec is the vector of parsed backup mutations. -// For old mutation logs, the subsequence number is always 0. -// For partitioned mutation logs, each mutation has a unique LogMessageVersion. -// Note for partitioned logs, one LogMessageVersion can have multiple mutations, -// because a clear mutation may be split into several smaller clear mutations by -// backup workers. -using VersionedMutationsMap = std::map; - -ACTOR Future isSchedulable(Reference self, int actorBatchIndex, std::string name); -ACTOR Future handleHeartbeat(RestoreSimpleRequest req, UID id); -ACTOR Future handleInitVersionBatchRequest(RestoreVersionBatchRequest req, Reference self); -void handleFinishRestoreRequest(const RestoreFinishRequest& req, Reference self); - -class RoleVersionBatchState { -public: - static const int INVALID = -1; - - virtual int get() const { return vbState; } - - virtual void operator=(int newState) { vbState = newState; } - - explicit RoleVersionBatchState() : vbState(INVALID) {} - explicit RoleVersionBatchState(int newState) : vbState(newState) {} - - virtual ~RoleVersionBatchState() = default; - - int vbState; -}; - -struct RestoreRoleData : NonCopyable, public ReferenceCounted { -public: - RestoreRole role; - UID nodeID; - int nodeIndex; - - double cpuUsage; - double memory; - double residentMemory; - - AsyncTrigger checkMemory; - int delayedActors; // actors that are delayed to release because of low memory - - std::map loadersInterf; // UID: loaderInterf's id - std::map appliersInterf; // UID: applierInterf's id - Promise recruitedRoles; // sent when loaders and appliers are recruited - - NotifiedVersion versionBatchId; // The index of the version batch that has been initialized and put into pipeline - NotifiedVersion finishedBatch; // The highest batch index all appliers have applied mutations - - RestoreRoleData() - : role(RestoreRole::Invalid), cpuUsage(0.0), memory(0.0), residentMemory(0.0), delayedActors(0) {}; - - virtual ~RestoreRoleData() = default; - - UID id() const { return nodeID; } - - virtual void initVersionBatch(int batchIndex) = 0; - virtual void resetPerRestoreRequest() = 0; - virtual int getVersionBatchState(int batchIndex) const = 0; - virtual void setVersionBatchState(int batchIndex, int vbState) = 0; - - void clearInterfaces() { - loadersInterf.clear(); - appliersInterf.clear(); - } - - virtual std::string describeNode() const = 0; -}; - -void updateProcessStats(Reference self); -ACTOR Future updateProcessMetrics(Reference self); -ACTOR Future traceProcessMetrics(Reference self, std::string role); -ACTOR Future traceRoleVersionBatchProgress(Reference self, std::string role); - -#include "flow/unactorcompiler.h" -#endif diff --git a/fdbserver/include/fdbserver/RestoreUtil.h b/fdbserver/include/fdbserver/RestoreUtil.h deleted file mode 100644 index e09105a6507..00000000000 --- a/fdbserver/include/fdbserver/RestoreUtil.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * RestoreUtil.h - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// This file defines the commonly used data structure and functions -// that are used by both RestoreWorker and RestoreRoles(Controller, Loader, and Applier) - -#ifndef FDBSERVER_RESTOREUTIL_H -#define FDBSERVER_RESTOREUTIL_H - -#pragma once - -#include "fdbclient/Tuple.h" -#include "fdbclient/CommitTransaction.h" -#include "fdbclient/RestoreInterface.h" -#include "flow/flow.h" -#include "fdbrpc/TimedRequest.h" -#include "fdbrpc/fdbrpc.h" -#include "flow/IAsyncFile.h" -#include "fdbrpc/Stats.h" -#include -#include - -#define SevFRMutationInfo SevVerbose -// #define SevFRMutationInfo SevInfo - -#define SevFRDebugInfo SevVerbose -// #define SevFRDebugInfo SevInfo - -struct VersionedMutationSerialized { - MutationRef mutation; - LogMessageVersion version; - - VersionedMutationSerialized() = default; - explicit VersionedMutationSerialized(MutationRef mutation, LogMessageVersion version) - : mutation(mutation), version(version) {} - explicit VersionedMutationSerialized(Arena& arena, const VersionedMutationSerialized& vm) - : mutation(arena, vm.mutation), version(vm.version) {} - - template - void serialize(Ar& ar) { - serializer(ar, mutation, version); - } -}; - -struct SampledMutation { - KeyRef key; - long size; - - explicit SampledMutation(KeyRef key, long size) : key(key), size(size) {} - explicit SampledMutation(Arena& arena, const SampledMutation& sm) : key(arena, sm.key), size(sm.size) {} - SampledMutation() = default; - - int totalSize() { return key.size() + sizeof(size); } - - template - void serialize(Ar& ar) { - serializer(ar, key, size); - } -}; - -using MutationsVec = Standalone>; -using LogMessageVersionVec = Standalone>; -using VersionedMutationsVec = Standalone>; -using SampledMutationsVec = Standalone>; - -enum class RestoreRole { Invalid = 0, Controller = 1, Loader, Applier }; -std::string getRoleStr(RestoreRole role); -extern const std::vector RestoreRoleStr; -extern int numRoles; - -std::string getHexString(StringRef input); - -bool debugFRMutation(const char* context, Version version, MutationRef const& mutation); - -struct RestoreSimpleRequest : TimedRequest { - constexpr static FileIdentifier file_identifier = 16448937; - - ReplyPromise reply; - - RestoreSimpleRequest() = default; - - template - void serialize(Ar& ar) { - serializer(ar, reply); - } - - std::string toString() const { - std::stringstream ss; - ss << "RestoreSimpleRequest"; - return ss.str(); - } -}; - -bool isRangeMutation(MutationRef m); - -#endif // FDBSERVER_RESTOREUTIL_H diff --git a/fdbserver/include/fdbserver/RestoreWorker.actor.h b/fdbserver/include/fdbserver/RestoreWorker.actor.h deleted file mode 100644 index b0b057bd0ac..00000000000 --- a/fdbserver/include/fdbserver/RestoreWorker.actor.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * RestoreWorker.actor.h - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once -#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_RESTOREWORKER_G_H) -#define FDBSERVER_RESTOREWORKER_G_H -#include "fdbserver/RestoreWorker.actor.g.h" -#elif !defined(FDBSERVER_RESTOREWORKER_H) -#define FDBSERVER_RESTOREWORKER_H - -#include "fdbclient/Tuple.h" -#include "flow/flow.h" -#include "fdbrpc/fdbrpc.h" -#include "flow/IAsyncFile.h" -#include "fdbrpc/Stats.h" -#include -#include - -#include "fdbserver/RestoreUtil.h" -#include "fdbserver/RestoreCommon.actor.h" -#include "fdbserver/RestoreRoleCommon.actor.h" -#include "fdbserver/RestoreLoader.actor.h" -#include "fdbserver/RestoreApplier.actor.h" -#include "fdbserver/RestoreWorkerInterface.actor.h" - -#include "flow/actorcompiler.h" // must be last include - -// Each restore worker (a process) is assigned for a role. -// MAYBE Later: We will support multiple restore roles on a worker -struct RestoreWorkerData : NonCopyable, public ReferenceCounted { - UID workerID; - std::map - workerInterfaces; // UID is worker's node id, RestoreWorkerInterface is worker's communication workerInterface - - // Restore Roles - Optional controllerInterf; - Optional loaderInterf; - Optional applierInterf; - - UID id() const { return workerID; }; - - RestoreWorkerData() = default; - - ~RestoreWorkerData() { - TraceEvent("RestoreWorkerDataDeleted").detail("WorkerID", workerID.toString()); - printf("[Exit] Worker:%s RestoreWorkerData is deleted\n", workerID.toString().c_str()); - } - - std::string describeNode() { - std::stringstream ss; - ss << "RestoreWorker workerID:" << workerID.toString(); - return ss.str(); - } -}; - -#include "flow/unactorcompiler.h" -#endif // FDBSERVER_RESTOREWORKER_H diff --git a/fdbserver/include/fdbserver/RestoreWorkerInterface.actor.h b/fdbserver/include/fdbserver/RestoreWorkerInterface.actor.h deleted file mode 100644 index 2e63782ddce..00000000000 --- a/fdbserver/include/fdbserver/RestoreWorkerInterface.actor.h +++ /dev/null @@ -1,739 +0,0 @@ -/* - * RestoreWorkerInterface.actor.h - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// This file declare and define the interface for RestoreWorker and restore roles -// which are RestoreController, RestoreLoader, and RestoreApplier - -#pragma once -#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_RESTORE_WORKER_INTERFACE_ACTOR_G_H) -#define FDBSERVER_RESTORE_WORKER_INTERFACE_ACTOR_G_H -#include "fdbserver/RestoreWorkerInterface.actor.g.h" -#elif !defined(FDBSERVER_RESTORE_WORKER_INTERFACE_ACTOR_H) -#define FDBSERVER_RESTORE_WORKER_INTERFACE_ACTOR_H - -#include -#include -#include "flow/flow.h" -#include "fdbrpc/fdbrpc.h" -#include "fdbrpc/Locality.h" -#include "fdbrpc/Stats.h" -#include "fdbclient/FDBTypes.h" -#include "fdbclient/CommitTransaction.h" -#include "fdbserver/CoordinationInterface.h" -#include "fdbserver/Knobs.h" -#include "fdbserver/RestoreUtil.h" -#include "flow/actorcompiler.h" // This must be the last #include. - -class RestoreConfigFR; - -struct RestoreCommonReply; -struct RestoreRecruitRoleRequest; -struct RestoreSysInfoRequest; -struct RestoreLoadFileRequest; -struct RestoreVersionBatchRequest; -struct RestoreSendMutationsToAppliersRequest; -struct RestoreSendVersionedMutationsRequest; -struct RestoreSysInfo; -struct RestoreApplierInterface; -struct RestoreFinishRequest; -struct RestoreSamplesRequest; -struct RestoreUpdateRateRequest; - -// RestoreSysInfo includes information each (type of) restore roles should know. -// At this moment, it only include appliers. We keep the name for future extension. -// TODO: If it turns out this struct only has appliers in the final version, we will rename it to a more specific name, -// e.g., AppliersMap -struct RestoreSysInfo { - constexpr static FileIdentifier file_identifier = 68098739; - std::map appliers; - - RestoreSysInfo() = default; - explicit RestoreSysInfo(const std::map appliers) : appliers(appliers) {} - - template - void serialize(Ar& ar) { - serializer(ar, appliers); - } -}; - -struct RestoreWorkerInterface { - constexpr static FileIdentifier file_identifier = 15715718; - UID interfID; - - RequestStream heartbeat; - RequestStream recruitRole; - RequestStream terminateWorker; - - bool operator==(RestoreWorkerInterface const& r) const { return id() == r.id(); } - bool operator!=(RestoreWorkerInterface const& r) const { return id() != r.id(); } - - UID id() const { return interfID; } // cmd.getEndpoint().token; - - NetworkAddress address() const { return recruitRole.getEndpoint().addresses.address; } - - void initEndpoints() { - heartbeat.getEndpoint(TaskPriority::LoadBalancedEndpoint); - recruitRole.getEndpoint(TaskPriority::LoadBalancedEndpoint); // Q: Why do we need this? - terminateWorker.getEndpoint(TaskPriority::LoadBalancedEndpoint); - - interfID = deterministicRandom()->randomUniqueID(); - } - - // To change this serialization, ProtocolVersion::RestoreWorkerInterfaceValue must be updated, and downgrades need - // to be considered - template - void serialize(Ar& ar) { - serializer(ar, interfID, heartbeat, recruitRole, terminateWorker); - } -}; - -struct RestoreRoleInterface { - constexpr static FileIdentifier file_identifier = 12199691; - UID nodeID; - RestoreRole role; - - RestoreRoleInterface() { role = RestoreRole::Invalid; } - - explicit RestoreRoleInterface(RestoreRoleInterface const& interf) : nodeID(interf.nodeID), role(interf.role) {}; - - UID id() const { return nodeID; } - - std::string toString() const { - std::stringstream ss; - ss << "Role:" << getRoleStr(role) << " interfID:" << nodeID.toString(); - return ss.str(); - } - - template - void serialize(Ar& ar) { - serializer(ar, nodeID, role); - } -}; - -struct RestoreLoaderInterface : RestoreRoleInterface { - constexpr static FileIdentifier file_identifier = 358571; - - RequestStream heartbeat; - RequestStream updateRestoreSysInfo; - RequestStream loadFile; - RequestStream sendMutations; - RequestStream initVersionBatch; - RequestStream finishVersionBatch; - RequestStream collectRestoreRoleInterfaces; - RequestStream finishRestore; - - bool operator==(RestoreWorkerInterface const& r) const { return id() == r.id(); } - bool operator!=(RestoreWorkerInterface const& r) const { return id() != r.id(); } - - RestoreLoaderInterface() { - role = RestoreRole::Loader; - nodeID = deterministicRandom()->randomUniqueID(); - } - - NetworkAddress address() const { return heartbeat.getEndpoint().addresses.address; } - - void initEndpoints() { - // Endpoint in a later restore phase has higher priority - heartbeat.getEndpoint(TaskPriority::LoadBalancedEndpoint); - updateRestoreSysInfo.getEndpoint(TaskPriority::LoadBalancedEndpoint); - initVersionBatch.getEndpoint(TaskPriority::LoadBalancedEndpoint); - loadFile.getEndpoint(TaskPriority::RestoreLoaderLoadFiles); - sendMutations.getEndpoint(TaskPriority::RestoreLoaderSendMutations); - finishVersionBatch.getEndpoint(TaskPriority::RestoreLoaderFinishVersionBatch); - collectRestoreRoleInterfaces.getEndpoint(TaskPriority::LoadBalancedEndpoint); - finishRestore.getEndpoint(TaskPriority::LoadBalancedEndpoint); - } - - template - void serialize(Ar& ar) { - serializer(ar, - *(RestoreRoleInterface*)this, - heartbeat, - updateRestoreSysInfo, - loadFile, - sendMutations, - initVersionBatch, - finishVersionBatch, - collectRestoreRoleInterfaces, - finishRestore); - } -}; - -struct RestoreApplierInterface : RestoreRoleInterface { - constexpr static FileIdentifier file_identifier = 3921400; - - RequestStream heartbeat; - RequestStream sendMutationVector; - RequestStream applyToDB; - RequestStream initVersionBatch; - RequestStream collectRestoreRoleInterfaces; - RequestStream finishRestore; - RequestStream updateRate; - - bool operator==(RestoreWorkerInterface const& r) const { return id() == r.id(); } - bool operator!=(RestoreWorkerInterface const& r) const { return id() != r.id(); } - - RestoreApplierInterface() { - role = RestoreRole::Applier; - nodeID = deterministicRandom()->randomUniqueID(); - } - - NetworkAddress address() const { return heartbeat.getEndpoint().addresses.address; } - - void initEndpoints() { - // Endpoint in a later restore phase has higher priority - heartbeat.getEndpoint(TaskPriority::LoadBalancedEndpoint); - sendMutationVector.getEndpoint(TaskPriority::RestoreApplierReceiveMutations); - applyToDB.getEndpoint(TaskPriority::RestoreApplierWriteDB); - initVersionBatch.getEndpoint(TaskPriority::LoadBalancedEndpoint); - collectRestoreRoleInterfaces.getEndpoint(TaskPriority::LoadBalancedEndpoint); - finishRestore.getEndpoint(TaskPriority::LoadBalancedEndpoint); - updateRate.getEndpoint(TaskPriority::LoadBalancedEndpoint); - } - - template - void serialize(Ar& ar) { - serializer(ar, - *(RestoreRoleInterface*)this, - heartbeat, - sendMutationVector, - applyToDB, - initVersionBatch, - collectRestoreRoleInterfaces, - finishRestore, - updateRate); - } - - std::string toString() const { return nodeID.toString(); } -}; - -struct RestoreControllerInterface : RestoreRoleInterface { - constexpr static FileIdentifier file_identifier = 11642024; - - RequestStream samples; - - bool operator==(RestoreWorkerInterface const& r) const { return id() == r.id(); } - bool operator!=(RestoreWorkerInterface const& r) const { return id() != r.id(); } - - RestoreControllerInterface() { - role = RestoreRole::Controller; - nodeID = deterministicRandom()->randomUniqueID(); - } - - NetworkAddress address() const { return samples.getEndpoint().addresses.address; } - - void initEndpoints() { samples.getEndpoint(TaskPriority::LoadBalancedEndpoint); } - - template - void serialize(Ar& ar) { - serializer(ar, *(RestoreRoleInterface*)this, samples); - } - - std::string toString() const { return nodeID.toString(); } -}; - -// RestoreAsset uniquely identifies the work unit done by restore roles; -// It is used to ensure exact-once processing on restore loader and applier; -// By combining all RestoreAssets across all version batches, restore should process all mutations in -// backup range and log files up to the target restore version. -struct RestoreAsset { - UID uid; - - Version beginVersion, endVersion; // Only use mutation in [begin, end) versions; - KeyRange range; // Only use mutations in range - - int fileIndex; - // Partition ID for mutation log files, which is also encoded in the filename of mutation logs. - int partitionId = -1; - std::string filename; - int64_t offset; - int64_t len; - - Key addPrefix; - Key removePrefix; - - int batchIndex; // for progress tracking and performance investigation - - RestoreAsset() = default; - - // Q: Can we simply use uid for == and use different comparison rule for less than operator. - // The ordering of RestoreAsset may change, will that affect correctness or performance? - bool operator==(const RestoreAsset& r) const { - return batchIndex == r.batchIndex && beginVersion == r.beginVersion && endVersion == r.endVersion && - range == r.range && fileIndex == r.fileIndex && partitionId == r.partitionId && filename == r.filename && - offset == r.offset && len == r.len && addPrefix == r.addPrefix && removePrefix == r.removePrefix; - } - bool operator!=(const RestoreAsset& r) const { return !(*this == r); } - bool operator<(const RestoreAsset& r) const { - return std::make_tuple(batchIndex, - fileIndex, - filename, - offset, - len, - beginVersion, - endVersion, - range.begin, - range.end, - addPrefix, - removePrefix) < std::make_tuple(r.batchIndex, - r.fileIndex, - r.filename, - r.offset, - r.len, - r.beginVersion, - r.endVersion, - r.range.begin, - r.range.end, - r.addPrefix, - r.removePrefix); - } - - template - void serialize(Ar& ar) { - serializer(ar, - uid, - beginVersion, - endVersion, - range, - filename, - fileIndex, - partitionId, - offset, - len, - addPrefix, - removePrefix, - batchIndex); - } - - std::string toString() const { - std::stringstream ss; - ss << "UID:" << uid.toString() << " begin:" << beginVersion << " end:" << endVersion - << " range:" << range.toString() << " filename:" << filename << " fileIndex:" << fileIndex - << " partitionId:" << partitionId << " offset:" << offset << " len:" << len - << " addPrefix:" << addPrefix.toString() << " removePrefix:" << removePrefix.toString() - << " BatchIndex:" << batchIndex; - return ss.str(); - } - - bool hasPrefix() const { return addPrefix.size() > 0 || removePrefix.size() > 0; } - - // RestoreAsset and VersionBatch both use endVersion as exclusive in version range - bool isInVersionRange(Version commitVersion) const { - return commitVersion >= beginVersion && commitVersion < endVersion; - } - - // Is mutation's begin and end keys are in RestoreAsset's range - bool isInKeyRange(MutationRef mutation) const { - if (hasPrefix()) { - Key begin = range.begin; // Avoid creating new keys if we do not have addPrefix or removePrefix - Key end = range.end; - begin = begin.removePrefix(removePrefix).withPrefix(addPrefix); - end = end.removePrefix(removePrefix).withPrefix(addPrefix); - if (isRangeMutation(mutation)) { - // Range mutation's right side is exclusive - return mutation.param1 >= begin && mutation.param2 <= end; - } else { - return mutation.param1 >= begin && mutation.param1 < end; - } - } else { - if (isRangeMutation(mutation)) { - // Range mutation's right side is exclusive - return mutation.param1 >= range.begin && mutation.param2 <= range.end; - } else { - return mutation.param1 >= range.begin && mutation.param1 < range.end; - } - } - } -}; - -struct LoadingParam { - constexpr static FileIdentifier file_identifier = 246621; - - bool isRangeFile; - Key url; - Optional proxy; - Optional rangeVersion; // range file's version - - int64_t blockSize; - RestoreAsset asset; - - LoadingParam() = default; - - // TODO: Compare all fields for loadingParam - bool operator==(const LoadingParam& r) const { return isRangeFile == r.isRangeFile && asset == r.asset; } - bool operator!=(const LoadingParam& r) const { return isRangeFile != r.isRangeFile || asset != r.asset; } - bool operator<(const LoadingParam& r) const { - return (isRangeFile < r.isRangeFile) || (isRangeFile == r.isRangeFile && asset < r.asset); - } - - bool isPartitionedLog() const { return !isRangeFile && asset.partitionId >= 0; } - - template - void serialize(Ar& ar) { - serializer(ar, isRangeFile, url, proxy, rangeVersion, blockSize, asset); - } - - std::string toString() const { - std::stringstream str; - str << "isRangeFile:" << isRangeFile << " url:" << url.toString() - << " proxy:" << (proxy.present() ? proxy.get() : "") - << " rangeVersion:" << (rangeVersion.present() ? rangeVersion.get() : -1) << " blockSize:" << blockSize - << " RestoreAsset:" << asset.toString(); - return str.str(); - } -}; - -struct RestoreRecruitRoleReply : TimedRequest { - constexpr static FileIdentifier file_identifier = 13532876; - - UID id; - RestoreRole role; - Optional loader; - Optional applier; - - RestoreRecruitRoleReply() = default; - explicit RestoreRecruitRoleReply(UID id, RestoreRole role, RestoreLoaderInterface const& loader) - : id(id), role(role), loader(loader) {} - explicit RestoreRecruitRoleReply(UID id, RestoreRole role, RestoreApplierInterface const& applier) - : id(id), role(role), applier(applier) {} - - template - void serialize(Ar& ar) { - serializer(ar, id, role, loader, applier); - } - - std::string toString() const { - std::stringstream ss; - ss << "roleInterf role:" << getRoleStr(role) << " replyID:" << id.toString(); - if (loader.present()) { - ss << "loader:" << loader.get().toString(); - } - if (applier.present()) { - ss << "applier:" << applier.get().toString(); - } - - return ss.str(); - } -}; - -struct RestoreRecruitRoleRequest : TimedRequest { - constexpr static FileIdentifier file_identifier = 3136280; - - RestoreControllerInterface ci; - RestoreRole role; - int nodeIndex; // Each role is a node - - ReplyPromise reply; - - RestoreRecruitRoleRequest() : role(RestoreRole::Invalid) {} - explicit RestoreRecruitRoleRequest(RestoreControllerInterface ci, RestoreRole role, int nodeIndex) - : ci(ci), role(role), nodeIndex(nodeIndex) {} - - template - void serialize(Ar& ar) { - serializer(ar, ci, role, nodeIndex, reply); - } - - std::string printable() const { - std::stringstream ss; - ss << "RestoreRecruitRoleRequest Role:" << getRoleStr(role) << " NodeIndex:" << nodeIndex - << " RestoreController:" << ci.id().toString(); - return ss.str(); - } - - std::string toString() const { return printable(); } -}; - -// Static info. across version batches -struct RestoreSysInfoRequest : TimedRequest { - constexpr static FileIdentifier file_identifier = 8851877; - - RestoreSysInfo sysInfo; - Standalone>> rangeVersions; - - ReplyPromise reply; - - RestoreSysInfoRequest() = default; - explicit RestoreSysInfoRequest(RestoreSysInfo sysInfo, - Standalone>> rangeVersions) - : sysInfo(sysInfo), rangeVersions(rangeVersions) {} - - template - void serialize(Ar& ar) { - serializer(ar, sysInfo, rangeVersions, reply); - } - - std::string toString() const { - std::stringstream ss; - ss << "RestoreSysInfoRequest " - << "rangeVersions.size:" << rangeVersions.size(); - return ss.str(); - } -}; - -struct RestoreSamplesRequest : TimedRequest { - constexpr static FileIdentifier file_identifier = 10751035; - UID id; // deduplicate data - int batchIndex; - SampledMutationsVec samples; // sampled mutations - - ReplyPromise reply; - - RestoreSamplesRequest() = default; - explicit RestoreSamplesRequest(UID id, int batchIndex, SampledMutationsVec samples) - : id(id), batchIndex(batchIndex), samples(samples) {} - - template - void serialize(Ar& ar) { - serializer(ar, id, batchIndex, samples, reply); - } - - std::string toString() const { - std::stringstream ss; - ss << "ID:" << id.toString() << " BatchIndex:" << batchIndex << " samples:" << samples.size(); - return ss.str(); - } -}; - -struct RestoreLoadFileReply : TimedRequest { - constexpr static FileIdentifier file_identifier = 523470; - - LoadingParam param; - bool isDuplicated; // true if loader thinks the request is a duplicated one - - RestoreLoadFileReply() = default; - explicit RestoreLoadFileReply(LoadingParam param, bool isDuplicated) : param(param), isDuplicated(isDuplicated) {} - - template - void serialize(Ar& ar) { - serializer(ar, param, isDuplicated); - } - - std::string toString() const { - std::stringstream ss; - ss << "LoadingParam:" << param.toString() << " isDuplicated:" << isDuplicated; - return ss.str(); - } -}; - -// Sample_Range_File and Assign_Loader_Range_File, Assign_Loader_Log_File -struct RestoreLoadFileRequest : TimedRequest { - constexpr static FileIdentifier file_identifier = 9780148; - - int batchIndex; - LoadingParam param; - - ReplyPromise reply; - - RestoreLoadFileRequest() = default; - explicit RestoreLoadFileRequest(int batchIndex, LoadingParam& param) : batchIndex(batchIndex), param(param) {}; - - bool operator<(RestoreLoadFileRequest const& rhs) const { return batchIndex > rhs.batchIndex; } - - template - void serialize(Ar& ar) { - serializer(ar, batchIndex, param, reply); - } - - std::string toString() const { - std::stringstream ss; - ss << "RestoreLoadFileRequest batchIndex:" << batchIndex << " param:" << param.toString(); - return ss.str(); - } -}; - -struct RestoreSendMutationsToAppliersRequest : TimedRequest { - constexpr static FileIdentifier file_identifier = 1718441; - - int batchIndex; // version batch index - std::map rangeToApplier; - bool useRangeFile; // Send mutations parsed from range file? - - ReplyPromise reply; - - RestoreSendMutationsToAppliersRequest() = default; - explicit RestoreSendMutationsToAppliersRequest(int batchIndex, std::map rangeToApplier, bool useRangeFile) - : batchIndex(batchIndex), rangeToApplier(rangeToApplier), useRangeFile(useRangeFile) {} - - bool operator<(RestoreSendMutationsToAppliersRequest const& rhs) const { return batchIndex > rhs.batchIndex; } - - template - void serialize(Ar& ar) { - serializer(ar, batchIndex, rangeToApplier, useRangeFile, reply); - } - - std::string toString() const { - std::stringstream ss; - ss << "RestoreSendMutationsToAppliersRequest batchIndex:" << batchIndex - << " keyToAppliers.size:" << rangeToApplier.size() << " useRangeFile:" << useRangeFile; - return ss.str(); - } -}; - -struct RestoreSendVersionedMutationsRequest : TimedRequest { - constexpr static FileIdentifier file_identifier = 2655701; - - int batchIndex; // version batch index - RestoreAsset asset; // Unique identifier for the current restore asset - - Version msgIndex; // Monitonically increasing index of mutation messages - bool isRangeFile; - VersionedMutationsVec versionedMutations; // Versioned mutations may be at different versions parsed by one loader - - ReplyPromise reply; - - RestoreSendVersionedMutationsRequest() = default; - explicit RestoreSendVersionedMutationsRequest(int batchIndex, - const RestoreAsset& asset, - Version msgIndex, - bool isRangeFile, - VersionedMutationsVec versionedMutations) - : batchIndex(batchIndex), asset(asset), msgIndex(msgIndex), isRangeFile(isRangeFile), - versionedMutations(versionedMutations) {} - - std::string toString() const { - std::stringstream ss; - ss << "VersionBatchIndex:" << batchIndex << " msgIndex:" << msgIndex << " isRangeFile:" << isRangeFile - << " versionedMutations.size:" << versionedMutations.size() << " RestoreAsset:" << asset.toString(); - return ss.str(); - } - - template - void serialize(Ar& ar) { - serializer(ar, batchIndex, asset, msgIndex, isRangeFile, versionedMutations, reply); - } -}; - -struct RestoreVersionBatchRequest : TimedRequest { - constexpr static FileIdentifier file_identifier = 13337457; - - int batchIndex; - - ReplyPromise reply; - - RestoreVersionBatchRequest() = default; - explicit RestoreVersionBatchRequest(int batchIndex) : batchIndex(batchIndex) {} - - template - void serialize(Ar& ar) { - serializer(ar, batchIndex, reply); - } - - std::string toString() const { - std::stringstream ss; - ss << "RestoreVersionBatchRequest batchIndex:" << batchIndex; - return ss.str(); - } -}; - -struct RestoreFinishRequest : TimedRequest { - constexpr static FileIdentifier file_identifier = 13018413; - - bool terminate; // role exits if terminate = true - - ReplyPromise reply; - - RestoreFinishRequest() = default; - explicit RestoreFinishRequest(bool terminate) : terminate(terminate) {} - - template - void serialize(Ar& ar) { - serializer(ar, terminate, reply); - } - - std::string toString() const { - std::stringstream ss; - ss << "RestoreFinishRequest terminate:" << terminate; - return ss.str(); - } -}; - -struct RestoreUpdateRateReply : TimedRequest { - constexpr static FileIdentifier file_identifier = 13018414; - - UID id; - double remainMB; // remaining data in MB to write to DB; - - RestoreUpdateRateReply() = default; - explicit RestoreUpdateRateReply(UID id, double remainMB) : id(id), remainMB(remainMB) {} - - std::string toString() const { - std::stringstream ss; - ss << "RestoreUpdateRateReply NodeID:" << id.toString() << " remainMB:" << remainMB; - return ss.str(); - } - - template - void serialize(Ar& ar) { - serializer(ar, id, remainMB); - } -}; - -struct RestoreUpdateRateRequest : TimedRequest { - constexpr static FileIdentifier file_identifier = 13018415; - - int batchIndex; - double writeMB; - - ReplyPromise reply; - - RestoreUpdateRateRequest() = default; - explicit RestoreUpdateRateRequest(int batchIndex, double writeMB) : batchIndex(batchIndex), writeMB(writeMB) {} - - template - void serialize(Ar& ar) { - serializer(ar, batchIndex, writeMB, reply); - } - - std::string toString() const { - std::stringstream ss; - ss << "RestoreUpdateRateRequest batchIndex:" << batchIndex << " writeMB:" << writeMB; - return ss.str(); - } -}; - -std::string getRoleStr(RestoreRole role); - -////--- Interface functions -ACTOR Future _restoreWorker(Database cx, LocalityData locality); -ACTOR Future restoreWorker(Reference ccr, - LocalityData locality, - std::string coordFolder); - -extern const KeyRef restoreLeaderKey; -extern const KeyRangeRef restoreWorkersKeys; -extern const KeyRef restoreStatusKey; // To be used when we measure fast restore performance -extern const KeyRangeRef restoreRequestKeys; -extern const KeyRangeRef restoreApplierKeys; -extern const KeyRef restoreApplierTxnValue; - -const Key restoreApplierKeyFor(UID const& applierID, int64_t batchIndex, Version version); -std::tuple decodeRestoreApplierKey(ValueRef const& key); -const Key restoreWorkerKeyFor(UID const& workerID); -const Value restoreWorkerInterfaceValue(RestoreWorkerInterface const& server); -RestoreWorkerInterface decodeRestoreWorkerInterfaceValue(ValueRef const& value); -Version decodeRestoreRequestDoneVersionValue(ValueRef const& value); -RestoreRequest decodeRestoreRequestValue(ValueRef const& value); -const Key restoreStatusKeyFor(StringRef statusType); -const Value restoreStatusValue(double val); -Value restoreRequestDoneVersionValue(Version readVersion); - -#include "flow/unactorcompiler.h" -#endif diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index 4828921869b..2e887549313 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -14612,7 +14612,7 @@ ACTOR Future storageServerCore(StorageServer* self, StorageServerInterface state double lastLoopTopTime = now(); state Future dbInfoChange = Void(); state Future checkLastUpdate = Void(); - state Future updateProcessStatsTimer = delay(SERVER_KNOBS->FASTRESTORE_UPDATE_PROCESS_STATS_INTERVAL); + state Future updateProcessStatsTimer = delay(SERVER_KNOBS->STORAGE_UPDATE_PROCESS_STATS_INTERVAL); self->actors.add(updateStorage(self)); self->actors.add(waitFailureServer(ssi.waitFailure.getFuture())); @@ -14790,7 +14790,7 @@ ACTOR Future storageServerCore(StorageServer* self, StorageServerInterface } when(wait(updateProcessStatsTimer)) { updateProcessStats(self); - updateProcessStatsTimer = delay(SERVER_KNOBS->FASTRESTORE_UPDATE_PROCESS_STATS_INTERVAL); + updateProcessStatsTimer = delay(SERVER_KNOBS->STORAGE_UPDATE_PROCESS_STATS_INTERVAL); } when(GetHotShardsRequest req = waitNext(ssi.getHotShards.getFuture())) { struct ComparePair { diff --git a/fdbserver/workloads/AtomicRestore.actor.cpp b/fdbserver/workloads/AtomicRestore.actor.cpp index 4becbc1788a..49a2f6881d7 100644 --- a/fdbserver/workloads/AtomicRestore.actor.cpp +++ b/fdbserver/workloads/AtomicRestore.actor.cpp @@ -22,7 +22,6 @@ #include "fdbrpc/simulator.h" #include "fdbclient/BackupAgent.actor.h" #include "fdbserver/Knobs.h" -#include "fdbserver/RestoreCommon.actor.h" #include "fdbserver/workloads/workloads.actor.h" #include "fdbserver/workloads/BulkSetup.actor.h" @@ -32,7 +31,6 @@ struct AtomicRestoreWorkload : TestWorkload { static constexpr auto NAME = "AtomicRestore"; double startAfter, restoreAfter; - bool fastRestore; // true: use fast restore, false: use old style restore Standalone> backupRanges; UsePartitionedLog usePartitionedLogs{ false }; Key addPrefix, removePrefix; // Original key will be first applied removePrefix and then applied addPrefix @@ -42,13 +40,7 @@ struct AtomicRestoreWorkload : TestWorkload { startAfter = getOption(options, "startAfter"_sr, 10.0); restoreAfter = getOption(options, "restoreAfter"_sr, 20.0); - fastRestore = getOption(options, "fastRestore"_sr, false); - if (!fastRestore) { - addDefaultBackupRanges(backupRanges); - } else { - // Fast restore doesn't support multiple ranges yet - backupRanges.push_back_deep(backupRanges.arena(), normalKeys); - } + addDefaultBackupRanges(backupRanges); usePartitionedLogs.set( getOption(options, "usePartitionedLogs"_sr, deterministicRandom()->random01() < 0.5 ? true : false)); @@ -118,22 +110,16 @@ struct AtomicRestoreWorkload : TestWorkload { wait(delay(self->restoreAfter * deterministicRandom()->random01())); TraceEvent("AtomicRestore_RestoreStart").log(); - if (self->fastRestore) { // New fast parallel restore - TraceEvent(SevInfo, "AtomicParallelRestore").log(); - wait(backupAgent.atomicParallelRestore( - cx, BackupAgentBase::getDefaultTag(), self->backupRanges, self->addPrefix, self->removePrefix)); - } else { // Old style restore - loop { - try { - wait(success(backupAgent.atomicRestore( - cx, BackupAgentBase::getDefaultTag(), self->backupRanges, StringRef(), StringRef()))); - break; - } catch (Error& e) { - if (e.code() != error_code_backup_unneeded && e.code() != error_code_backup_duplicate) - throw; - } - wait(delay(FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY)); + loop { + try { + wait(success(backupAgent.atomicRestore( + cx, BackupAgentBase::getDefaultTag(), self->backupRanges, StringRef(), StringRef()))); + break; + } catch (Error& e) { + if (e.code() != error_code_backup_unneeded && e.code() != error_code_backup_duplicate) + throw; } + wait(delay(FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY)); } // SOMEDAY: Remove after backup agents can exist quiescently diff --git a/fdbserver/workloads/BackupAndParallelRestoreCorrectness.actor.cpp b/fdbserver/workloads/BackupAndParallelRestoreCorrectness.actor.cpp deleted file mode 100644 index b2b646e9b70..00000000000 --- a/fdbserver/workloads/BackupAndParallelRestoreCorrectness.actor.cpp +++ /dev/null @@ -1,795 +0,0 @@ -/* - * BackupAndParallelRestoreCorrectness.actor.cpp - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "fdbrpc/simulator.h" -#include "fdbclient/BackupAgent.actor.h" -#include "fdbclient/BackupContainer.h" -#include "fdbclient/ManagementAPI.actor.h" -#include "fdbserver/RestoreWorkerInterface.actor.h" -#include "fdbclient/RunRYWTransaction.actor.h" -#include "fdbserver/RestoreCommon.actor.h" -#include "fdbserver/workloads/workloads.actor.h" -#include "fdbserver/workloads/BulkSetup.actor.h" -#include "flow/actorcompiler.h" // This must be the last #include. - -#define TEST_ABORT_FASTRESTORE 0 - -// A workload which test the correctness of backup and restore process -struct BackupAndParallelRestoreCorrectnessWorkload : TestWorkload { - static constexpr auto NAME = "BackupAndParallelRestoreCorrectness"; - double backupAfter, restoreAfter, abortAndRestartAfter; - double backupStartAt, restoreStartAfterBackupFinished, stopDifferentialAfter; - Key backupTag; - int backupRangesCount, backupRangeLengthMax; - bool differentialBackup, performRestore, agentRequest; - Standalone> backupRanges; - static int backupAgentRequests; - LockDB locked{ false }; - bool allowPauses; - bool shareLogRange; - UsePartitionedLog usePartitionedLogs{ false }; - Key addPrefix, removePrefix; // Original key will be first applied removePrefix and then applied addPrefix - // CAVEAT: When removePrefix is used, we must ensure every key in backup have the removePrefix - - std::map, Standalone> dbKVs; - - // This workload is not compatible with RandomRangeLock workload because they will race in locked range - void disableFailureInjectionWorkloads(std::set& out) const override { - out.insert({ "RandomRangeLock" }); - } - - BackupAndParallelRestoreCorrectnessWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) { - locked.set(sharedRandomNumber % 2); - backupAfter = getOption(options, "backupAfter"_sr, 10.0); - restoreAfter = getOption(options, "restoreAfter"_sr, 35.0); - performRestore = getOption(options, "performRestore"_sr, true); - backupTag = getOption(options, "backupTag"_sr, BackupAgentBase::getDefaultTag()); - backupRangesCount = getOption(options, "backupRangesCount"_sr, 5); - backupRangeLengthMax = getOption(options, "backupRangeLengthMax"_sr, 1); - abortAndRestartAfter = - getOption(options, - "abortAndRestartAfter"_sr, - deterministicRandom()->random01() < 0.5 - ? deterministicRandom()->random01() * (restoreAfter - backupAfter) + backupAfter - : 0.0); - differentialBackup = - getOption(options, "differentialBackup"_sr, deterministicRandom()->random01() < 0.5 ? true : false); - stopDifferentialAfter = - getOption(options, - "stopDifferentialAfter"_sr, - differentialBackup ? deterministicRandom()->random01() * - (restoreAfter - std::max(abortAndRestartAfter, backupAfter)) + - std::max(abortAndRestartAfter, backupAfter) - : 0.0); - agentRequest = getOption(options, "simBackupAgents"_sr, true); - allowPauses = getOption(options, "allowPauses"_sr, true); - shareLogRange = getOption(options, "shareLogRange"_sr, false); - usePartitionedLogs.set(getOption(options, "usePartitionedLogs"_sr, deterministicRandom()->coinflip())); - addPrefix = getOption(options, "addPrefix"_sr, ""_sr); - removePrefix = getOption(options, "removePrefix"_sr, ""_sr); - - KeyRef beginRange; - KeyRef endRange; - UID randomID = nondeterministicRandom()->randomUniqueID(); - - // Correctness is not clean for addPrefix feature yet. Uncomment below to enable the test - // Generate addPrefix - // if (addPrefix.size() == 0 && removePrefix.size() == 0) { - // if (deterministicRandom()->random01() < 0.5) { // Generate random addPrefix - // int len = deterministicRandom()->randomInt(1, 100); - // std::string randomStr = deterministicRandom()->randomAlphaNumeric(len); - // TraceEvent("BackupAndParallelRestoreCorrectness") - // .detail("GenerateAddPrefix", randomStr) - // .detail("Length", len) - // .detail("StrLen", randomStr.size()); - // addPrefix = Key(randomStr); - // } - // } - TraceEvent("BackupAndParallelRestoreCorrectness") - .detail("AddPrefix", addPrefix) - .detail("RemovePrefix", removePrefix); - ASSERT(addPrefix.size() == 0 && removePrefix.size() == 0); - // Do not support removePrefix right now because we must ensure all backup keys have the removePrefix - // otherwise, test will fail because fast restore will simply add the removePrefix to every key in the end. - ASSERT(removePrefix.size() == 0); - - if (shareLogRange) { - bool beforePrefix = sharedRandomNumber & 1; - if (beforePrefix) - backupRanges.push_back_deep(backupRanges.arena(), KeyRangeRef(normalKeys.begin, "\xfe\xff\xfe"_sr)); - else - backupRanges.push_back_deep(backupRanges.arena(), - KeyRangeRef(strinc("\x00\x00\x01"_sr), normalKeys.end)); - } else if (backupRangesCount <= 0) { - backupRanges.push_back_deep(backupRanges.arena(), normalKeys); - } else { - // Add backup ranges - std::set rangeEndpoints; - while (rangeEndpoints.size() < backupRangesCount * 2) { - rangeEndpoints.insert(deterministicRandom()->randomAlphaNumeric( - deterministicRandom()->randomInt(1, backupRangeLengthMax + 1))); - } - - // Create ranges from the keys, in order, to prevent overlaps - std::vector sortedEndpoints(rangeEndpoints.begin(), rangeEndpoints.end()); - sort(sortedEndpoints.begin(), sortedEndpoints.end()); - for (auto i = sortedEndpoints.begin(); i != sortedEndpoints.end(); ++i) { - const std::string& start = *i++; - backupRanges.push_back_deep(backupRanges.arena(), KeyRangeRef(start, *i)); - - // Track the added range - TraceEvent("BARW_BackupCorrectnessRange", randomID) - .detail("RangeBegin", (beginRange < endRange) ? printable(beginRange) : printable(endRange)) - .detail("RangeEnd", (beginRange < endRange) ? printable(endRange) : printable(beginRange)); - } - } - } - - Future setup(Database const& cx) override { return Void(); } - - Future start(Database const& cx) override { - if (clientId != 0) - return Void(); - - TraceEvent(SevInfo, "BARW_Param").detail("Locked", locked); - TraceEvent(SevInfo, "BARW_Param").detail("BackupAfter", backupAfter); - TraceEvent(SevInfo, "BARW_Param").detail("RestoreAfter", restoreAfter); - TraceEvent(SevInfo, "BARW_Param").detail("PerformRestore", performRestore); - TraceEvent(SevInfo, "BARW_Param").detail("BackupTag", printable(backupTag).c_str()); - TraceEvent(SevInfo, "BARW_Param").detail("BackupRangesCount", backupRangesCount); - TraceEvent(SevInfo, "BARW_Param").detail("BackupRangeLengthMax", backupRangeLengthMax); - TraceEvent(SevInfo, "BARW_Param").detail("AbortAndRestartAfter", abortAndRestartAfter); - TraceEvent(SevInfo, "BARW_Param").detail("DifferentialBackup", differentialBackup); - TraceEvent(SevInfo, "BARW_Param").detail("StopDifferentialAfter", stopDifferentialAfter); - TraceEvent(SevInfo, "BARW_Param").detail("AgentRequest", agentRequest); - - return _start(cx, this); - } - - bool hasPrefix() const { return addPrefix != ""_sr || removePrefix != ""_sr; } - - Future check(Database const& cx) override { return true; } - - void getMetrics(std::vector& m) override {} - - ACTOR static Future changePaused(Database cx, FileBackupAgent* backupAgent) { - loop { - wait(backupAgent->changePause(cx, true)); - wait(delay(30 * deterministicRandom()->random01())); - wait(backupAgent->changePause(cx, false)); - wait(delay(120 * deterministicRandom()->random01())); - } - } - - ACTOR static Future statusLoop(Database cx, std::string tag) { - state FileBackupAgent agent; - loop { - std::string status = wait(agent.getStatus(cx, ShowErrors::True, tag)); - puts(status.c_str()); - wait(delay(2.0)); - } - } - - ACTOR static Future doBackup(BackupAndParallelRestoreCorrectnessWorkload* self, - double startDelay, - FileBackupAgent* backupAgent, - Database cx, - Key tag, - Standalone> backupRanges, - double stopDifferentialDelay, - Promise submitted) { - state UID randomID = nondeterministicRandom()->randomUniqueID(); - state Future stopDifferentialFuture = delay(stopDifferentialDelay); - - wait(delay(startDelay)); - - if (startDelay || BUGGIFY) { - TraceEvent("BARW_DoBackupAbortBackup1", randomID) - .detail("Tag", printable(tag)) - .detail("StartDelay", startDelay); - - try { - wait(backupAgent->abortBackup(cx, tag.toString())); - } catch (Error& e) { - TraceEvent("BARW_DoBackupAbortBackupException", randomID).error(e).detail("Tag", printable(tag)); - if (e.code() != error_code_backup_unneeded) - throw; - } - } - - TraceEvent("BARW_DoBackupSubmitBackup", randomID) - .detail("Tag", printable(tag)) - .detail("StopWhenDone", stopDifferentialDelay ? "False" : "True"); - - state std::string backupContainer = "file://simfdb/backups/"; - state Future status = statusLoop(cx, tag.toString()); - try { - wait(backupAgent->submitBackup(cx, - StringRef(backupContainer), - {}, - deterministicRandom()->randomInt(0, 60), - deterministicRandom()->randomInt(0, 100), - tag.toString(), - backupRanges, - true, - StopWhenDone{ !stopDifferentialDelay }, - self->usePartitionedLogs)); - } catch (Error& e) { - TraceEvent("BARW_DoBackupSubmitBackupException", randomID).error(e).detail("Tag", printable(tag)); - if (e.code() != error_code_backup_unneeded && e.code() != error_code_backup_duplicate) - throw; - } - - submitted.send(Void()); - - // Stop the differential backup, if enabled - if (stopDifferentialDelay) { - CODE_PROBE(!stopDifferentialFuture.isReady(), - "Restore starts at specified time - stopDifferential not ready"); - wait(stopDifferentialFuture); - TraceEvent("BARW_DoBackupWaitToDiscontinue", randomID) - .detail("Tag", printable(tag)) - .detail("DifferentialAfter", stopDifferentialDelay); - - try { - if (BUGGIFY) { - state KeyBackedTag backupTag = makeBackupTag(tag.toString()); - TraceEvent("BARW_DoBackupWaitForRestorable", randomID).detail("Tag", backupTag.tagName); - // Wait until the backup is in a restorable state and get the status, URL, and UID atomically - state Reference lastBackupContainer; - state UID lastBackupUID; - state EBackupState resultWait = wait(backupAgent->waitBackup( - cx, backupTag.tagName, StopWhenDone::False, &lastBackupContainer, &lastBackupUID)); - - TraceEvent("BARW_DoBackupWaitForRestorable", randomID) - .detail("Tag", backupTag.tagName) - .detail("Result", BackupAgentBase::getStateText(resultWait)); - - state bool restorable = false; - if (lastBackupContainer) { - state Future fdesc = lastBackupContainer->describeBackup(); - wait(ready(fdesc)); - - if (!fdesc.isError()) { - state BackupDescription desc = fdesc.get(); - wait(desc.resolveVersionTimes(cx)); - printf("BackupDescription:\n%s\n", desc.toString().c_str()); - restorable = desc.maxRestorableVersion.present(); - } - } - - TraceEvent("BARW_LastBackupContainer", randomID) - .detail("BackupTag", printable(tag)) - .detail("LastBackupContainer", lastBackupContainer ? lastBackupContainer->getURL() : "") - .detail("LastBackupUID", lastBackupUID) - .detail("WaitStatus", BackupAgentBase::getStateText(resultWait)) - .detail("Restorable", restorable); - - // Do not check the backup, if aborted - if (resultWait == EBackupState::STATE_ABORTED) { - } - // Ensure that a backup container was found - else if (!lastBackupContainer) { - TraceEvent(SevError, "BARW_MissingBackupContainer", randomID) - .detail("LastBackupUID", lastBackupUID) - .detail("BackupTag", printable(tag)) - .detail("WaitStatus", resultWait); - printf("BackupCorrectnessMissingBackupContainer tag: %s status: %s\n", - printable(tag).c_str(), - BackupAgentBase::getStateText(resultWait)); - } - // Check that backup is restorable - else if (!restorable) { - TraceEvent(SevError, "BARW_NotRestorable", randomID) - .detail("LastBackupUID", lastBackupUID) - .detail("BackupTag", printable(tag)) - .detail("BackupFolder", lastBackupContainer->getURL()) - .detail("WaitStatus", BackupAgentBase::getStateText(resultWait)); - printf("BackupCorrectnessNotRestorable: tag: %s\n", printable(tag).c_str()); - } - - // Abort the backup, if not the first backup because the second backup may have aborted the backup - // by now - if (startDelay) { - TraceEvent("BARW_DoBackupAbortBackup2", randomID) - .detail("Tag", printable(tag)) - .detail("WaitStatus", BackupAgentBase::getStateText(resultWait)) - .detail("LastBackupContainer", lastBackupContainer ? lastBackupContainer->getURL() : "") - .detail("Restorable", restorable); - wait(backupAgent->abortBackup(cx, tag.toString())); - } else { - TraceEvent("BARW_DoBackupDiscontinueBackup", randomID) - .detail("Tag", printable(tag)) - .detail("DifferentialAfter", stopDifferentialDelay); - wait(backupAgent->discontinueBackup(cx, tag)); - } - } - - else { - TraceEvent("BARW_DoBackupDiscontinueBackup", randomID) - .detail("Tag", printable(tag)) - .detail("DifferentialAfter", stopDifferentialDelay); - wait(backupAgent->discontinueBackup(cx, tag)); - } - } catch (Error& e) { - TraceEvent("BARW_DoBackupDiscontinueBackupException", randomID).error(e).detail("Tag", printable(tag)); - if (e.code() != error_code_backup_unneeded && e.code() != error_code_backup_duplicate) - throw; - } - } - - // Wait for the backup to complete - TraceEvent("BARW_DoBackupWaitBackup", randomID).detail("Tag", printable(tag)); - state EBackupState statusValue = wait(backupAgent->waitBackup(cx, tag.toString(), StopWhenDone::True)); - - state std::string statusText; - - std::string _statusText = wait(backupAgent->getStatus(cx, ShowErrors::True, tag.toString())); - statusText = _statusText; - // Can we validate anything about status? - - TraceEvent("BARW_DoBackupComplete", randomID) - .detail("Tag", printable(tag)) - .detail("Status", statusText) - .detail("StatusValue", BackupAgentBase::getStateText(statusValue)); - - return Void(); - } - - // This actor attempts to restore the database without clearing the keyspace. - // TODO: Enable this function in correctness test - ACTOR static Future attemptDirtyRestore(BackupAndParallelRestoreCorrectnessWorkload* self, - Database cx, - FileBackupAgent* backupAgent, - Standalone lastBackupContainer, - UID randomID) { - state Transaction tr(cx); - state int rowCount = 0; - loop { - try { - RangeResult existingRows = wait(tr.getRange(normalKeys, 1)); - rowCount = existingRows.size(); - break; - } catch (Error& e) { - wait(tr.onError(e)); - } - } - - // Try doing a restore without clearing the keys - if (rowCount > 0) { - try { - // TODO: Change to my restore agent code - TraceEvent(SevError, "MXFastRestore").detail("RestoreFunction", "ShouldChangeToMyOwnRestoreLogic"); - wait(success(backupAgent->restore(cx, - cx, - self->backupTag, - KeyRef(lastBackupContainer), - {}, - WaitForComplete::True, - ::invalidVersion, - Verbose::True, - normalKeys, - Key(), - Key(), - self->locked))); - TraceEvent(SevError, "BARW_RestoreAllowedOverwrittingDatabase", randomID).log(); - ASSERT(false); - } catch (Error& e) { - if (e.code() != error_code_restore_destination_not_empty) { - throw; - } - } - } - - return Void(); - } - - ACTOR static Future _start(Database cx, BackupAndParallelRestoreCorrectnessWorkload* self) { - state FileBackupAgent backupAgent; - state Future extraBackup; - state UID randomID = nondeterministicRandom()->randomUniqueID(); - state int restoreIndex = 0; - state ReadYourWritesTransaction tr2(cx); - - TraceEvent("BARW_Arguments") - .detail("BackupTag", printable(self->backupTag)) - .detail("PerformRestore", self->performRestore) - .detail("BackupAfter", self->backupAfter) - .detail("RestoreAfter", self->restoreAfter) - .detail("AbortAndRestartAfter", self->abortAndRestartAfter) - .detail("DifferentialAfter", self->stopDifferentialAfter); - - if (self->allowPauses && BUGGIFY) { - state Future cp = changePaused(cx, &backupAgent); - } - - // Increment the backup agent requests - if (self->agentRequest) { - BackupAndParallelRestoreCorrectnessWorkload::backupAgentRequests++; - } - - try { - state Future startRestore = delay(self->restoreAfter); - - // backup - wait(delay(self->backupAfter)); - - TraceEvent("BARW_DoBackup1", randomID).detail("Tag", printable(self->backupTag)); - state Promise submitted; - state Future b = doBackup( - self, 0, &backupAgent, cx, self->backupTag, self->backupRanges, self->stopDifferentialAfter, submitted); - - if (self->abortAndRestartAfter) { - TraceEvent("BARW_DoBackup2", randomID) - .detail("Tag", printable(self->backupTag)) - .detail("AbortWait", self->abortAndRestartAfter); - wait(submitted.getFuture()); - b = b && doBackup(self, - self->abortAndRestartAfter, - &backupAgent, - cx, - self->backupTag, - self->backupRanges, - self->stopDifferentialAfter, - Promise()); - } - - TraceEvent("BARW_DoBackupWait", randomID) - .detail("BackupTag", printable(self->backupTag)) - .detail("AbortAndRestartAfter", self->abortAndRestartAfter); - try { - wait(b); - } catch (Error& e) { - if (e.code() != error_code_database_locked) - throw; - if (self->performRestore) - throw; - return Void(); - } - TraceEvent("BARW_DoBackupDone", randomID) - .detail("BackupTag", printable(self->backupTag)) - .detail("AbortAndRestartAfter", self->abortAndRestartAfter); - - state KeyBackedTag keyBackedTag = makeBackupTag(self->backupTag.toString()); - UidAndAbortedFlagT uidFlag = wait(keyBackedTag.getOrThrow(cx.getReference())); - state UID logUid = uidFlag.first; - state Key destUidValue = wait(BackupConfig(logUid).destUidValue().getD(cx.getReference())); - state Reference lastBackupContainer = - wait(BackupConfig(logUid).backupContainer().getD(cx.getReference())); - - // Occasionally start yet another backup that might still be running when we restore - if (!self->locked && BUGGIFY) { - TraceEvent("BARW_SubmitBackup2", randomID).detail("Tag", printable(self->backupTag)); - try { - // Note the "partitionedLog" must be false, because we change - // the configuration to disable backup workers before restore. - extraBackup = backupAgent.submitBackup(cx, - "file://simfdb/backups/"_sr, - {}, - deterministicRandom()->randomInt(0, 60), - deterministicRandom()->randomInt(0, 100), - self->backupTag.toString(), - self->backupRanges, - true, - StopWhenDone::True, - UsePartitionedLog::False); - } catch (Error& e) { - TraceEvent("BARW_SubmitBackup2Exception", randomID) - .error(e) - .detail("BackupTag", printable(self->backupTag)); - if (e.code() != error_code_backup_unneeded && e.code() != error_code_backup_duplicate) - throw; - } - } - - CODE_PROBE(!startRestore.isReady(), "Restore starts at specified time"); - wait(startRestore); - - if (lastBackupContainer && self->performRestore) { - if (deterministicRandom()->random01() < 0.5) { - printf("TODO: Check if restore can succeed if dirty restore is performed first\n"); - // TODO: To support restore even after we attempt dirty restore. Not implemented in the 1st version - // fast restore - // wait(attemptDirtyRestore(self, cx, &backupAgent, StringRef(lastBackupContainer->getURL()), - // randomID)); - } - - // We must ensure no backup workers are running, otherwise the clear DB - // below can be picked up by backup workers and applied during restore. - wait(success(ManagementAPI::changeConfig(cx.getReference(), "backup_worker_enabled:=0", true))); - - // Clear DB before restore - wait(runRYWTransaction(cx, [=](Reference tr) -> Future { - for (auto& kvrange : self->backupRanges) - tr->clear(kvrange); - return Void(); - })); - - // restore database - TraceEvent("BAFRW_Restore", randomID) - .detail("LastBackupContainer", lastBackupContainer->getURL()) - .detail("RestoreAfter", self->restoreAfter) - .detail("BackupTag", printable(self->backupTag)); - // start restoring - - auto container = - IBackupContainer::openContainer(lastBackupContainer->getURL(), lastBackupContainer->getProxy(), {}); - BackupDescription desc = wait(container->describeBackup()); - ASSERT(self->usePartitionedLogs == desc.partitioned); - ASSERT(desc.minRestorableVersion.present()); // We must have a valid backup now. - - state Version targetVersion = -1; - if (desc.maxRestorableVersion.present()) { - if (deterministicRandom()->random01() < 0.1) { - targetVersion = desc.minRestorableVersion.get(); - } else if (deterministicRandom()->random01() < 0.1) { - targetVersion = desc.maxRestorableVersion.get(); - } else if (deterministicRandom()->random01() < 0.5 && - desc.minRestorableVersion.get() < desc.contiguousLogEnd.get()) { - // The assertion may fail because minRestorableVersion may be decided by snapshot version. - // ASSERT_WE_THINK(desc.minRestorableVersion.get() <= desc.contiguousLogEnd.get()); - // This assertion can fail when contiguousLogEnd < maxRestorableVersion and - // the snapshot version > contiguousLogEnd. I.e., there is a gap between - // contiguousLogEnd and snapshot version. - // ASSERT_WE_THINK(desc.contiguousLogEnd.get() > desc.maxRestorableVersion.get()); - targetVersion = deterministicRandom()->randomInt64(desc.minRestorableVersion.get(), - desc.contiguousLogEnd.get()); - } - } - - TraceEvent("BAFRW_Restore", randomID) - .detail("LastBackupContainer", lastBackupContainer->getURL()) - .detail("MinRestorableVersion", desc.minRestorableVersion.get()) - .detail("MaxRestorableVersion", desc.maxRestorableVersion.get()) - .detail("ContiguousLogEnd", desc.contiguousLogEnd.get()) - .detail("TargetVersion", targetVersion); - - state std::vector> restores; - state std::vector> restoreTags; - - // Submit parallel restore requests - TraceEvent("BackupAndParallelRestoreWorkload") - .detail("PrepareRestores", self->backupRanges.size()) - .detail("AddPrefix", self->addPrefix) - .detail("RemovePrefix", self->removePrefix); - wait(backupAgent.submitParallelRestore(cx, - self->backupTag, - self->backupRanges, - KeyRef(lastBackupContainer->getURL()), - lastBackupContainer->getProxy(), - targetVersion, - self->locked, - randomID, - self->addPrefix, - self->removePrefix)); - TraceEvent("BackupAndParallelRestoreWorkload") - .detail("TriggerRestore", "Setting up restoreRequestTriggerKey"); - - // Sometimes kill and restart the restore - // In real cluster, aborting a restore needs: - // (1) kill restore cluster; (2) clear dest. DB restore system keyspace. - // TODO: Consider gracefully abort a restore and restart. - if (BUGGIFY && TEST_ABORT_FASTRESTORE) { - TraceEvent(SevError, "FastRestore").detail("Buggify", "NotImplementedYet"); - wait(delay(deterministicRandom()->randomInt(0, 10))); - for (restoreIndex = 0; restoreIndex < restores.size(); restoreIndex++) { - FileBackupAgent::ERestoreState rs = - wait(backupAgent.abortRestore(cx, restoreTags[restoreIndex])); - // The restore may have already completed, or the abort may have been done before the restore - // was even able to start. Only run a new restore if the previous one was actually aborted. - if (rs == FileBackupAgent::ERestoreState::ABORTED) { - wait(runRYWTransaction(cx, [=](Reference tr) -> Future { - tr->clear(self->backupRanges[restoreIndex]); - return Void(); - })); - // TODO: Not Implemented yet - // restores[restoreIndex] = backupAgent.restore(cx, restoreTags[restoreIndex], - // KeyRef(lastBackupContainer->getURL()), true, -1, true, self->backupRanges[restoreIndex], - // Key(), Key(), self->locked); - } - } - } - - // Wait for parallel restore to finish before we can proceed - TraceEvent("FastRestoreWorkload").detail("WaitForRestoreToFinish", randomID); - // Do not unlock DB when restore finish because we need to transformDatabaseContents - wait(backupAgent.parallelRestoreFinish(cx, randomID, UnlockDB{ !self->hasPrefix() })); - TraceEvent("FastRestoreWorkload").detail("RestoreFinished", randomID); - - for (auto& restore : restores) { - ASSERT(!restore.isError()); - } - - // If addPrefix or removePrefix set, we want to transform the effect by copying data - if (self->hasPrefix()) { - wait(transformRestoredDatabase(cx, self->backupRanges, self->addPrefix, self->removePrefix)); - wait(unlockDatabase(cx, randomID)); - } - } - - // Q: What is the extra backup and why do we need to care about it? - if (extraBackup.isValid()) { // SOMEDAY: Handle this case - TraceEvent("BARW_WaitExtraBackup", randomID).detail("BackupTag", printable(self->backupTag)); - try { - wait(extraBackup); - } catch (Error& e) { - TraceEvent("BARW_ExtraBackupException", randomID) - .error(e) - .detail("BackupTag", printable(self->backupTag)); - if (e.code() != error_code_backup_unneeded && e.code() != error_code_backup_duplicate) - throw; - } - - TraceEvent("BARW_AbortBackupExtra", randomID).detail("BackupTag", printable(self->backupTag)); - try { - wait(backupAgent.abortBackup(cx, self->backupTag.toString())); - } catch (Error& e) { - TraceEvent("BARW_AbortBackupExtraException", randomID).error(e); - if (e.code() != error_code_backup_unneeded) - throw; - } - } - - state Key backupAgentKey = uidPrefixKey(logRangesRange.begin, logUid); - state Key backupLogValuesKey = destUidValue.withPrefix(backupLogKeys.begin); - state Key backupLatestVersionsPath = destUidValue.withPrefix(backupLatestVersionsPrefix); - state Key backupLatestVersionsKey = uidPrefixKey(backupLatestVersionsPath, logUid); - state int displaySystemKeys = 0; - - // Ensure that there is no left over key within the backup subspace - loop { - state Reference tr(new ReadYourWritesTransaction(cx)); - - TraceEvent("BARW_CheckLeftoverKeys", randomID).detail("BackupTag", printable(self->backupTag)); - - try { - tr->reset(); - tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - - // Check the left over tasks - // We have to wait for the list to empty since an abort and get status - // can leave extra tasks in the queue - TraceEvent("BARW_CheckLeftoverTasks", randomID).detail("BackupTag", printable(self->backupTag)); - state int64_t taskCount = wait(backupAgent.getTaskCount(tr)); - state int waitCycles = 0; - - loop { - waitCycles++; - - TraceEvent("BARW_NonzeroTaskWait", randomID) - .detail("BackupTag", printable(self->backupTag)) - .detail("TaskCount", taskCount) - .detail("WaitCycles", waitCycles); - printf("%.6f %-10s Wait #%4d for %lld tasks to end\n", - now(), - randomID.toString().c_str(), - waitCycles, - (long long)taskCount); - - wait(delay(5.0)); - wait(tr->commit()); - tr = makeReference(cx); - int64_t _taskCount = wait(backupAgent.getTaskCount(tr)); - taskCount = _taskCount; - - if (!taskCount) { - break; - } - } - - if (taskCount) { - displaySystemKeys++; - TraceEvent(SevError, "BARW_NonzeroTaskCount", randomID) - .detail("BackupTag", printable(self->backupTag)) - .detail("TaskCount", taskCount) - .detail("WaitCycles", waitCycles); - printf("BackupCorrectnessLeftOverLogTasks: %ld\n", (long)taskCount); - } - - RangeResult agentValues = - wait(tr->getRange(KeyRange(KeyRangeRef(backupAgentKey, strinc(backupAgentKey))), 100)); - - // Error if the system keyspace for the backup tag is not empty - if (agentValues.size() > 0) { - displaySystemKeys++; - printf("BackupCorrectnessLeftOverMutationKeys: (%d) %s\n", - agentValues.size(), - printable(backupAgentKey).c_str()); - TraceEvent(SevError, "BackupCorrectnessLeftOverMutationKeys", randomID) - .detail("BackupTag", printable(self->backupTag)) - .detail("LeftOverKeys", agentValues.size()) - .detail("KeySpace", printable(backupAgentKey)); - for (auto& s : agentValues) { - TraceEvent("BARW_LeftOverKey", randomID) - .detail("Key", printable(StringRef(s.key.toString()))) - .detail("Value", printable(StringRef(s.value.toString()))); - printf(" Key: %-50s Value: %s\n", - printable(StringRef(s.key.toString())).c_str(), - printable(StringRef(s.value.toString())).c_str()); - } - } else { - printf("No left over backup agent configuration keys\n"); - } - - Optional latestVersion = wait(tr->get(backupLatestVersionsKey)); - if (latestVersion.present()) { - TraceEvent(SevError, "BackupCorrectnessLeftOverVersionKey", randomID) - .detail("BackupTag", printable(self->backupTag)) - .detail("BackupLatestVersionsKey", backupLatestVersionsKey.printable()) - .detail("DestUidValue", destUidValue.printable()); - } else { - printf("No left over backup version key\n"); - } - - RangeResult versions = wait(tr->getRange( - KeyRange(KeyRangeRef(backupLatestVersionsPath, strinc(backupLatestVersionsPath))), 1)); - if (!self->shareLogRange || !versions.size()) { - RangeResult logValues = wait( - tr->getRange(KeyRange(KeyRangeRef(backupLogValuesKey, strinc(backupLogValuesKey))), 100)); - - // Error if the log/mutation keyspace for the backup tag is not empty - if (logValues.size() > 0) { - displaySystemKeys++; - printf("BackupCorrectnessLeftOverLogKeys: (%d) %s\n", - logValues.size(), - printable(backupLogValuesKey).c_str()); - TraceEvent(SevError, "BackupCorrectnessLeftOverLogKeys", randomID) - .detail("BackupTag", printable(self->backupTag)) - .detail("LeftOverKeys", logValues.size()) - .detail("KeySpace", printable(backupLogValuesKey)); - } else { - printf("No left over backup log keys\n"); - } - } - - break; - } catch (Error& e) { - TraceEvent("BARW_CheckException", randomID).error(e); - wait(tr->onError(e)); - } - } - - if (displaySystemKeys) { - wait(TaskBucket::debugPrintRange(cx, "\xff"_sr, StringRef())); - } - - TraceEvent("BARW_Complete", randomID).detail("BackupTag", printable(self->backupTag)); - - // Decrement the backup agent requests - if (self->agentRequest) { - BackupAndParallelRestoreCorrectnessWorkload::backupAgentRequests--; - } - - // SOMEDAY: Remove after backup agents can exist quiescently - if ((g_simulator->backupAgents == ISimulator::BackupAgentType::BackupToFile) && - (!BackupAndParallelRestoreCorrectnessWorkload::backupAgentRequests)) { - g_simulator->backupAgents = ISimulator::BackupAgentType::NoBackupAgents; - } - } catch (Error& e) { - TraceEvent(SevError, "BackupAndParallelRestoreCorrectness").error(e).GetLastError(); - throw; - } - return Void(); - } -}; - -int BackupAndParallelRestoreCorrectnessWorkload::backupAgentRequests = 0; - -WorkloadFactory BackupAndParallelRestoreCorrectnessWorkloadFactory; diff --git a/fdbserver/workloads/ParallelRestore.actor.cpp b/fdbserver/workloads/ParallelRestore.actor.cpp deleted file mode 100644 index 4cd16013be4..00000000000 --- a/fdbserver/workloads/ParallelRestore.actor.cpp +++ /dev/null @@ -1,61 +0,0 @@ -/* - * ParallelRestore.actor.cpp - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "fdbrpc/simulator.h" -#include "fdbclient/BackupContainer.h" -#include "fdbserver/workloads/workloads.actor.h" -#include "fdbserver/workloads/BulkSetup.actor.h" -#include "fdbserver/RestoreWorkerInterface.actor.h" -#include "flow/actorcompiler.h" // This must be the last #include. - -// A workload which test the correctness of backup and restore process -struct RunRestoreWorkerWorkload : TestWorkload { - static constexpr auto NAME = "RunRestoreWorkerWorkload"; - - Future worker; - RunRestoreWorkerWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) { - TraceEvent("RunRestoreWorkerWorkloadMX").log(); - } - - Future setup(Database const& cx) override { return Void(); } - - Future start(Database const& cx) override { - int num_myWorkers = SERVER_KNOBS->FASTRESTORE_NUM_APPLIERS + SERVER_KNOBS->FASTRESTORE_NUM_LOADERS + 1; - TraceEvent("RunParallelRestoreWorkerWorkload") - .detail("Start", "RestoreToolDB") - .detail("Workers", num_myWorkers); - printf("RunParallelRestoreWorkerWorkload, we will start %d restore workers\n", num_myWorkers); - std::vector> myWorkers; - myWorkers.reserve(num_myWorkers); - for (int i = 0; i < num_myWorkers; ++i) { - myWorkers.push_back(_restoreWorker(cx, LocalityData())); - } - printf("RunParallelRestoreWorkerWorkload, wait on reply from %ld restore workers\n", myWorkers.size()); - worker = waitForAll(myWorkers); - printf("RunParallelRestoreWorkerWorkload, got all replies from restore workers\n"); - return Void(); - } - - Future check(Database const& cx) override { return true; } - - void getMetrics(std::vector& m) override {} -}; - -WorkloadFactory RunRestoreWorkerWorkloadFactory; diff --git a/packaging/docker/Dockerfile b/packaging/docker/Dockerfile index 84eabf3aef2..1d9906017e8 100644 --- a/packaging/docker/Dockerfile +++ b/packaging/docker/Dockerfile @@ -108,7 +108,7 @@ RUN for file in fdbserver fdbbackup fdbcli fdbmonitor; do \ done # Setup all symlinks for the other binaries that are a copy of fdbbackup -RUN for file in fdbdr fdbrestore backup_agent dr_agent fastrestore_tool; do \ +RUN for file in fdbdr fdbrestore backup_agent dr_agent; do \ ln -s /usr/bin/fdbbackup /usr/bin/$file; \ done diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 1db7dfb2ac6..327c755a5e1 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -89,7 +89,6 @@ if(WITH_PYTHON) add_fdb_test(TEST_FILES KVStoreTestWrite.txt UNIT IGNORE) add_fdb_test(TEST_FILES KVStoreValueSize.txt UNIT IGNORE) add_fdb_test(TEST_FILES LayerStatusMerge.txt IGNORE) - add_fdb_test(TEST_FILES ParallelRestoreApiCorrectnessAtomicRestore.txt IGNORE) add_fdb_test(TEST_FILES PureNetwork.txt IGNORE) add_fdb_test(TEST_FILES RRW2500.txt IGNORE) add_fdb_test(TEST_FILES RandomRead.txt IGNORE) @@ -521,19 +520,10 @@ if(WITH_PYTHON) add_fdb_test(TEST_FILES slow/WriteDuringReadAtomicRestore.toml) add_fdb_test(TEST_FILES slow/WriteDuringReadSwitchover.toml) add_fdb_test(TEST_FILES slow/ddbalance.toml) - add_fdb_test(TEST_FILES slow/ParallelRestoreNewBackupCorrectnessAtomicOp.toml) - add_fdb_test(TEST_FILES slow/ParallelRestoreNewBackupCorrectnessCycle.toml) - add_fdb_test(TEST_FILES slow/ParallelRestoreNewBackupCorrectnessMultiCycles.toml) - add_fdb_test(TEST_FILES slow/ParallelRestoreNewBackupWriteDuringReadAtomicRestore.toml) - add_fdb_test(TEST_FILES slow/ParallelRestoreOldBackupCorrectnessAtomicOp.toml) - add_fdb_test(TEST_FILES slow/ParallelRestoreOldBackupCorrectnessCycle.toml) - add_fdb_test(TEST_FILES slow/ParallelRestoreOldBackupCorrectnessMultiCycles.toml) - add_fdb_test(TEST_FILES slow/ParallelRestoreOldBackupWriteDuringReadAtomicRestore.toml) add_fdb_test(TEST_FILES negative/ResolverIgnoreTooOld.toml) add_fdb_test(TEST_FILES negative/ResolverIgnoreReads.toml) add_fdb_test(TEST_FILES negative/ResolverIgnoreWrites.toml) add_fdb_test(TEST_FILES negative/StorageCorruption.toml) - add_fdb_test(TEST_FILES ParallelRestoreOldBackupApiCorrectnessAtomicRestore.toml IGNORE) # Note that status tests are not deterministic. add_fdb_test(TEST_FILES status/invalid_proc_addresses.txt) diff --git a/tests/ParallelRestoreApiCorrectnessAtomicRestore.txt b/tests/ParallelRestoreApiCorrectnessAtomicRestore.txt deleted file mode 100644 index 82a78a7dcff..00000000000 --- a/tests/ParallelRestoreApiCorrectnessAtomicRestore.txt +++ /dev/null @@ -1,38 +0,0 @@ -testTitle=ApiCorrectnessTest -clearAfterTest=false -simBackupAgents=BackupToFile -;timeout is in seconds -timeout=360000 -runSetup=true -; tenants are not supported with parallel restore -allowDefaultTenant=false - - testName=ApiCorrectness - numKeys=5000 - onlyLowerCase=true - shortKeysRatio=0.5 - minShortKeyLength=1 - maxShortKeyLength=3 - minLongKeyLength=1 - maxLongKeyLength=128 - minValueLength=1 - maxValueLength=1000 - numGets=1000 - numGetRanges=100 - numGetRangeSelectors=100 - numGetKeys=100 - numClears=100 - numClearRanges=10 - maxTransactionBytes=500000 - randomTestDuration=60 - - testName=AtomicRestore - startAfter=10.0 - restoreAfter=50.0 - fastRestore=true - usePartitionedLogs=true - - ; Each testName=RunRestoreWorkerWorkload creates a restore worker - ; We need at least 3 restore workers: master, loader, and applier - testName=RunRestoreWorkerWorkload - diff --git a/tests/ParallelRestoreOldBackupApiCorrectnessAtomicRestore.toml b/tests/ParallelRestoreOldBackupApiCorrectnessAtomicRestore.toml deleted file mode 100644 index e37d8d25725..00000000000 --- a/tests/ParallelRestoreOldBackupApiCorrectnessAtomicRestore.toml +++ /dev/null @@ -1,42 +0,0 @@ -[[test]] -testTitle = 'ApiCorrectnessTest' -clearAfterTest = false -simBackupAgents = 'BackupToFile' -#timeout is in seconds -timeout = 360000 -runSetup = true -# tenants are not supported with parallel restore -allowDefaultTenant = false - - [[test.workload]] - testName = 'ApiCorrectness' - numKeys = 5000 - onlyLowerCase = true - shortKeysRatio = 0.5 - minShortKeyLength = 1 - maxShortKeyLength = 3 - minLongKeyLength = 1 - maxLongKeyLength = 128 - minValueLength = 1 - maxValueLength = 1000 - numGets = 1000 - numGetRanges = 100 - numGetRangeSelectors = 100 - numGetKeys = 100 - numClears = 100 - numClearRanges = 10 - maxTransactionBytes = 500000 - randomTestDuration = 60 - resetDBTimeout = 7200 - - [[test.workload]] - testName = 'AtomicRestore' - startAfter = 10.0 - restoreAfter = 50.0 - fastRestore = true - usePartitionedLogs = false - # Each testName=RunRestoreWorkerWorkload creates a restore worker - # We need at least 3 restore workers: master, loader, and applier - - [[test.workload]] - testName = 'RunRestoreWorkerWorkload' diff --git a/tests/slow/ParallelRestoreNewBackupCorrectnessAtomicOp.toml b/tests/slow/ParallelRestoreNewBackupCorrectnessAtomicOp.toml deleted file mode 100644 index fe894b2df77..00000000000 --- a/tests/slow/ParallelRestoreNewBackupCorrectnessAtomicOp.toml +++ /dev/null @@ -1,63 +0,0 @@ -[configuration] -allowDefaultTenant = false - -[[test]] -testTitle = 'BackupAndParallelRestoreWithAtomicOp' -clearAfterTest = false -simBackupAgents = 'BackupToFile' -#timeout is in seconds -timeout = 360000 - - [[test.workload]] - testName = 'AtomicOps' - nodeCount = 30000 - # Make ops space only 1 key per group - # nodeCount=100 - transactionsPerSecond = 2500.0 - # transactionsPerSecond=500.0 - # transactionsPerSecond=500.0 - # nodeCount=4 - # transactionsPerSecond=250.0 - testDuration = 30.0 - # Specify a type of atomicOp - # opType=0 - # actorsPerClient=1 - # Each testName=RunRestoreWorkerWorkload creates a restore worker - # We need at least 3 restore workers: master, loader, and applier - - [[test.workload]] - testName = 'RunRestoreWorkerWorkload' - # Test case for parallel restore - - [[test.workload]] - testName = 'BackupAndParallelRestoreCorrectness' - backupAfter = 10.0 - restoreAfter = 60.0 - backupRangesCount = -1 - # use new backup - usePartitionedLogs = true - - [[test.workload]] - testName = 'RandomClogging' - testDuration = 90.0 - - [[test.workload]] - testName = 'Rollback' - meanDelay = 90.0 - testDuration = 90.0 - # Do NOT kill restore worker process yet - # Kill other process to ensure restore works when FDB cluster has faults - - [[test.workload]] - testName = 'Attrition' - machinesToKill = 10 - machinesToLeave = 3 - reboot = true - testDuration = 90.0 - - [[test.workload]] - testName = 'Attrition' - machinesToKill = 10 - machinesToLeave = 3 - reboot = true - testDuration = 90.0 diff --git a/tests/slow/ParallelRestoreNewBackupCorrectnessCycle.toml b/tests/slow/ParallelRestoreNewBackupCorrectnessCycle.toml deleted file mode 100644 index f4dac7069ca..00000000000 --- a/tests/slow/ParallelRestoreNewBackupCorrectnessCycle.toml +++ /dev/null @@ -1,59 +0,0 @@ -[configuration] -# tenants are not supported with parallel restore -allowDefaultTenant = false - -[[test]] -testTitle = 'BackupAndRestore' -clearAfterTest = false -simBackupAgents = 'BackupToFile' -#timeout is in seconds -timeout = 360000 - - [[test.workload]] - testName = 'Cycle' - # nodeCount=30000 - nodeCount = 1000 - # transactionsPerSecond=500.0 - transactionsPerSecond = 2500.0 - testDuration = 30.0 - expectedRate = 0 - # keyPrefix=! - # Each testName=RunRestoreWorkerWorkload creates a restore worker - # We need at least 3 restore workers: master, loader, and applier - - [[test.workload]] - testName = 'RunRestoreWorkerWorkload' - # Test case for parallel restore - - [[test.workload]] - testName = 'BackupAndParallelRestoreCorrectness' - backupAfter = 10.0 - restoreAfter = 60.0 - # backupRangesCount<0 means backup the entire normal keyspace - backupRangesCount = -1 - usePartitionedLogs = true - - [[test.workload]] - testName = 'RandomClogging' - testDuration = 90.0 - - [[test.workload]] - testName = 'Rollback' - meanDelay = 90.0 - testDuration = 90.0 - # Do NOT kill restore worker process yet - # Kill other process to ensure restore works when FDB cluster has faults - - [[test.workload]] - testName = 'Attrition' - machinesToKill = 10 - machinesToLeave = 3 - reboot = true - testDuration = 90.0 - - [[test.workload]] - testName = 'Attrition' - machinesToKill = 10 - machinesToLeave = 3 - reboot = true - testDuration = 90.0 diff --git a/tests/slow/ParallelRestoreNewBackupCorrectnessMultiCycles.toml b/tests/slow/ParallelRestoreNewBackupCorrectnessMultiCycles.toml deleted file mode 100644 index 0d0336d7ede..00000000000 --- a/tests/slow/ParallelRestoreNewBackupCorrectnessMultiCycles.toml +++ /dev/null @@ -1,82 +0,0 @@ -[configuration] -# tenants are not supported with parallel restore -allowDefaultTenant = false - -[[test]] -testTitle = 'BackupAndRestore' -clearAfterTest = false -simBackupAgents = 'BackupToFile' -#timeout is in seconds -timeout = 360000 - - [[test.workload]] - testName = 'Cycle' - # nodeCount=30000 - nodeCount = 1000 - transactionsPerSecond = 2500.0 - testDuration = 30.0 - expectedRate = 0 - keyPrefix = '!' - - [[test.workload]] - testName = 'Cycle' - nodeCount = 1000 - transactionsPerSecond = 2500.0 - testDuration = 30.0 - expectedRate = 0 - keyPrefix = 'z' - - [[test.workload]] - testName = 'Cycle' - nodeCount = 1000 - transactionsPerSecond = 2500.0 - testDuration = 30.0 - expectedRate = 0 - keyPrefix = 'A' - - [[test.workload]] - testName = 'Cycle' - nodeCount = 1000 - transactionsPerSecond = 2500.0 - testDuration = 30.0 - expectedRate = 0 - keyPrefix = 'Z' - # Each testName=RunRestoreWorkerWorkload creates a restore worker - # We need at least 3 restore workers: master, loader, and applier - - [[test.workload]] - testName = 'RunRestoreWorkerWorkload' - # Test case for parallel restore - - [[test.workload]] - testName = 'BackupAndParallelRestoreCorrectness' - backupAfter = 10.0 - restoreAfter = 60.0 - # backupRangesCount<0 means backup the entire normal keyspace - backupRangesCount = -1 - usePartitionedLogs = true - - [[test.workload]] - testName = 'RandomClogging' - testDuration = 90.0 - - [[test.workload]] - testName = 'Rollback' - meanDelay = 90.0 - testDuration = 90.0 - # Do NOT kill restore worker process yet - # Kill other process to ensure restore works when FDB cluster has faults - - [[test.workload]] - testName = 'Attrition' - machinesToKill = 10 - machinesToLeave = 3 - reboot = true - testDuration = 90.0 - - [[test.workload]] - testName = 'Attrition' - machinesToKill = 10 - machinesToLeave = 3 - reboot = true - testDuration = 90.0 diff --git a/tests/slow/ParallelRestoreNewBackupWriteDuringReadAtomicRestore.toml b/tests/slow/ParallelRestoreNewBackupWriteDuringReadAtomicRestore.toml deleted file mode 100644 index 4f2b6b27977..00000000000 --- a/tests/slow/ParallelRestoreNewBackupWriteDuringReadAtomicRestore.toml +++ /dev/null @@ -1,53 +0,0 @@ -[configuration] -StderrSeverity = 30 -allowDefaultTenant = false - -[[test]] -testTitle = 'WriteDuringReadTest' -clearAfterTest = false -simBackupAgents = 'BackupToFile' -#timeout is in seconds -timeout = 360000 - - [[test.workload]] - testName = 'WriteDuringRead' - maximumTotalData = 1000000 - testDuration = 240.0 - slowModeStart = 60.0 - minNode = 1 - useSystemKeys = false - - [[test.workload]] - testName = 'AtomicRestore' - startAfter = 10.0 - restoreAfter = 50.0 - fastRestore = true - usePartitionedLogs = true - - [[test.workload]] - testName = 'RandomClogging' - testDuration = 60.0 - - [[test.workload]] - testName = 'Rollback' - meanDelay = 60.0 - testDuration = 60.0 - - [[test.workload]] - testName = 'Attrition' - machinesToKill = 10 - machinesToLeave = 3 - reboot = true - testDuration = 60.0 - - [[test.workload]] - testName = 'Attrition' - machinesToKill = 10 - machinesToLeave = 3 - reboot = true - testDuration = 60.0 - # Each testName=RunRestoreWorkerWorkload creates a restore worker - # We need at least 3 restore workers: master, loader, and applier - - [[test.workload]] - testName = 'RunRestoreWorkerWorkload' diff --git a/tests/slow/ParallelRestoreOldBackupCorrectnessAtomicOp.toml b/tests/slow/ParallelRestoreOldBackupCorrectnessAtomicOp.toml deleted file mode 100644 index ea324b7a044..00000000000 --- a/tests/slow/ParallelRestoreOldBackupCorrectnessAtomicOp.toml +++ /dev/null @@ -1,62 +0,0 @@ -[configuration] -# tenants are not supported with parallel restore -allowDefaultTenant = false - -[[test]] -testTitle = 'BackupAndParallelRestoreWithAtomicOp' -clearAfterTest = false -simBackupAgents = 'BackupToFile' -#timeout is in seconds -timeout = 360000 - - [[test.workload]] - testName = 'AtomicOps' - nodeCount = 30000 - # Make ops space only 1 key per group - transactionsPerSecond = 2500.0 - # nodeCount=4 - # transactionsPerSecond=250.0 - testDuration = 30.0 - # Specify a type of atomicOp - # Unset the following two options for debug purpose - # opType=0 - # actorsPerClient=1 - # Each testName=RunRestoreWorkerWorkload creates a restore worker - # We need at least 3 restore workers: master, loader, and applier - - [[test.workload]] - testName = 'RunRestoreWorkerWorkload' - # Test case for parallel restore - - [[test.workload]] - testName = 'BackupAndParallelRestoreCorrectness' - backupAfter = 10.0 - restoreAfter = 60.0 - backupRangesCount = -1 - # use old backup - usePartitionedLogs = false - - [[test.workload]] - testName = 'RandomClogging' - testDuration = 90.0 - - [[test.workload]] - testName = 'Rollback' - meanDelay = 90.0 - testDuration = 90.0 - # Do NOT kill restore worker process yet - # Kill other process to ensure restore works when FDB cluster has faults - - [[test.workload]] - testName = 'Attrition' - machinesToKill = 10 - machinesToLeave = 3 - reboot = true - testDuration = 90.0 - - [[test.workload]] - testName = 'Attrition' - machinesToKill = 10 - machinesToLeave = 3 - reboot = true - testDuration = 90.0 diff --git a/tests/slow/ParallelRestoreOldBackupCorrectnessCycle.toml b/tests/slow/ParallelRestoreOldBackupCorrectnessCycle.toml deleted file mode 100644 index bdb4e2b013f..00000000000 --- a/tests/slow/ParallelRestoreOldBackupCorrectnessCycle.toml +++ /dev/null @@ -1,59 +0,0 @@ -[configuration] -# tenants are not supported with parallel restore -allowDefaultTenant = false - -[[test]] -testTitle = 'BackupAndRestore' -clearAfterTest = false -simBackupAgents = 'BackupToFile' -#timeout is in seconds -timeout = 360000 - - [[test.workload]] - testName = 'Cycle' - nodeCount=30000 - # nodeCount = 1000 - # transactionsPerSecond=500.0 - transactionsPerSecond = 2500.0 - testDuration = 30.0 - expectedRate = 0 - # keyPrefix=! - # Each testName=RunRestoreWorkerWorkload creates a restore worker - # We need at least 3 restore workers: master, loader, and applier - - [[test.workload]] - testName = 'RunRestoreWorkerWorkload' - # Test case for parallel restore - - [[test.workload]] - testName = 'BackupAndParallelRestoreCorrectness' - backupAfter = 10.0 - restoreAfter = 60.0 - # backupRangesCount<0 means backup the entire normal keyspace - backupRangesCount = -1 - usePartitionedLogs = false - - [[test.workload]] - testName = 'RandomClogging' - testDuration = 90.0 - - [[test.workload]] - testName = 'Rollback' - meanDelay = 90.0 - testDuration = 90.0 - # Do NOT kill restore worker process yet - # Kill other process to ensure restore works when FDB cluster has faults - - [[test.workload]] - testName = 'Attrition' - machinesToKill = 10 - machinesToLeave = 3 - reboot = true - testDuration = 90.0 - - [[test.workload]] - testName = 'Attrition' - machinesToKill = 10 - machinesToLeave = 3 - reboot = true - testDuration = 90.0 diff --git a/tests/slow/ParallelRestoreOldBackupCorrectnessMultiCycles.toml b/tests/slow/ParallelRestoreOldBackupCorrectnessMultiCycles.toml deleted file mode 100644 index 78d465688b7..00000000000 --- a/tests/slow/ParallelRestoreOldBackupCorrectnessMultiCycles.toml +++ /dev/null @@ -1,82 +0,0 @@ -[configuration] -# tenants are not supported with parallel restore -allowDefaultTenant = false - -[[test]] -testTitle = 'BackupAndRestore' -clearAfterTest = false -simBackupAgents = 'BackupToFile' -#timeout is in seconds -timeout = 360000 - - [[test.workload]] - testName = 'Cycle' - # nodeCount=30000 - nodeCount = 1000 - transactionsPerSecond = 2500.0 - testDuration = 30.0 - expectedRate = 0 - keyPrefix = '!' - - [[test.workload]] - testName = 'Cycle' - nodeCount = 1000 - transactionsPerSecond = 2500.0 - testDuration = 30.0 - expectedRate = 0 - keyPrefix = 'z' - - [[test.workload]] - testName = 'Cycle' - nodeCount = 1000 - transactionsPerSecond = 2500.0 - testDuration = 30.0 - expectedRate = 0 - keyPrefix = 'A' - - [[test.workload]] - testName = 'Cycle' - nodeCount = 1000 - transactionsPerSecond = 2500.0 - testDuration = 30.0 - expectedRate = 0 - keyPrefix = 'Z' - # Each testName=RunRestoreWorkerWorkload creates a restore worker - # We need at least 3 restore workers: master, loader, and applier - - [[test.workload]] - testName = 'RunRestoreWorkerWorkload' - # Test case for parallel restore - - [[test.workload]] - testName = 'BackupAndParallelRestoreCorrectness' - backupAfter = 10.0 - restoreAfter = 60.0 - # backupRangesCount<0 means backup the entire normal keyspace - backupRangesCount = -1 - usePartitionedLogs = false - - [[test.workload]] - testName = 'RandomClogging' - testDuration = 90.0 - - [[test.workload]] - testName = 'Rollback' - meanDelay = 90.0 - testDuration = 90.0 - # Do NOT kill restore worker process yet - # Kill other process to ensure restore works when FDB cluster has faults - - [[test.workload]] - testName = 'Attrition' - machinesToKill = 10 - machinesToLeave = 3 - reboot = true - testDuration = 90.0 - - [[test.workload]] - testName = 'Attrition' - machinesToKill = 10 - machinesToLeave = 3 - reboot = true - testDuration = 90.0 diff --git a/tests/slow/ParallelRestoreOldBackupWriteDuringReadAtomicRestore.toml b/tests/slow/ParallelRestoreOldBackupWriteDuringReadAtomicRestore.toml deleted file mode 100644 index 9827f1034fe..00000000000 --- a/tests/slow/ParallelRestoreOldBackupWriteDuringReadAtomicRestore.toml +++ /dev/null @@ -1,54 +0,0 @@ -[configuration] -StderrSeverity = 30 -# tenants are not supported with parallel restore -allowDefaultTenant = false - -[[test]] -testTitle = 'WriteDuringReadTest' -clearAfterTest = false -simBackupAgents = 'BackupToFile' -#timeout is in seconds -timeout = 360000 - - [[test.workload]] - testName = 'WriteDuringRead' - maximumTotalData = 1000000 - testDuration = 240.0 - slowModeStart = 60.0 - minNode = 1 - useSystemKeys = false - - [[test.workload]] - testName = 'AtomicRestore' - startAfter = 10.0 - restoreAfter = 50.0 - fastRestore = true - usePartitionedLogs = false - - [[test.workload]] - testName = 'RandomClogging' - testDuration = 60.0 - - [[test.workload]] - testName = 'Rollback' - meanDelay = 60.0 - testDuration = 60.0 - - [[test.workload]] - testName = 'Attrition' - machinesToKill = 10 - machinesToLeave = 3 - reboot = true - testDuration = 60.0 - - [[test.workload]] - testName = 'Attrition' - machinesToKill = 10 - machinesToLeave = 3 - reboot = true - testDuration = 60.0 - # Each testName=RunRestoreWorkerWorkload creates a restore worker - # We need at least 3 restore workers: master, loader, and applier - - [[test.workload]] - testName = 'RunRestoreWorkerWorkload'