1 change: 1 addition & 0 deletions fdbclient/ServerKnobs.cpp
@@ -842,6 +842,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( CC_INVALIDATE_EXCLUDED_PROCESSES, false); if (isSimulated) CC_INVALIDATE_EXCLUDED_PROCESSES = deterministicRandom()->coinflip();
init( CC_GRAY_FAILURE_STATUS_JSON, false); if (isSimulated) CC_GRAY_FAILURE_STATUS_JSON = true;
init( CC_THROTTLE_SINGLETON_RERECRUIT_INTERVAL, 0.5 );
init( CC_RECOVERY_INIT_REQ_TIMEOUT, 60.0 );

init( INCOMPATIBLE_PEERS_LOGGING_INTERVAL, 600 ); if( randomize && BUGGIFY ) INCOMPATIBLE_PEERS_LOGGING_INTERVAL = 60.0;
init( EXPECTED_MASTER_FITNESS, ProcessClass::UnsetFit );
2 changes: 2 additions & 0 deletions fdbclient/include/fdbclient/ServerKnobs.h
@@ -850,6 +850,8 @@ class SWIFT_CXX_IMMORTAL_SINGLETON_TYPE ServerKnobs : public KnobsImpl<ServerKno
bool CC_GRAY_FAILURE_STATUS_JSON; // When enabled, returns gray failure information in machine readable status json.
double CC_THROTTLE_SINGLETON_RERECRUIT_INTERVAL; // The interval to prevent re-recruiting the same singleton if a
// recruiting fight between two cluster controllers occurs.
double CC_RECOVERY_INIT_REQ_TIMEOUT; // Timeout after which the CC stops waiting for a response to a
// recruitment initialization request

// Knobs used to select the best policy (via monte carlo)
int POLICY_RATING_TESTS; // number of tests per policy (in order to compare)
26 changes: 19 additions & 7 deletions fdbserver/TagPartitionedLogSystem.actor.cpp
@@ -2788,6 +2788,9 @@ ACTOR Future<Void> TagPartitionedLogSystem::recruitOldLogRouters(TagPartitionedL
// Recruit log routers for old generations of the primary locality
if (tLogs->locality == locality) {
logRouterInitializationReplies.emplace_back();
TraceEvent("LogRouterInitReqSent1")
.detail("Locality", locality)
.detail("LogRouterTags", self->logRouterTags);
for (int i = 0; i < self->logRouterTags; i++) {
InitializeLogRouterRequest req;
req.recoveryCount = recoveryCount;
@@ -2799,8 +2802,10 @@ ACTOR Future<Void> TagPartitionedLogSystem::recruitOldLogRouters(TagPartitionedL
req.recoverAt = self->recoverAt.get();
req.knownLockedTLogIds = self->knownLockedTLogIds;
auto reply = transformErrors(
throwErrorOr(workers[nextRouter].logRouter.getReplyUnlessFailedFor(
req, SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)),
throwErrorOr(timeoutError(
@dlambrig (Contributor) commented on Sep 26, 2025:

Should throw a log message when we time out:

        TraceEvent("LogRouterInitTimeout")
            .detail("RequestIndex", i)
            .detail("Worker", workers[nextRouter].id())
            .detail("RecoveryCount", recoveryCount);

If this is added in more places in a future PR, maybe it's worth modifying timeoutError to accept a lambda, something like:

timeoutError(
    getReplyUnlessFailedFor(...),
    SERVER_KNOBS->CC_RECOVERY_INIT_REQ_TIMEOUT,
    [=]() {
        TraceEvent("LogRouterInitTimeout")
            .detail("RequestIndex", i)
            .detail("Worker", workers[nextRouter].id())
            .detail("RecoveryCount", recoveryCount);
        return cluster_recovery_failed();
    }
)

Author (Collaborator) replied:

Yes, I was also thinking something like that, e.g. adding optional context to timeoutError. I was thinking to do this in a follow-up PR. But maybe this can be implemented in a different way if we go with Jingyu's idea here: #12396 (review).
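
For reference, a minimal sketch of such an overload (illustrative only, not part of this PR). It assumes the existing choose/when shape of timeoutError in flow; the onTimeout parameter is an invented name for a callback that logs caller-specific context and returns the Error to throw.

// Sketch only: a timeoutError variant that lets the caller attach context on timeout.
ACTOR template <class T>
Future<T> timeoutError(Future<T> what, double time, std::function<Error()> onTimeout) {
    choose {
        when(T t = wait(what)) {
            return t;
        }
        when(wait(delay(time))) {
            // Let the caller emit its own TraceEvent (e.g. LogRouterInitTimeout with the
            // request index and worker id), then throw the Error it returns.
            throw onTimeout();
        }
    }
}

A call site could then pass the lambda from the suggestion above, returning cluster_recovery_failed().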

workers[nextRouter].logRouter.getReplyUnlessFailedFor(
req, SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY),
SERVER_KNOBS->CC_RECOVERY_INIT_REQ_TIMEOUT)),
cluster_recovery_failed());
logRouterInitializationReplies.back().push_back(reply);
allReplies.push_back(reply);
@@ -2839,6 +2844,9 @@ ACTOR Future<Void> TagPartitionedLogSystem::recruitOldLogRouters(TagPartitionedL
// Recruit log routers for old generations of the primary locality
if (tLogs->locality == locality) {
logRouterInitializationReplies.emplace_back();
TraceEvent("LogRouterInitReqSent2")
.detail("Locality", locality)
.detail("LogRouterTags", old.logRouterTags);
for (int i = 0; i < old.logRouterTags; i++) {
InitializeLogRouterRequest req;
req.recoveryCount = recoveryCount;
@@ -2849,8 +2857,10 @@ ACTOR Future<Void> TagPartitionedLogSystem::recruitOldLogRouters(TagPartitionedL
req.locality = locality;
req.recoverAt = old.recoverAt;
auto reply = transformErrors(
throwErrorOr(workers[nextRouter].logRouter.getReplyUnlessFailedFor(
req, SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)),
throwErrorOr(timeoutError(
workers[nextRouter].logRouter.getReplyUnlessFailedFor(
req, SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY),
SERVER_KNOBS->CC_RECOVERY_INIT_REQ_TIMEOUT)),
cluster_recovery_failed());
logRouterInitializationReplies.back().push_back(reply);
allReplies.push_back(reply);
@@ -2860,7 +2870,7 @@ ACTOR Future<Void> TagPartitionedLogSystem::recruitOldLogRouters(TagPartitionedL
}
}

wait(waitForAll(allReplies));
wait(traceAfter(waitForAll(allReplies), "AllLogRouterRepliesReceived"));

int nextReplies = 0;
lastStart = std::numeric_limits<Version>::max();
@@ -3004,6 +3014,7 @@ ACTOR Future<Void> TagPartitionedLogSystem::newRemoteEpoch(TagPartitionedLogSyst
const Version startVersion = oldLogSystem->logRouterTags == 0
? oldLogSystem->recoverAt.get() + 1
: std::max(self->tLogs[0]->startVersion, logSet->startVersion);
TraceEvent("LogRouterInitReqSent3").detail("Locality", remoteLocality).detail("LogRouterTags", self->logRouterTags);
for (int i = 0; i < self->logRouterTags; i++) {
InitializeLogRouterRequest req;
req.recoveryCount = recoveryCount;
@@ -3015,9 +3026,10 @@ ACTOR Future<Void> TagPartitionedLogSystem::newRemoteEpoch(TagPartitionedLogSyst
TraceEvent("RemoteTLogRouterReplies", self->dbgid)
.detail("WorkerID", remoteWorkers.logRouters[i % remoteWorkers.logRouters.size()].id());
logRouterInitializationReplies.push_back(transformErrors(
throwErrorOr(
throwErrorOr(timeoutError(
remoteWorkers.logRouters[i % remoteWorkers.logRouters.size()].logRouter.getReplyUnlessFailedFor(
req, SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)),
req, SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY),
SERVER_KNOBS->CC_RECOVERY_INIT_REQ_TIMEOUT)),
cluster_recovery_failed()));
}

14 changes: 13 additions & 1 deletion fdbserver/worker.actor.cpp
@@ -26,6 +26,7 @@
#include "fdbclient/FDBTypes.h"
#include "fdbserver/BlobMigratorInterface.h"
#include "flow/ApiVersion.h"
#include "flow/Buggify.h"
#include "flow/CodeProbe.h"
#include "flow/IAsyncFile.h"
#include "fdbrpc/Locality.h"
@@ -2194,6 +2195,14 @@ void cleanupStorageDisks(Reference<AsyncVar<ServerDBInfo>> dbInfo,
}
}

bool skipInitRspInSim() {
const bool skip = g_network->isSimulated() && BUGGIFY_WITH_PROB(0.01 /* 1% */);
if (skip) {
TraceEvent("SkipInitRspInSimTrue");
A reviewer (Contributor) commented:

include interf.id() in log?
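
A minimal sketch of that suggestion (illustrative only, not part of this PR), assuming the caller passes the worker interface's UID:

// Sketch: thread the worker's UID through so the skip event is attributable.
bool skipInitRspInSim(UID id) {
    const bool skip = g_network->isSimulated() && BUGGIFY_WITH_PROB(0.01 /* 1% */);
    if (skip) {
        TraceEvent("SkipInitRspInSimTrue", id);
    }
    return skip;
}

// The call site below would then read: if (!skipInitRspInSim(interf.id())) { req.reply.send(recruited); }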

}
return skip;
}

ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
Reference<AsyncVar<Optional<ClusterControllerFullInterface>> const> ccInterface,
LocalityData locality,
@@ -3367,7 +3376,10 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
errorForwarders.add(
zombie(recruited,
forwardError(errors, Role::LOG_ROUTER, recruited.id(), logRouter(recruited, req, dbInfo))));
req.reply.send(recruited);

if (!skipInitRspInSim()) {
req.reply.send(recruited);
}
}
when(CoordinationPingMessage m = waitNext(interf.coordinationPing.getFuture())) {
TraceEvent("CoordinationPing", interf.id())