Commit 0be0927

ankagrawal and github-actions authored
For get operations, if there are very few local replicas, then shuffle the pool with remote replicas so that the few remaining local replicas don't get all the traffic. (#2653)
Also define a config for the threshold on the minimum number of local replicas needed to keep prioritizing local replicas. Co-authored-by: github-actions <[email protected]>
1 parent e140579 commit 0be0927

File tree (4 files changed: +94, -0 lines changed)

- ambry-api/src/main/java/com/github/ambry/config/RouterConfig.java
- ambry-router/src/main/java/com/github/ambry/router/NonBlockingRouterMetrics.java
- ambry-router/src/main/java/com/github/ambry/router/SimpleOperationTracker.java
- ambry-router/src/test/java/com/github/ambry/router/GetBlobOperationTest.java
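
As a quick illustration of the new config (a sketch only, not part of the commit; it assumes the other required router properties such as router.hostname and router.datacenter.name are set elsewhere, and it mirrors the setup used in GetBlobOperationTest below):

    Properties props = new Properties();
    // ... other required router properties ...
    // Shuffle remote replicas into the get replica pool whenever only one local replica is live.
    props.setProperty(RouterConfig.ROUTER_GET_OPERATION_MIN_LOCAL_REPLICA_COUNT_TO_PRIORITIZE_LOCAL, "1");
    RouterConfig routerConfig = new RouterConfig(new VerifiableProperties(props));
    // routerConfig.routerGetOperationMinLocalReplicaCountToPrioritizeLocal is now 1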

Diff for: ambry-api/src/main/java/com/github/ambry/config/RouterConfig.java (+12)
@@ -38,6 +38,8 @@ public class RouterConfig {
   // This is a theoretical maximum value. Configured value may be much smaller since we might need to respond back to
   // client with either success or failure much sooner.
   public static final int MAX_OVERALL_TIMEOUT_VALUE_FOR_A_REQUEST_IN_MS = 60 * 60 * 1000;
+  // By default a get request should prioritize remote replicas only if there are no local replicas available.
+  public static final int DEFAULT_ROUTER_GET_OPERATION_MIN_LOCAL_REPLICA_COUNT_TO_PRIORITIZE_LOCAL = 0;
 
   // config keys
   public static final String ROUTER_SCALING_UNIT_COUNT = "router.scaling.unit.count";
@@ -147,6 +149,10 @@ public class RouterConfig {
   public static final String ROUTER_GET_OPERATION_DEPRIORITIZE_BOOTSTRAP_REPLICAS =
       "router.get.operation.deprioritize.bootstrap.replicas";
 
+  // minimum number of local replicas that should be live for a get request so that local replicas are prioritized.
+  public static final String ROUTER_GET_OPERATION_MIN_LOCAL_REPLICA_COUNT_TO_PRIORITIZE_LOCAL =
+      "router.get.operation.min.local.replica.count.to.prioritize.local";
+
   /**
    * Number of independent scaling units for the router.
    */
@@ -754,6 +760,10 @@ public class RouterConfig {
   @Config(ROUTER_GET_OPERATION_DEPRIORITIZE_BOOTSTRAP_REPLICAS)
   public final boolean routerGetOperationDeprioritizeBootstrapReplicas;
 
+  @Config(ROUTER_GET_OPERATION_MIN_LOCAL_REPLICA_COUNT_TO_PRIORITIZE_LOCAL)
+  @Default("0")
+  public final int routerGetOperationMinLocalReplicaCountToPrioritizeLocal;
+
   /**
    * Create a RouterConfig instance.
    * @param verifiableProperties the properties map to refer to.
@@ -921,6 +931,8 @@ public RouterConfig(VerifiableProperties verifiableProperties) {
     routerReservedMetadataEnabled = verifiableProperties.getBoolean(RESERVED_METADATA_ENABLED, false);
     routerGetOperationDeprioritizeBootstrapReplicas =
         verifiableProperties.getBoolean(ROUTER_GET_OPERATION_DEPRIORITIZE_BOOTSTRAP_REPLICAS, false);
+    routerGetOperationMinLocalReplicaCountToPrioritizeLocal =
+        verifiableProperties.getInt(ROUTER_GET_OPERATION_MIN_LOCAL_REPLICA_COUNT_TO_PRIORITIZE_LOCAL, DEFAULT_ROUTER_GET_OPERATION_MIN_LOCAL_REPLICA_COUNT_TO_PRIORITIZE_LOCAL);
   }
 
   /**
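
To make the threshold semantics concrete, here is a small hypothetical helper (illustration only, not part of the commit) mirroring the gating condition added to SimpleOperationTracker below. Note that with the default of 0 the condition never holds, so the pool keeps its existing local-first ordering.

    // Hypothetical helper for illustration; the real check lives in SimpleOperationTracker.maybeShuffleWithRemoteReplicas.
    static boolean shouldShuffleWithRemote(boolean isGetOperation, int numLocalAndLiveReplicas, int threshold) {
      // Shuffle only for gets, only when at least one local replica is live,
      // and only when that count is at or below the configured threshold.
      return isGetOperation && numLocalAndLiveReplicas > 0 && numLocalAndLiveReplicas <= threshold;
    }
    // shouldShuffleWithRemote(true, 1, 0) -> false (default: local-first ordering is kept)
    // shouldShuffleWithRemote(true, 1, 1) -> true  (a single live local replica triggers the shuffle)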

Diff for: ambry-router/src/main/java/com/github/ambry/router/NonBlockingRouterMetrics.java (+10)
@@ -292,6 +292,10 @@ public class NonBlockingRouterMetrics {
   public final Counter requestExpiryOnNetworkTimeoutCount;
   // Number of requests timed out due to not being able to sent to network
   public final Counter requestExpiryOnFinalTimeoutCount;
+  // Number of Get requests to server for which remote replicas were prioritized due to few local replicas.
+  public final Counter remoteReplicaPrioritizedForGetDueToFewLocalReplicas;
+  // Number of Get requests to server for which local replicas were shuffled with remote replicas due to few local replicas.
+  public final Counter shuffledWithRemoteReplicasForGetDueToFewLocalReplicas;
 
   // Map that stores dataNode-level metrics.
   private final Map<DataNodeId, NodeLevelMetrics> dataNodeToMetrics;
@@ -673,6 +677,12 @@ public NonBlockingRouterMetrics(ClusterMap clusterMap, RouterConfig routerConfig
         metricRegistry.counter(MetricRegistry.name(NonBlockingRouter.class, "RequestExpiryOnNetworkTimeoutCount"));
     requestExpiryOnFinalTimeoutCount =
         metricRegistry.counter(MetricRegistry.name(NonBlockingRouter.class, "RequestExpiryOnFinalTimeoutCount"));
+    remoteReplicaPrioritizedForGetDueToFewLocalReplicas =
+        metricRegistry.counter(MetricRegistry.name(SimpleOperationTracker.class,
+            "RemoteReplicaPrioritizedForGetDueToFewLocalReplicas"));
+    shuffledWithRemoteReplicasForGetDueToFewLocalReplicas =
+        metricRegistry.counter(MetricRegistry.name(SimpleOperationTracker.class,
+            "ShuffledWithRemoteReplicasForGetDueToFewLocalReplicas"));
   }
 
   /**
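
The two counters follow the standard Dropwizard Metrics pattern used elsewhere in the router: register under a class-scoped name, increment on the code path of interest, and read back with getCount(). A minimal self-contained sketch (not Ambry code):

    import com.codahale.metrics.Counter;
    import com.codahale.metrics.MetricRegistry;

    public class CounterSketch {
      public static void main(String[] args) {
        MetricRegistry registry = new MetricRegistry();
        // Register a counter under a class-scoped metric name, as the constructor above does.
        Counter shuffled = registry.counter(
            MetricRegistry.name(CounterSketch.class, "ShuffledWithRemoteReplicasForGetDueToFewLocalReplicas"));
        shuffled.inc();                          // incremented when the shuffle path is taken
        System.out.println(shuffled.getCount()); // prints 1; the new test assertions read counts the same way
      }
    }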

Diff for: ambry-router/src/main/java/com/github/ambry/router/SimpleOperationTracker.java (+36)
@@ -299,6 +299,7 @@ class SimpleOperationTracker implements OperationTracker {
     List<ReplicaId> examinedReplicas = new ArrayList<>();
     originatingDcName = originatingDcName == null ? reassignedOriginDc : originatingDcName;
     int numLocalAndLiveReplicas = 0;
+    int numRemoteOriginatingDcAndLiveReplicas = 0;
     for (ReplicaId replicaId : replicas) {
       examinedReplicas.add(replicaId);
       String replicaDcName = replicaId.getDataNodeId().getDatacenterName();
@@ -310,6 +311,7 @@ class SimpleOperationTracker implements OperationTracker {
         numLocalAndLiveReplicas++;
         addToBeginningOfPool(replicaId);
       } else if (crossColoEnabled && isOriginatingDcReplica) {
+        numRemoteOriginatingDcAndLiveReplicas++;
         addToEndOfPool(replicaId);
       } else if (crossColoEnabled) {
         backupReplicas.addFirst(replicaId);
@@ -349,6 +351,7 @@ class SimpleOperationTracker implements OperationTracker {
     }
 
     maybeDeprioritizeLocalBootstrapReplicas(numLocalAndLiveReplicas);
+    maybeShuffleWithRemoteReplicas(numLocalAndLiveReplicas, numRemoteOriginatingDcAndLiveReplicas);
     totalReplicaCount = replicaPool.size();
 
     // MockPartitionId.getReplicaIds() is returning a shared reference which may cause race condition.
@@ -614,6 +617,39 @@ void maybeDeprioritizeLocalBootstrapReplicas(int numLocalAndLiveReplicas) {
     }
   }
 
+  /**
+   * For get operations, if there are very few local replicas, then shuffle the pool with remote replicas so that the
+   * few (or one) remaining local replicas don't get all the traffic. The threshold for minimum number of local replicas
+   * is defined in {@link RouterConfig#routerGetOperationMinLocalReplicaCountToPrioritizeLocal}.
+   * @param numLocalAndLiveReplicas the number of local and live replicas.
+   * @param numRemoteOriginatingDcAndLiveReplicas the number of remote originating DC and live replicas.
+   */
+  void maybeShuffleWithRemoteReplicas(int numLocalAndLiveReplicas, int numRemoteOriginatingDcAndLiveReplicas) {
+    if (isGetOperation() && numLocalAndLiveReplicas > 0
+        && numLocalAndLiveReplicas <= routerConfig.routerGetOperationMinLocalReplicaCountToPrioritizeLocal) {
+      logger.debug("Shuffling replicas for {} because there are only {} local and live replicas for {}",
+          routerOperation, numLocalAndLiveReplicas, partitionId);
+      routerMetrics.shuffledWithRemoteReplicasForGetDueToFewLocalReplicas.inc();
+      List<ReplicaId> replicasToReshuffle = new ArrayList<>();
+      if (numRemoteOriginatingDcAndLiveReplicas > 0) {
+        // If the local DC is not the originating DC, we shuffle only with originating DC replicas.
+        replicasToReshuffle.addAll(
+            replicaPool.subList(0, numLocalAndLiveReplicas + numRemoteOriginatingDcAndLiveReplicas));
+      } else {
+        replicasToReshuffle.addAll(replicaPool);
+      }
+      Collections.shuffle(replicasToReshuffle);
+      ListIterator<ReplicaId> iter = replicaPool.listIterator();
+      for (ReplicaId replicaId : replicasToReshuffle) {
+        iter.next();
+        iter.set(replicaId);
+      }
+      if (!replicaPool.get(0).getDataNodeId().getDatacenterName().equals(datacenterName)) {
+        routerMetrics.remoteReplicaPrioritizedForGetDueToFewLocalReplicas.inc();
+      }
+    }
+  }
+
   public boolean hasFailed() {
     if (routerOperation == RouterOperation.PutOperation && routerConfig.routerPutUseDynamicSuccessTarget) {
       return totalReplicaCount - failedCount < Math.max(totalReplicaCount - 1,
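
The reshuffle copies a prefix of the replica pool, shuffles the copy, and writes it back over the same positions with a ListIterator, so any replicas past the prefix (non-originating cross-colo replicas) keep their place at the end of the pool. A standalone sketch of that technique with hypothetical names (not Ambry code):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Collections;
    import java.util.LinkedList;
    import java.util.List;
    import java.util.ListIterator;

    public class PrefixShuffleSketch {
      // Shuffle the first prefixLength elements of pool in place, leaving the tail untouched.
      static <T> void shufflePrefixInPlace(List<T> pool, int prefixLength) {
        List<T> prefix = new ArrayList<>(pool.subList(0, prefixLength));
        Collections.shuffle(prefix);
        ListIterator<T> iter = pool.listIterator();
        for (T element : prefix) {
          iter.next();        // advance to the next slot of the prefix
          iter.set(element);  // overwrite it with the shuffled element
        }
      }

      public static void main(String[] args) {
        List<String> pool = new LinkedList<>(Arrays.asList("local1", "origin1", "origin2", "other1"));
        shufflePrefixInPlace(pool, 3); // shuffle local + originating-DC replicas only
        System.out.println(pool);      // "other1" always stays last
      }
    }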

Diff for: ambry-router/src/test/java/com/github/ambry/router/GetBlobOperationTest.java (+36)
@@ -436,6 +436,7 @@ public void testSimpleBlobGetSuccess() throws Exception {
       if (!quotaChargeCallback.getQuotaConfig().bandwidthThrottlingFeatureEnabled) {
         Assert.assertEquals(i + 1, quotaChargeCallback.numCheckAndChargeCalls);
       }
+      assertEquals(0, routerMetrics.shuffledWithRemoteReplicasForGetDueToFewLocalReplicas.getCount());
     }
   }
 
@@ -455,6 +456,7 @@ public void testSimpleBlobRawMode() throws Exception {
       GetBlobOperation op = createOperationAndComplete(null);
       Assert.assertEquals(IllegalStateException.class, op.getOperationException().getClass());
     }
+    assertEquals(0, routerMetrics.shuffledWithRemoteReplicasForGetDueToFewLocalReplicas.getCount());
   }
 
   /**
@@ -502,6 +504,7 @@ public void testCompositeBlobRawMode() throws Exception {
       } else {
         // Only supported for encrypted blobs now
       }
+      assertEquals(0, routerMetrics.shuffledWithRemoteReplicasForGetDueToFewLocalReplicas.getCount());
     }
   }
 
@@ -578,6 +581,7 @@ public void testCompositeBlobChunkSizeMultipleGetSuccess() throws Exception {
       }
       getAndAssertSuccess(false, false, lifeVersion);
     }
+    assertEquals(0, routerMetrics.shuffledWithRemoteReplicasForGetDueToFewLocalReplicas.getCount());
   }
 
   /**
@@ -897,6 +901,38 @@ public void testOrigDcUnavailability() throws Exception {
     getErrorCodeChecker.testAndAssert(RouterErrorCode.AmbryUnavailable);
   }
 
+  /**
+   * Test that if local DC has less than {@code RouterConfig#routerGetOperationMinLocalReplicaCountToPrioritizeLocal}
+   * replicas available, then we will prioritize remote DC.
+   * @throws Exception
+   */
+  @Test
+  public void testGetGoesToRemoteIfLocalDcHasFewAvailableReplicas() throws Exception {
+    blobSize = maxChunkSize;
+    doPut();
+    Properties props = getDefaultNonBlockingRouterProperties(true);
+    props.setProperty(RouterConfig.ROUTER_GET_ELIGIBLE_REPLICAS_BY_STATE_ENABLED, "true");
+    props.setProperty(RouterConfig.ROUTER_GET_OPERATION_MIN_LOCAL_REPLICA_COUNT_TO_PRIORITIZE_LOCAL, "1");
+    RouterConfig oldRouterConfig = routerConfig;
+    routerConfig = new RouterConfig(new VerifiableProperties(props));
+
+    try {
+      // For originating DC, let's bring down two replicas
+      MockPartitionId partitionId = (MockPartitionId) blobId.getPartition();
+      List<ReplicaId> localReplicas = partitionId.replicaIds.stream()
+          .filter(replicaId -> replicaId.getDataNodeId().getDatacenterName().equals(localDcName))
+          .collect(Collectors.toList());
+      partitionId.setReplicaState(localReplicas.get(0), ReplicaState.LEADER);
+      partitionId.setReplicaState(localReplicas.get(1), ReplicaState.OFFLINE);
+      partitionId.setReplicaState(localReplicas.get(2), ReplicaState.OFFLINE);
+
+      getAndAssertSuccess();
+      assertEquals(1, routerMetrics.shuffledWithRemoteReplicasForGetDueToFewLocalReplicas.getCount());
+    } finally {
+      routerConfig = oldRouterConfig;
+    }
+  }
+
   /**
    * Test the case where deletes and unavailability happens at the same time, delete should take priority.
    * @throws Exception