Skip to content

Commit

Permalink
For get operations, if there are very few local replicas, then shuffl…
Browse files Browse the repository at this point in the history
…e the pool with remote replicas so that the few remaining local replicas don't get all the traffic.

Define a config with the threshold for minimum number of local replicas to do this.
  • Loading branch information
github-actions committed Nov 28, 2023
1 parent d641fd9 commit 533b3a3
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 0 deletions.
13 changes: 13 additions & 0 deletions ambry-api/src/main/java/com/github/ambry/config/RouterConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ public class RouterConfig {
// This is a theoretical maximum value. Configured value may be much smaller since we might need to respond back to
// client with either success or failure much sooner.
public static final int MAX_OVERALL_TIMEOUT_VALUE_FOR_A_REQUEST_IN_MS = 60 * 60 * 1000;
// Default minimum number of live local replicas a get request needs in order to keep prioritizing local replicas.
// With a value of 1, a single live local replica is sufficient, so in effect local replicas are always prioritized
// whenever any of them is alive.
public static final int DEFAULT_ROUTER_GET_OPERATION_MIN_LOCAL_REPLICA_COUNT_TO_PRIORITIZE_LOCAL = 1;

// config keys
public static final String ROUTER_SCALING_UNIT_COUNT = "router.scaling.unit.count";
Expand Down Expand Up @@ -147,6 +150,10 @@ public class RouterConfig {
public static final String ROUTER_GET_OPERATION_DEPRIORITIZE_BOOTSTRAP_REPLICAS =
"router.get.operation.deprioritize.bootstrap.replicas";

// Config key for the minimum number of local replicas that must be live for a get request so that local replicas
// stay prioritized. Defaults to DEFAULT_ROUTER_GET_OPERATION_MIN_LOCAL_REPLICA_COUNT_TO_PRIORITIZE_LOCAL.
public static final String ROUTER_GET_OPERATION_MIN_LOCAL_REPLICA_COUNT_TO_PRIORITIZE_LOCAL =
"router.get.operation.min.local.replica.count.to.prioritize.local";

/**
* Number of independent scaling units for the router.
*/
Expand Down Expand Up @@ -754,6 +761,10 @@ public class RouterConfig {
@Config(ROUTER_GET_OPERATION_DEPRIORITIZE_BOOTSTRAP_REPLICAS)
public final boolean routerGetOperationDeprioritizeBootstrapReplicas;

/**
 * Minimum number of local replicas that should be live for a get request so that local replicas are prioritized.
 * When a get operation sees fewer live local replicas than this value, the replica pool is shuffled together with
 * remote replicas so that the few remaining local replicas don't receive all the traffic.
 */
@Config(ROUTER_GET_OPERATION_MIN_LOCAL_REPLICA_COUNT_TO_PRIORITIZE_LOCAL)
@Default("1")
public final int routerGetOperationMinLocalReplicaCountToPrioritizeLocal;

/**
* Create a RouterConfig instance.
* @param verifiableProperties the properties map to refer to.
Expand Down Expand Up @@ -921,6 +932,8 @@ public RouterConfig(VerifiableProperties verifiableProperties) {
routerReservedMetadataEnabled = verifiableProperties.getBoolean(RESERVED_METADATA_ENABLED, false);
routerGetOperationDeprioritizeBootstrapReplicas =
verifiableProperties.getBoolean(ROUTER_GET_OPERATION_DEPRIORITIZE_BOOTSTRAP_REPLICAS, false);
routerGetOperationMinLocalReplicaCountToPrioritizeLocal =
verifiableProperties.getInt(ROUTER_GET_OPERATION_MIN_LOCAL_REPLICA_COUNT_TO_PRIORITIZE_LOCAL, DEFAULT_ROUTER_GET_OPERATION_MIN_LOCAL_REPLICA_COUNT_TO_PRIORITIZE_LOCAL);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,7 @@ class SimpleOperationTracker implements OperationTracker {
List<ReplicaId> examinedReplicas = new ArrayList<>();
originatingDcName = originatingDcName == null ? reassignedOriginDc : originatingDcName;
int numLocalAndLiveReplicas = 0;
int numRemoteOriginatingDcAndLiveReplicas = 0;
for (ReplicaId replicaId : replicas) {
examinedReplicas.add(replicaId);
String replicaDcName = replicaId.getDataNodeId().getDatacenterName();
Expand All @@ -310,6 +311,7 @@ class SimpleOperationTracker implements OperationTracker {
numLocalAndLiveReplicas++;
addToBeginningOfPool(replicaId);
} else if (crossColoEnabled && isOriginatingDcReplica) {
numRemoteOriginatingDcAndLiveReplicas++;
addToEndOfPool(replicaId);
} else if (crossColoEnabled) {
backupReplicas.addFirst(replicaId);
Expand Down Expand Up @@ -349,6 +351,7 @@ class SimpleOperationTracker implements OperationTracker {
}

maybeDeprioritizeLocalBootstrapReplicas(numLocalAndLiveReplicas);
maybeShuffleWithRemoteReplicas(numLocalAndLiveReplicas, numRemoteOriginatingDcAndLiveReplicas);
totalReplicaCount = replicaPool.size();

// MockPartitionId.getReplicaIds() is returning a shared reference which may cause race condition.
Expand Down Expand Up @@ -614,6 +617,32 @@ void maybeDeprioritizeLocalBootstrapReplicas(int numLocalAndLiveReplicas) {
}
}

/**
 * Spreads get traffic across remote replicas when too few local replicas are alive.
 * <p>
 * Nothing happens unless this is a get operation and the number of live local replicas is strictly below
 * {@link RouterConfig#routerGetOperationMinLocalReplicaCountToPrioritizeLocal}. When it applies, a leading window of
 * the replica pool is randomly permuted in place:
 * <ul>
 *   <li>if there are live replicas in a remote originating DC, the window is the first
 *   {@code numLocalAndLiveReplicas + numRemoteOriginatingDcAndLiveReplicas} entries of the pool;</li>
 *   <li>otherwise the window is the entire pool.</li>
 * </ul>
 * @param numLocalAndLiveReplicas the number of local and live replicas.
 * @param numRemoteOriginatingDcAndLiveReplicas the number of remote originating DC and live replicas.
 */
void maybeShuffleWithRemoteReplicas(int numLocalAndLiveReplicas, int numRemoteOriginatingDcAndLiveReplicas) {
  // Guard: only get operations with too few live local replicas are reshuffled.
  if (!isGetOperation()
      || numLocalAndLiveReplicas >= routerConfig.routerGetOperationMinLocalReplicaCountToPrioritizeLocal) {
    return;
  }

  // Pick the window to permute: local + originating DC replicas when the latter exist, else the whole pool.
  List<ReplicaId> shuffleWindow = numRemoteOriginatingDcAndLiveReplicas > 0
      ? replicaPool.subList(0, numLocalAndLiveReplicas + numRemoteOriginatingDcAndLiveReplicas)
      : replicaPool;

  // Shuffle a detached copy so the pool is not structurally touched while it is being read.
  List<ReplicaId> shuffledReplicas = new ArrayList<>(shuffleWindow);
  Collections.shuffle(shuffledReplicas);

  // Write the permuted replicas back over the head of the pool, position by position.
  ListIterator<ReplicaId> poolCursor = replicaPool.listIterator();
  for (ReplicaId shuffledReplica : shuffledReplicas) {
    poolCursor.next();
    poolCursor.set(shuffledReplica);
  }
}

public boolean hasFailed() {
if (routerOperation == RouterOperation.PutOperation && routerConfig.routerPutUseDynamicSuccessTarget) {
return totalReplicaCount - failedCount < Math.max(totalReplicaCount - 1,
Expand Down

0 comments on commit 533b3a3

Please sign in to comment.