fix: fixed cnpool scaling when inplace update is enabled (#566)
* fix: fixed cnpool scale-in when inplace rolling update is enabled

Signed-off-by: Aylei <[email protected]>

* fix draining recycle

Signed-off-by: Aylei <[email protected]>

* fix

Signed-off-by: Aylei <[email protected]>

---------

Signed-off-by: Aylei <[email protected]>
aylei authored Nov 19, 2024
1 parent 1c0201f commit 581017c
Showing 2 changed files with 10 additions and 7 deletions.
6 changes: 3 additions & 3 deletions pkg/controllers/cnclaim/migrate.go
@@ -60,14 +60,14 @@ func (r *Actor) migrate(ctx *recon.Context[*v1alpha1.CNClaim]) error {
     case v1alpha1.CNPodPhaseBound:
         // use connection migration to migrate workload from source to target pod
         ctx.Log.Info("start draining source pod", "pod", source.Name)
-        if err := r.reclaimCN(ctx, source, deleteOnReclaim); err != nil {
+        if err := r.reclaimCN(ctx, source); err != nil {
             return err
         }
         if err := r.reportProgress(ctx, source); err != nil {
             return err
         }
-        return recon.ErrReSync("source pod start draining, reqeue", migrationResyncInterval)
-    case v1alpha1.CNPodPhaseTerminating:
+        return recon.ErrReSync("source pod start draining, requeue", migrationResyncInterval)
+    case v1alpha1.CNPodPhaseTerminating, v1alpha1.CNPodPhaseIdle:
         return r.completeMigration(ctx)
     default:
         return errors.Errorf("unknown pod phase: %s", source.Labels[v1alpha1.CNPodPhaseLabel])
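To make the phase handling above concrete, here is a toy, self-contained Go sketch of the switch's decision logic. The string phases and the migrateStep helper are illustrative stand-ins, not the operator's v1alpha1 constants or Actor methods: after this change a source pod that is already Idle completes the migration just like a Terminating one, while a Bound pod is first drained and the claim is requeued.

```go
package main

import "fmt"

// Toy stand-ins for the v1alpha1.CNPodPhase* labels; illustrative only.
const (
	phaseBound       = "Bound"
	phaseTerminating = "Terminating"
	phaseIdle        = "Idle"
)

// migrateStep mimics the shape of the switch in migrate.go: it returns what the
// reconciler would do next for a source pod in the given phase.
func migrateStep(phase string) (string, error) {
	switch phase {
	case phaseBound:
		// drain the source pod, then requeue and check progress on the next resync
		return "drain source pod and requeue", nil
	case phaseTerminating, phaseIdle:
		// nothing left to drain: finish the migration
		return "complete migration", nil
	default:
		return "", fmt.Errorf("unknown pod phase: %s", phase)
	}
}

func main() {
	for _, p := range []string{phaseBound, phaseTerminating, phaseIdle, "Unknown"} {
		action, err := migrateStep(p)
		if err != nil {
			fmt.Println(p, "->", err)
			continue
		}
		fmt.Println(p, "->", action)
	}
}
```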
11 changes: 7 additions & 4 deletions pkg/controllers/cnpool/controller.go
@@ -122,20 +122,22 @@ func (r *Actor) Sync(ctx *recon.Context[*v1alpha1.CNPool]) error {
     }
 
     desiredReplicas := inUse + pendingClaims + p.Spec.Strategy.ScaleStrategy.MaxIdle
+    activeReplicas := inUse + int32(len(idlePods))
     totalPods += desiredReplicas
     if totalPods > maxPods {
         return recon.ErrReSync(fmt.Sprintf("Pool %s has reached MaxPods limit %d, total Pods: %d, requeue", p.Name, totalPods, maxPods), time.Minute)
     }
     // ensure and scale desired CNSet to provide enough CN pods
     err = recon.CreateOwnedOrUpdate(ctx, desired, func() error {
         // apply update, since the CNSet revision hash is not changed, this must be an inplace-update
+        specReplicas := desired.Spec.Replicas
         csSpec := p.Spec.Template.DeepCopy()
         syncCNSetSpec(p, csSpec)
         desired.Spec = *csSpec
-        ctx.Log.Info("scale cnset", "cnset", desired.Name, "replicas", desiredReplicas)
+        ctx.Log.Info("scale cnset", "cnset", desired.Name, "replicas", desiredReplicas, "spec replicas", specReplicas)
         // sync terminating pods to delete
         desired.Spec.PodsToDelete = podNames(terminatingPods)
-        if desired.Spec.Replicas > desiredReplicas {
+        if desiredReplicas <= specReplicas {
             // CNSet is going to be scaled-in
             if pendingClaims > 0 {
                 // don't scale-in if we still have pending claims
@@ -146,7 +148,8 @@ func (r *Actor) Sync(ctx *recon.Context[*v1alpha1.CNPool]) error {
                     "in use pods", inUse)
                 return nil
             }
-            scaleInCount := desired.Spec.Replicas - desiredReplicas
+            // activeReplicas may be greater than desiredReplicas, we should scale-in more
+            scaleInCount := max(activeReplicas-desiredReplicas, specReplicas-desiredReplicas)
             sortPodByDeletionOrder(idlePods)
             if int32(len(idlePods)) > scaleInCount {
                 // pick first N to scale-in
@@ -165,7 +168,7 @@ func (r *Actor) Sync(ctx *recon.Context[*v1alpha1.CNPool]) error {
                 deleted = append(deleted, pod)
             }
             ctx.Log.Info("scale-in CN Pool complete", "deleted", len(deleted))
-            desired.Spec.Replicas = desired.Spec.Replicas - int32(len(deleted))
+            desired.Spec.Replicas = max(specReplicas-int32(len(deleted)), desiredReplicas)
             desired.Spec.PodsToDelete = append(desired.Spec.PodsToDelete, podNames(deleted)...)
         } else {
             // scale-out, if we have terminating pods left, replace them
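A minimal, runnable sketch of the new scale-in arithmetic with made-up numbers. The variable names mirror the hunk above, but the values and the min/deletion step are illustrative, and Go 1.21+ is assumed for the built-in max and min: specReplicas is captured before the template spec overwrites desired.Spec, and the deletion count now takes the larger of the active surplus and the spec surplus, so idle pods left over after an in-place update are also reclaimed.

```go
package main

import "fmt"

func main() {
	// Illustrative pool state, not taken from the patch.
	var (
		inUse         int32 = 3 // pods currently bound to claims
		idlePods      int32 = 4 // idle pods sitting in the pool
		maxIdle       int32 = 1 // Spec.Strategy.ScaleStrategy.MaxIdle
		pendingClaims int32 = 0
		specReplicas  int32 = 5 // desired.Spec.Replicas observed before the in-place update
	)

	desiredReplicas := inUse + pendingClaims + maxIdle // 4
	activeReplicas := inUse + idlePods                 // 7

	// The old logic only looked at the spec surplus (5 - 4 = 1); the new logic also
	// considers the active surplus (7 - 4 = 3) and scales in by the larger of the two.
	scaleInCount := max(activeReplicas-desiredReplicas, specReplicas-desiredReplicas) // 3

	// Only idle pods can actually be deleted.
	deleted := min(idlePods, scaleInCount) // 3

	// Replicas shrink by the number of deleted pods, but never drop below the desired count.
	newReplicas := max(specReplicas-deleted, desiredReplicas) // max(2, 4) = 4

	fmt.Println("scaleInCount:", scaleInCount, "deleted:", deleted, "newReplicas:", newReplicas)
}
```

Running the sketch prints scaleInCount: 3 deleted: 3 newReplicas: 4, i.e. the surplus idle pods are reclaimed and the replica count settles on the desired value instead of overshooting.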
