-
Notifications
You must be signed in to change notification settings - Fork 775
Upgrade masters last when upgrading ES clusters #8871
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 34 commits
9db32d0
39b2702
50b3954
00555c2
88cb347
790d3f1
4b944d1
d9885ba
efa8643
6914708
2dc664b
46c726c
fccf6c3
8feef24
0f5a31a
030fe16
6c9e2c5
0db51d8
57c71a9
c89e872
068fa54
0af6b85
54f9775
5dfdd05
edf3faf
a6d8edc
8cfa06c
c2e1161
1dcef6c
eb963e5
518d69d
16fd9ec
8273152
11cc0e6
645e088
3e95b2d
7f00388
5142908
4afe2e4
4b966db
6321a96
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,21 +7,27 @@ package driver | |
| import ( | ||
| "context" | ||
| "fmt" | ||
| "slices" | ||
|
|
||
| appsv1 "k8s.io/api/apps/v1" | ||
| apierrors "k8s.io/apimachinery/pkg/api/errors" | ||
| "k8s.io/utils/ptr" | ||
|
|
||
| esv1 "github.com/elastic/cloud-on-k8s/v3/pkg/apis/elasticsearch/v1" | ||
| "github.com/elastic/cloud-on-k8s/v3/pkg/controller/common" | ||
| "github.com/elastic/cloud-on-k8s/v3/pkg/controller/common/expectations" | ||
| "github.com/elastic/cloud-on-k8s/v3/pkg/controller/common/metadata" | ||
| sset "github.com/elastic/cloud-on-k8s/v3/pkg/controller/common/statefulset" | ||
| "github.com/elastic/cloud-on-k8s/v3/pkg/controller/common/version" | ||
| "github.com/elastic/cloud-on-k8s/v3/pkg/controller/elasticsearch/label" | ||
| "github.com/elastic/cloud-on-k8s/v3/pkg/controller/elasticsearch/nodespec" | ||
| "github.com/elastic/cloud-on-k8s/v3/pkg/controller/elasticsearch/reconcile" | ||
| "github.com/elastic/cloud-on-k8s/v3/pkg/controller/elasticsearch/settings" | ||
| es_sset "github.com/elastic/cloud-on-k8s/v3/pkg/controller/elasticsearch/sset" | ||
| "github.com/elastic/cloud-on-k8s/v3/pkg/controller/elasticsearch/version/zen1" | ||
| "github.com/elastic/cloud-on-k8s/v3/pkg/controller/elasticsearch/version/zen2" | ||
| "github.com/elastic/cloud-on-k8s/v3/pkg/utils/k8s" | ||
| ulog "github.com/elastic/cloud-on-k8s/v3/pkg/utils/log" | ||
| ) | ||
|
|
||
| type upscaleCtx struct { | ||
|
|
@@ -66,35 +72,116 @@ func HandleUpscaleAndSpecChanges( | |
| if err != nil { | ||
| return results, fmt.Errorf("adjust resources: %w", err) | ||
| } | ||
| // reconcile all resources | ||
| for _, res := range adjusted { | ||
| res := res | ||
| if err := settings.ReconcileConfig(ctx.parentCtx, ctx.k8sClient, ctx.es, res.StatefulSet.Name, res.Config, ctx.meta); err != nil { | ||
| return results, fmt.Errorf("reconcile config: %w", err) | ||
|
|
||
| // Check if this is a version upgrade | ||
| isVersionUpgrade, err := isVersionUpgrade(ctx.es) | ||
| if err != nil { | ||
| return results, fmt.Errorf("while checking for version upgrade: %w", err) | ||
| } | ||
|
|
||
| // If this is not a version upgrade, process all resources normally and return | ||
| if !isVersionUpgrade { | ||
| actualStatefulSets, requeue, err := reconcileResources(ctx, actualStatefulSets, adjusted) | ||
| if err != nil { | ||
| return results, fmt.Errorf("while reconciling resources: %w", err) | ||
| } | ||
| if _, err := common.ReconcileService(ctx.parentCtx, ctx.k8sClient, &res.HeadlessService, &ctx.es); err != nil { | ||
| return results, fmt.Errorf("reconcile service: %w", err) | ||
| results.Requeue = requeue | ||
| results.ActualStatefulSets = actualStatefulSets | ||
| return results, nil | ||
| } | ||
|
|
||
| // Version upgrade: separate master and non-master StatefulSets | ||
| var masterResources, nonMasterResources []nodespec.Resources | ||
| for _, res := range adjusted { | ||
| if label.IsMasterNodeSet(res.StatefulSet) { | ||
| masterResources = append(masterResources, res) | ||
| } else { | ||
| nonMasterResources = append(nonMasterResources, res) | ||
| } | ||
| if actualSset, exists := actualStatefulSets.GetByName(res.StatefulSet.Name); exists { | ||
| recreateSset, err := handleVolumeExpansion(ctx.parentCtx, ctx.k8sClient, ctx.es, res.StatefulSet, actualSset, ctx.validateStorageClass) | ||
| if err != nil { | ||
| return results, fmt.Errorf("handle volume expansion: %w", err) | ||
| } | ||
| if recreateSset { | ||
| // The StatefulSet is scheduled for recreation: let's requeue before attempting any further spec change. | ||
| results.Requeue = true | ||
| } | ||
|
|
||
| // The only adjustment we want to make to master statefulSets before ensuring that all non-master | ||
| // statefulSets have been reconciled is to scale up the replicas to the expected number. | ||
| // The only adjustment we want to make to master statefulSets before ensuring that all non-master | ||
| // statefulSets have been reconciled is to potentially scale up the replicas | ||
| // which should happen 1 at a time as we adjust the replicas early. | ||
| if err = maybeUpscaleMasterResources(ctx, masterResources); err != nil { | ||
naemono marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| return results, fmt.Errorf("while scaling up master resources: %w", err) | ||
| } | ||
|
|
||
| // First, reconcile all non-master resources | ||
| actualStatefulSets, requeue, err := reconcileResources(ctx, actualStatefulSets, nonMasterResources) | ||
| if err != nil { | ||
| return results, fmt.Errorf("while reconciling non-master resources: %w", err) | ||
| } | ||
| results.ActualStatefulSets = actualStatefulSets | ||
|
|
||
| if requeue { | ||
| results.Requeue = true | ||
| return results, nil | ||
pebrc marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| targetVersion, err := version.Parse(ctx.es.Spec.Version) | ||
| if err != nil { | ||
| return results, fmt.Errorf("while parsing Elasticsearch upgrade target version: %w", err) | ||
| } | ||
|
|
||
| // Check if all non-master StatefulSets have completed their upgrades before proceeding with master StatefulSets | ||
| pendingNonMasterSTS, err := findPendingNonMasterStatefulSetUpgrades( | ||
| ctx.k8sClient, | ||
| actualStatefulSets, | ||
| expectedResources.StatefulSets(), | ||
| targetVersion, | ||
| expectations.NewExpectations(ctx.k8sClient), | ||
naemono marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| ) | ||
| if err != nil { | ||
| return results, fmt.Errorf("while checking non-master upgrade status: %w", err) | ||
| } | ||
|
|
||
| ctx.upscaleReporter.RecordPendingNonMasterSTSUpgrades(pendingNonMasterSTS) | ||
|
|
||
| if len(pendingNonMasterSTS) > 0 { | ||
| // Non-master StatefulSets are still upgrading, skipping master StatefulSets temporarily. | ||
| // This will cause a requeue in the caller, and master StatefulSets will attempt to be processed in the next reconciliation | ||
| return results, nil | ||
| } | ||
|
|
||
| // All non-master StatefulSets are upgraded, now process master StatefulSets | ||
| actualStatefulSets, results.Requeue, err = reconcileResources(ctx, actualStatefulSets, masterResources) | ||
| if err != nil { | ||
| return results, fmt.Errorf("while reconciling master resources: %w", err) | ||
| } | ||
|
|
||
| results.ActualStatefulSets = actualStatefulSets | ||
| return results, nil | ||
| } | ||
|
|
||
| func maybeUpscaleMasterResources(ctx upscaleCtx, masterResources []nodespec.Resources) error { | ||
| // Upscale master StatefulSets using the adjusted resources and read the current StatefulSet | ||
| // from k8s to get the latest state. | ||
| for _, res := range masterResources { | ||
| stsName := res.StatefulSet.Name | ||
|
|
||
| // Read the current StatefulSet from k8s to get the latest state | ||
| var actualSset appsv1.StatefulSet | ||
| if err := ctx.k8sClient.Get(ctx.parentCtx, k8s.ExtractNamespacedName(&res.StatefulSet), &actualSset); err != nil { | ||
| if apierrors.IsNotFound(err) { | ||
naemono marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| continue | ||
| } | ||
| return fmt.Errorf("while getting master StatefulSet %s: %w", stsName, err) | ||
| } | ||
| reconciled, err := es_sset.ReconcileStatefulSet(ctx.parentCtx, ctx.k8sClient, ctx.es, res.StatefulSet, ctx.expectations) | ||
| if err != nil { | ||
| return results, fmt.Errorf("reconcile StatefulSet: %w", err) | ||
|
|
||
| actualReplicas := sset.GetReplicas(actualSset) | ||
| targetReplicas := sset.GetReplicas(res.StatefulSet) | ||
|
|
||
| if actualReplicas < targetReplicas { | ||
| actualSset.Spec.Replicas = ptr.To[int32](targetReplicas) | ||
naemono marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
barkbay marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| if err := ctx.k8sClient.Update(ctx.parentCtx, &actualSset); err != nil { | ||
naemono marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
naemono marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| return fmt.Errorf("while upscaling master sts replicas: %w", err) | ||
| } | ||
| } | ||
| // update actual with the reconciled ones for next steps to work with up-to-date information | ||
| actualStatefulSets = actualStatefulSets.WithStatefulSet(reconciled) | ||
| } | ||
| results.ActualStatefulSets = actualStatefulSets | ||
| return results, nil | ||
| return nil | ||
| } | ||
|
|
||
| func podsToCreate( | ||
|
|
@@ -166,3 +253,114 @@ func adjustStatefulSetReplicas( | |
|
|
||
| return expected, nil | ||
| } | ||
|
|
||
| // reconcileResources handles the common StatefulSet reconciliation logic | ||
| // It returns: | ||
| // - the updated StatefulSets | ||
| // - whether a requeue is needed | ||
| // - any errors that occurred | ||
| func reconcileResources( | ||
| ctx upscaleCtx, | ||
| actualStatefulSets es_sset.StatefulSetList, | ||
| resources []nodespec.Resources, | ||
| ) (es_sset.StatefulSetList, bool, error) { | ||
naemono marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| requeue := false | ||
| ulog.FromContext(ctx.parentCtx).Info("Reconciling resources", "resource_size", len(resources)) | ||
| for _, res := range resources { | ||
| res := res | ||
| if err := settings.ReconcileConfig(ctx.parentCtx, ctx.k8sClient, ctx.es, res.StatefulSet.Name, res.Config, ctx.meta); err != nil { | ||
| return actualStatefulSets, false, fmt.Errorf("reconcile config: %w", err) | ||
| } | ||
| if _, err := common.ReconcileService(ctx.parentCtx, ctx.k8sClient, &res.HeadlessService, &ctx.es); err != nil { | ||
| return actualStatefulSets, false, fmt.Errorf("reconcile service: %w", err) | ||
| } | ||
| if actualSset, exists := actualStatefulSets.GetByName(res.StatefulSet.Name); exists { | ||
| recreateSset, err := handleVolumeExpansion(ctx.parentCtx, ctx.k8sClient, ctx.es, res.StatefulSet, actualSset, ctx.validateStorageClass) | ||
| if err != nil { | ||
| return actualStatefulSets, false, fmt.Errorf("handle volume expansion: %w", err) | ||
| } | ||
| if recreateSset { | ||
| ulog.FromContext(ctx.parentCtx).Info("StatefulSet is scheduled for recreation, requeuing", "name", res.StatefulSet.Name) | ||
| // The StatefulSet is scheduled for recreation: let's requeue before attempting any further spec change. | ||
| requeue = true | ||
| continue | ||
| } | ||
| } else if !exists { | ||
naemono marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| ulog.FromContext(ctx.parentCtx).Info("StatefulSet does not exist", "name", res.StatefulSet.Name) | ||
| } | ||
| ulog.FromContext(ctx.parentCtx).Info("Reconciling StatefulSet", "name", res.StatefulSet.Name) | ||
| reconciled, err := es_sset.ReconcileStatefulSet(ctx.parentCtx, ctx.k8sClient, ctx.es, res.StatefulSet, ctx.expectations) | ||
| if err != nil { | ||
| return actualStatefulSets, false, fmt.Errorf("reconcile StatefulSet: %w", err) | ||
| } | ||
| // update actual with the reconciled ones for next steps to work with up-to-date information | ||
| actualStatefulSets = actualStatefulSets.WithStatefulSet(reconciled) | ||
| } | ||
| ulog.FromContext(ctx.parentCtx).Info("Resources reconciled", "actualStatefulSets_size", len(actualStatefulSets), "requeue", requeue) | ||
| return actualStatefulSets, requeue, nil | ||
| } | ||
|
|
||
| // findPendingNonMasterStatefulSetUpgrades finds all non-master StatefulSets that have not completed their upgrades | ||
| func findPendingNonMasterStatefulSetUpgrades( | ||
naemono marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| client k8s.Client, | ||
| actualStatefulSets es_sset.StatefulSetList, | ||
| expectedStatefulSets es_sset.StatefulSetList, | ||
| targetVersion version.Version, | ||
| expectations *expectations.Expectations, | ||
| ) ([]appsv1.StatefulSet, error) { | ||
| pendingStatefulSet, err := expectations.ExpectedStatefulSetUpdates.PendingGenerations() | ||
naemono marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| if err != nil { | ||
| return nil, err | ||
| } | ||
|
|
||
| pendingNonMasterSTS := make([]appsv1.StatefulSet, 0) | ||
| for _, actualStatefulSet := range actualStatefulSets { | ||
| expectedSset, _ := expectedStatefulSets.GetByName(actualStatefulSet.Name) | ||
|
|
||
| // Skip master StatefulSets. We check both here because the master role may have been added | ||
| // to a non-master StatefulSet during the upgrade spec change. | ||
| if label.IsMasterNodeSet(actualStatefulSet) || label.IsMasterNodeSet(expectedSset) { | ||
| continue | ||
| } | ||
|
|
||
| // If the expectations show this as a pending StatefulSet, add it to the list. | ||
| if slices.Contains(pendingStatefulSet, actualStatefulSet.Name) { | ||
| pendingNonMasterSTS = append(pendingNonMasterSTS, actualStatefulSet) | ||
| continue | ||
| } | ||
|
|
||
| // If the StatefulSet is not at the target version, it is not upgraded | ||
| // so don't even bother looking at the state/status of the StatefulSet. | ||
naemono marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| actualVersion, err := es_sset.GetESVersion(actualStatefulSet) | ||
| if err != nil { | ||
| return pendingNonMasterSTS, err | ||
| } | ||
| if actualVersion.LT(targetVersion) { | ||
| pendingNonMasterSTS = append(pendingNonMasterSTS, actualStatefulSet) | ||
| continue | ||
| } | ||
|
|
||
naemono marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| // Check if this StatefulSet has pending updates | ||
| if actualStatefulSet.Status.UpdatedReplicas != actualStatefulSet.Status.Replicas { | ||
| pendingNonMasterSTS = append(pendingNonMasterSTS, actualStatefulSet) | ||
| continue | ||
| } | ||
|
|
||
| // Check if there are any pods that need to be upgraded | ||
| pods, err := es_sset.GetActualPodsForStatefulSet(client, k8s.ExtractNamespacedName(&actualStatefulSet)) | ||
| if err != nil { | ||
| return pendingNonMasterSTS, err | ||
| } | ||
|
|
||
| for _, pod := range pods { | ||
| // Check if pod revision matches StatefulSet update revision | ||
| if actualStatefulSet.Status.UpdateRevision != "" && sset.PodRevision(pod) != actualStatefulSet.Status.UpdateRevision { | ||
| // This pod still needs to be upgraded | ||
| pendingNonMasterSTS = append(pendingNonMasterSTS, actualStatefulSet) | ||
| continue | ||
naemono marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
| } | ||
| } | ||
|
|
||
| return pendingNonMasterSTS, nil | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.