-
Notifications
You must be signed in to change notification settings - Fork 169
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
GenevaAction for etcd Certificate Renewal
- Loading branch information
1 parent
e4d4c73
commit abbb72b
Showing
3 changed files
with
397 additions
and
0 deletions.
There are no files selected for viewing
368 changes: 368 additions & 0 deletions
368
pkg/frontend/admin_openshiftcluster_etcdcertificaterenew.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,368 @@ | ||
package frontend | ||
|
||
// Copyright (c) Microsoft Corporation. | ||
// Licensed under the Apache License 2.0. | ||
|
||
import ( | ||
"context" | ||
"encoding/json" | ||
"fmt" | ||
"net/http" | ||
"path/filepath" | ||
"strings" | ||
"time" | ||
|
||
"github.com/go-chi/chi/v5" | ||
configv1 "github.com/openshift/api/config/v1" | ||
operatorv1 "github.com/openshift/api/operator/v1" | ||
"github.com/sirupsen/logrus" | ||
"github.com/ugorji/go/codec" | ||
corev1 "k8s.io/api/core/v1" | ||
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" | ||
kruntime "k8s.io/apimachinery/pkg/runtime" | ||
|
||
"github.com/Azure/ARO-RP/pkg/api" | ||
"github.com/Azure/ARO-RP/pkg/database/cosmosdb" | ||
"github.com/Azure/ARO-RP/pkg/frontend/adminactions" | ||
"github.com/Azure/ARO-RP/pkg/frontend/middleware" | ||
utilcert "github.com/Azure/ARO-RP/pkg/util/cert" | ||
utilpem "github.com/Azure/ARO-RP/pkg/util/pem" | ||
"github.com/Azure/ARO-RP/pkg/util/steps" | ||
"github.com/Azure/ARO-RP/pkg/util/version" | ||
) | ||
|
||
type etcdrenew struct { | ||
log *logrus.Entry | ||
k adminactions.KubeActions | ||
secretNames []string | ||
mode string | ||
backupSecrets map[string][]byte | ||
} | ||
|
||
var etcdOperatorControllerConditionsExpected = map[string]operatorv1.ConditionStatus{ | ||
"EtcdCertSignerControllerDegraded": operatorv1.ConditionFalse, | ||
"EtcdMembersAvailable": operatorv1.ConditionTrue, | ||
"NodeInstallerProgressing": operatorv1.ConditionFalse, | ||
"NodeControllerDegraded": operatorv1.ConditionFalse, | ||
"EtcdMembersProgressing": operatorv1.ConditionFalse, | ||
} | ||
|
||
var etcdOperatorConditionsExpected = map[configv1.ClusterStatusConditionType]configv1.ConditionStatus{ | ||
configv1.OperatorAvailable: configv1.ConditionTrue, | ||
configv1.OperatorProgressing: configv1.ConditionFalse, | ||
configv1.OperatorDegraded: configv1.ConditionFalse, | ||
} | ||
|
||
func (f *frontend) postAdminOpenShiftClusterEtcdCertificateRenew(w http.ResponseWriter, r *http.Request) { | ||
ctx := r.Context() | ||
log := ctx.Value(middleware.ContextKeyLog).(*logrus.Entry) | ||
r.URL.Path = filepath.Dir(r.URL.Path) | ||
|
||
err := f._postAdminOpenShiftClusterEtcdCertificateRenew(ctx, r, log) | ||
|
||
adminReply(log, w, nil, nil, err) | ||
} | ||
|
||
// validate cluster is <4.9 and etcd is in expected state | ||
func (e *etcdrenew) validate(ctx context.Context) error { | ||
s := []steps.Step{ | ||
steps.Action(e.validateEtcdOperatorControllersState), | ||
steps.Action(e.validateEtcdOperatorState), | ||
steps.Action(e.validateEtcdCertsExistsAndExpiry), | ||
} | ||
_, err := steps.Run(ctx, e.log, 10*time.Second, s, nil) | ||
if err != nil { | ||
return err | ||
} | ||
return nil | ||
} | ||
|
||
func (e *etcdrenew) isRenewed(ctx context.Context) error { | ||
s := []steps.Step{ | ||
steps.Condition(e.isRevisied, 30*time.Minute, true), | ||
} | ||
_, err := steps.Run(ctx, e.log, 10*time.Second, s, nil) | ||
if err != nil { | ||
return err | ||
} | ||
return nil | ||
} | ||
|
||
func (e *etcdrenew) backupAndDelete(ctx context.Context) error { | ||
s := []steps.Step{ | ||
steps.Action(e.backupEtcdSecrets), | ||
steps.Action(e.deleteEtcdSecrets), | ||
} | ||
_, err := steps.Run(ctx, e.log, 10*time.Second, s, nil) | ||
if err != nil { | ||
return err | ||
} | ||
return nil | ||
} | ||
|
||
func (f *frontend) _postAdminOpenShiftClusterEtcdCertificateRenew(ctx context.Context, r *http.Request, log *logrus.Entry) error { | ||
resourceName := chi.URLParam(r, "resourceName") | ||
resourceType := chi.URLParam(r, "resourceType") | ||
resourceGroupName := chi.URLParam(r, "resourceGroupName") | ||
|
||
resourceID := strings.TrimPrefix(r.URL.Path, "/admin") | ||
|
||
doc, err := f.dbOpenShiftClusters.Get(ctx, resourceID) | ||
switch { | ||
case cosmosdb.IsErrorStatusCode(err, http.StatusNotFound): | ||
return api.NewCloudError(http.StatusNotFound, api.CloudErrorCodeResourceNotFound, "", "The Resource '%s/%s' under resource group '%s' was not found.", resourceType, resourceName, resourceGroupName) | ||
case err != nil: | ||
return err | ||
} | ||
|
||
k, err := f.kubeActionsFactory(log, f.env, doc.OpenShiftCluster) | ||
if err != nil { | ||
return err | ||
} | ||
e := &etcdrenew{ | ||
log: log, | ||
k: k, | ||
secretNames: nil, | ||
mode: "renew", | ||
} | ||
|
||
if err = e.validateClusterVersion(ctx); err != nil { | ||
return err | ||
} | ||
if err = e.validate(ctx); err != nil { | ||
return err | ||
} | ||
|
||
// Fetch secretNames using nodeNames | ||
masterNodeNames, err := fetchNodeNames(ctx, k, log) | ||
if err != nil { | ||
return err | ||
} | ||
if len(masterNodeNames) != 3 { | ||
return api.NewCloudError(http.StatusForbidden, api.CloudErrorCodeForbidden, "", "The cluster doesn't have 3 master nodes") | ||
} | ||
|
||
for _, nodeName := range masterNodeNames { | ||
for _, prefix := range []string{"etcd-peer-", "etcd-serving-", "etcd-serving-metrics-"} { | ||
e.secretNames = append(e.secretNames, prefix+nodeName) | ||
} | ||
} | ||
|
||
// backup and delete etcd secrets | ||
if err = e.backupAndDelete(ctx); err != nil { | ||
return err | ||
} | ||
|
||
// Calling Sleep method | ||
e.log.Infoln("Entering sleep... 3mins") | ||
time.Sleep(3 * time.Minute) | ||
|
||
if err = e.isRenewed(ctx); err != nil { | ||
e.mode = "recovery" | ||
} else { | ||
e.mode = "renewed" | ||
} | ||
|
||
if e.mode == "renewed" { | ||
if err = e.validate(ctx); err != nil { | ||
return err | ||
} | ||
e.log.Infoln("Done") | ||
} | ||
|
||
if e.mode == "recovery" { | ||
e.log.Println("Attempting to recover from backup") | ||
if err = e.recoverEtcdSecrets(ctx); err != nil { | ||
return err | ||
} | ||
e.log.Infoln("Recovered") | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func (e *etcdrenew) validateClusterVersion(ctx context.Context) error { | ||
e.log.Infoln("validating cluster version now") | ||
rawCV, err := e.k.KubeGet(ctx, "clusterversion", "", "version") | ||
if err != nil { | ||
return err | ||
} | ||
cv := &configv1.ClusterVersion{} | ||
err = codec.NewDecoderBytes(rawCV, &codec.JsonHandle{}).Decode(cv) | ||
if err != nil { | ||
return api.NewCloudError(http.StatusInternalServerError, api.CloudErrorCodeInternalServerError, "", fmt.Sprintf("failed to decode clusterversion, %s", err.Error())) | ||
} | ||
clusterVersion, err := version.GetClusterVersion(cv) | ||
if err != nil { | ||
return err | ||
} | ||
// ETCD ceritificates are autorotated by the operator when close to expiry for cluster running 4.9+ | ||
if clusterVersion.Lt(version.NewVersion(4, 9)) { | ||
return api.NewCloudError(http.StatusForbidden, api.CloudErrorCodeForbidden, "", "etcd certificate renewal is not needed for cluster running version 4.9+") | ||
} | ||
return nil | ||
} | ||
|
||
func (e *etcdrenew) validateEtcdOperatorControllersState(ctx context.Context) error { | ||
e.log.Infoln("validating etcdOperator Controllers state now") | ||
rawEtcd, err := e.k.KubeGet(ctx, "Etcd", "", "cluster") | ||
if err != nil { | ||
return api.NewCloudError(http.StatusInternalServerError, api.CloudErrorCodeInternalServerError, "", err.Error()) | ||
} | ||
etcd := &operatorv1.Etcd{} | ||
err = codec.NewDecoderBytes(rawEtcd, &codec.JsonHandle{}).Decode(etcd) | ||
if err != nil { | ||
return api.NewCloudError(http.StatusInternalServerError, api.CloudErrorCodeInternalServerError, "", fmt.Sprintf("failed to decode etcd object, %s", err.Error())) | ||
} | ||
for _, c := range etcd.Status.Conditions { | ||
if _, ok := etcdOperatorControllerConditionsExpected[c.Type]; !ok { | ||
continue | ||
} | ||
if etcdOperatorControllerConditionsExpected[c.Type] != c.Status && e.mode == "renewed" { | ||
return api.NewCloudError(http.StatusInternalServerError, api.CloudErrorCodeInternalServerError, "", "%s is in state %s, quiting.", c.Type, c.Status) | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
func (e *etcdrenew) isRevisied(ctx context.Context) (bool, error) { | ||
isAtRevision := true | ||
rawEtcd, err := e.k.KubeGet(ctx, "Etcd", "", "cluster") | ||
if err != nil { | ||
return false, api.NewCloudError(http.StatusInternalServerError, api.CloudErrorCodeInternalServerError, "", err.Error()) | ||
} | ||
etcd := &operatorv1.Etcd{} | ||
err = codec.NewDecoderBytes(rawEtcd, &codec.JsonHandle{}).Decode(etcd) | ||
if err != nil { | ||
return false, api.NewCloudError(http.StatusInternalServerError, api.CloudErrorCodeInternalServerError, "", fmt.Sprintf("failed to decode etcd object, %s", err.Error())) | ||
} | ||
for _, s := range etcd.Status.NodeStatuses { | ||
fmt.Println("Latest Revision is %s", etcd.Status.LatestAvailableRevision) | ||
if s.CurrentRevision != etcd.Status.LatestAvailableRevision { | ||
isAtRevision = false | ||
} | ||
} | ||
return isAtRevision, nil | ||
} | ||
|
||
func (e *etcdrenew) validateEtcdOperatorState(ctx context.Context) error { | ||
e.log.Infoln("validating Etcd Operator state") | ||
rawEtcdOperator, err := e.k.KubeGet(ctx, "clusteroperator", "", "etcd") | ||
if err != nil { | ||
return api.NewCloudError(http.StatusInternalServerError, api.CloudErrorCodeInternalServerError, "", err.Error()) | ||
} | ||
etcdOperator := &configv1.ClusterOperator{} | ||
err = codec.NewDecoderBytes(rawEtcdOperator, &codec.JsonHandle{}).Decode(etcdOperator) | ||
if err != nil { | ||
return api.NewCloudError(http.StatusInternalServerError, api.CloudErrorCodeInternalServerError, "", fmt.Sprintf("failed to decode etcd operator, %s", err.Error())) | ||
} | ||
for _, c := range etcdOperator.Status.Conditions { | ||
if _, ok := etcdOperatorConditionsExpected[c.Type]; !ok { | ||
continue | ||
} | ||
if etcdOperatorConditionsExpected[c.Type] != c.Status && e.mode == "renewed" { | ||
return api.NewCloudError(http.StatusInternalServerError, api.CloudErrorCodeInternalServerError, "", "Etcd Operator is not in expected state, quiting.") | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
func fetchNodeNames(ctx context.Context, k adminactions.KubeActions, log *logrus.Entry) ([]string, error) { | ||
var masterNodeNames []string | ||
var u unstructured.Unstructured | ||
var nodes corev1.NodeList | ||
|
||
nodeList, err := k.KubeList(ctx, "node", "") | ||
if err != nil { | ||
return nil, err | ||
} | ||
if err = json.Unmarshal(nodeList, &u); err != nil { | ||
return nil, err | ||
} | ||
err = kruntime.DefaultUnstructuredConverter.FromUnstructured(u.Object, &nodes) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
for _, node := range nodes.Items { | ||
if _, ok := node.ObjectMeta.Labels["node-role.kubernetes.io/master"]; ok { | ||
masterNodeNames = append(masterNodeNames, node.ObjectMeta.Name) | ||
continue | ||
} | ||
} | ||
return masterNodeNames, nil | ||
} | ||
|
||
func (e *etcdrenew) validateEtcdCertsExistsAndExpiry(ctx context.Context) error { | ||
e.log.Infoln("validating etcd certs exists, not expired but are not close to expiry") | ||
for _, secretname := range e.secretNames { | ||
cert, err := e.k.KubeGet(ctx, "Secret", namespaceEtcds, secretname) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
var u unstructured.Unstructured | ||
var secret corev1.Secret | ||
if err = json.Unmarshal(cert, &u); err != nil { | ||
return err | ||
} | ||
err = kruntime.DefaultUnstructuredConverter.FromUnstructured(u.Object, &secret) | ||
if err != nil { | ||
return err | ||
} | ||
_, certData, err := utilpem.Parse(secret.Data[corev1.TLSCertKey]) | ||
if err != nil { | ||
return err | ||
} | ||
if !utilcert.IsLessThanMinimumDuration(certData[0], utilcert.DefaultMinDurationPercent) && e.mode == "renewed" { | ||
return api.NewCloudError(http.StatusInternalServerError, api.CloudErrorCodeInternalServerError, "", "secret %s is not near expiry, quitting", secretname) | ||
} | ||
if utilcert.IsCertExpired(certData[0]) { | ||
return api.NewCloudError(http.StatusInternalServerError, api.CloudErrorCodeInternalServerError, "", "secret %s is already expired, quitting", secretname) | ||
} | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func (e *etcdrenew) backupEtcdSecrets(ctx context.Context) error { | ||
e.log.Infoln("backing up etcd secrets now") | ||
for _, secretname := range e.secretNames { | ||
e.log.Infof("Backing up secret %s", secretname) | ||
data, err := e.k.KubeGet(ctx, "Secret", namespaceEtcds, secretname) | ||
if err != nil { | ||
return err | ||
} | ||
e.backupSecrets[secretname] = data | ||
} | ||
return nil | ||
} | ||
|
||
func (e *etcdrenew) deleteEtcdSecrets(ctx context.Context) error { | ||
e.log.Infoln("deleting etcd secrets now") | ||
for _, secretname := range e.secretNames { | ||
e.log.Infof("Deleting secret %s", secretname) | ||
err := e.k.KubeDelete(ctx, "Secret", namespaceEtcds, secretname, false, nil) | ||
if err != nil { | ||
return err | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
func (e *etcdrenew) recoverEtcdSecrets(ctx context.Context) error { | ||
e.log.Infoln("recovering etcd secrets now") | ||
for secretname, data := range e.backupSecrets { | ||
e.log.Infof("Recovering secret %s", secretname) | ||
obj := &unstructured.Unstructured{} | ||
err := obj.UnmarshalJSON(data) | ||
if err != nil { | ||
return api.NewCloudError(http.StatusBadRequest, api.CloudErrorCodeInvalidRequestContent, "", "The request content was invalid and could not be deserialized: %q.", err) | ||
} | ||
err = e.k.KubeCreateOrUpdate(ctx, obj) | ||
if err != nil { | ||
return err | ||
} | ||
} | ||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.