Skip to content

Commit

Permalink
Monitor emit ETCD certificate expiration
Browse files Browse the repository at this point in the history
For clusters running version <4.9, emit daysUntilExpiration
metric for etcd certificates present in openshift-etcd namespace
so SRE can be alerted
  • Loading branch information
SrinivasAtmakuri committed Sep 17, 2023
1 parent 3091979 commit ba1b099
Show file tree
Hide file tree
Showing 4 changed files with 159 additions and 0 deletions.
56 changes: 56 additions & 0 deletions pkg/monitor/cluster/certificateexpirationstatuses.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,22 @@ import (
"context"
"crypto/x509"
"fmt"
"math"
"strings"
"time"

operatorv1 "github.com/openshift/api/operator/v1"
corev1 "k8s.io/api/core/v1"
kerrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/Azure/ARO-RP/pkg/operator"
"github.com/Azure/ARO-RP/pkg/operator/controllers/genevalogging"
utilcert "github.com/Azure/ARO-RP/pkg/util/cert"
"github.com/Azure/ARO-RP/pkg/util/dns"
"github.com/Azure/ARO-RP/pkg/util/pem"
"github.com/Azure/ARO-RP/pkg/util/version"
)

// Copyright (c) Microsoft Corporation.
Expand Down Expand Up @@ -94,3 +98,55 @@ func secretMissingMetric(namespace, name string) map[string]string {
"name": name,
}
}

// emitEtcdCertificateExpiry emits a "certificate.expirationdate" gauge when at
// least one etcd peer or serving certificate stored in the openshift-etcd
// namespace is close to expiry. The gauge carries the smallest number of days
// remaining among the near-expiry certificates in its "daysUntilExpiration"
// dimension.
//
// The check only runs for clusters older than 4.9; for newer clusters it is a
// no-op. It returns an error if the cluster version cannot be read or parsed,
// if the secrets cannot be listed, or if a matching secret does not hold a
// parseable certificate.
func (mon *Monitor) emitEtcdCertificateExpiry(ctx context.Context) error {
	cv, err := mon.getClusterVersion(ctx)
	if err != nil {
		return err
	}
	v, err := version.ParseVersion(actualVersion(cv))
	if err != nil {
		return err
	}
	// etcd certificates are auto-rotated by the operator when close to expiry
	// for clusters running 4.9+, so there is nothing to report there.
	if !v.Lt(version.NewVersion(4, 9)) {
		return nil
	}

	secretList, err := mon.cli.CoreV1().Secrets("openshift-etcd").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("type=%s", corev1.SecretTypeTLS)})
	if err != nil {
		return err
	}

	certNearExpiry := false
	minDaysUntilExpiration := math.MaxInt
	for _, secret := range secretList.Items {
		// Only the etcd peer and serving certificates are of interest.
		if !strings.Contains(secret.ObjectMeta.Name, "etcd-peer") && !strings.Contains(secret.ObjectMeta.Name, "etcd-serving") {
			continue
		}

		_, certs, err := pem.Parse(secret.Data[corev1.TLSCertKey])
		if err != nil {
			return err
		}
		// Guard the certs[0] access below: a secret with empty or
		// certificate-less data must not panic the monitor.
		// NOTE(review): pem.Parse may already reject such input — confirm.
		if len(certs) == 0 {
			return fmt.Errorf("secret openshift-etcd/%s contains no certificate", secret.ObjectMeta.Name)
		}

		if utilcert.IsLessThanMinimumDuration(certs[0], utilcert.DefaultMinDurationPercent) {
			certNearExpiry = true
			minDaysUntilExpiration = min(utilcert.DaysUntilExpiration(certs[0]), minDaysUntilExpiration)
		}
	}

	if certNearExpiry {
		mon.emitGauge("certificate.expirationdate", 1, map[string]string{
			"daysUntilExpiration": fmt.Sprintf("%d", minDaysUntilExpiration),
			"namespace":           "openshift-etcd",
			"name":                "openshift-etcd-certificate",
		})
	}

	return nil
}

// min returns the smaller of a and b; when they are equal, b is returned.
func min(a, b int) int {
	smaller := b
	if a < b {
		smaller = a
	}
	return smaller
}
75 changes: 75 additions & 0 deletions pkg/monitor/cluster/certificateexpirationstatuses_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,17 @@ import (
"context"
"crypto/x509"
"encoding/pem"
"fmt"
"testing"
"time"

"github.com/golang/mock/gomock"
configv1 "github.com/openshift/api/config/v1"
operatorv1 "github.com/openshift/api/operator/v1"
configfake "github.com/openshift/client-go/config/clientset/versioned/fake"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
fakeClient "k8s.io/client-go/kubernetes/fake"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"

Expand Down Expand Up @@ -231,3 +235,74 @@ func buildMonitor(m *mock_metrics.MockEmitter, domain, id string, secrets ...cli
},
}
}

// TestEtcdCertificateExpiry verifies that emitEtcdCertificateExpiry emits the
// "certificate.expirationdate" gauge for a 4.8 cluster whose etcd peer
// certificate secret holds a certificate generated with a near-immediate
// expiration.
func TestEtcdCertificateExpiry(t *testing.T) {
	ctx := context.Background()

	// The expiration is only 60 microseconds away, so the expected number of
	// whole days until expiration is 0.
	// NOTE(review): presumably tweakTemplateFn applies this as NotAfter — confirm.
	expiration := time.Now().Add(time.Microsecond * 60)
	_, cert, err := utiltls.GenerateTestKeyAndCertificate("etcd-cert", nil, nil, false, false, tweakTemplateFn(expiration))
	if err != nil {
		t.Fatal(err)
	}

	// Cluster version fixture: a completed update to a pre-4.9 release.
	clusterVersion := &configv1.ClusterVersion{
		ObjectMeta: metav1.ObjectMeta{
			Name: "version",
		},
		Status: configv1.ClusterVersionStatus{
			History: []configv1.UpdateHistory{
				{
					State:   configv1.CompletedUpdate,
					Version: "4.8.1",
				},
			},
		},
	}

	// TLS secret fixture carrying the PEM-encoded test certificate.
	etcdPeerSecret := &corev1.Secret{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "etcd-peer-master-0",
			Namespace: "openshift-etcd",
		},
		Data: map[string][]byte{
			corev1.TLSCertKey: pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: cert[0].Raw}),
		},
		Type: corev1.SecretTypeTLS,
	}

	testCases := []struct {
		name                   string
		configcli              *configfake.Clientset
		cli                    *fakeClient.Clientset
		minDaysUntilExpiration int
	}{
		{
			name:                   "emit etcd certificate expiry",
			configcli:              configfake.NewSimpleClientset(clusterVersion),
			cli:                    fakeClient.NewSimpleClientset(etcdPeerSecret),
			minDaysUntilExpiration: 0,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			ctrl := gomock.NewController(t)
			defer ctrl.Finish()

			emitter := mock_metrics.NewMockEmitter(ctrl)
			mon := &Monitor{
				cli:       tc.cli,
				configcli: tc.configcli,
				m:         emitter,
			}

			wantDims := map[string]string{
				"daysUntilExpiration": fmt.Sprintf("%d", tc.minDaysUntilExpiration),
				"namespace":           "openshift-etcd",
				"name":                "openshift-etcd-certificate",
			}
			emitter.EXPECT().EmitGauge("certificate.expirationdate", int64(1), wantDims)

			if err := mon.emitEtcdCertificateExpiry(ctx); err != nil {
				t.Fatal(err)
			}
		})
	}
}
1 change: 1 addition & 0 deletions pkg/monitor/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ func (mon *Monitor) Monitor(ctx context.Context) (errs []error) {
mon.emitOperatorFlagsAndSupportBanner,
mon.emitPucmState,
mon.emitCertificateExpirationStatuses,
mon.emitEtcdCertificateExpiry,
mon.emitPrometheusAlerts, // at the end for now because it's the slowest/least reliable
} {
err = f(ctx)
Expand Down
27 changes: 27 additions & 0 deletions pkg/util/cert/cert.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package cert

// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.

import (
"crypto/x509"
"time"
)

const DefaultMinDurationPercent = 0.20

// IsLessThanMinimumDuration indicates whether the provided cert has less
// than the provided minimum percentage of its duration remaining.
//
// minDurationPercent is a fraction of the certificate's total validity period
// (NotBefore to NotAfter); for example 0.20 means "less than 20% of the
// lifetime is left". The result is true once the current time is past
// NotAfter minus that fraction of the validity period.
func IsLessThanMinimumDuration(cert *x509.Certificate, minDurationPercent float64) bool {
	duration := cert.NotAfter.Sub(cert.NotBefore)
	// Honor the caller-supplied threshold instead of a fixed package default.
	minDuration := time.Duration(float64(duration.Nanoseconds()) * minDurationPercent)
	return time.Now().After(cert.NotAfter.Add(-minDuration))
}

// IsCertExpired reports whether the certificate's NotAfter instant lies
// strictly before the current time.
func IsCertExpired(cert *x509.Certificate) bool {
	now := time.Now()
	return cert.NotAfter.Before(now)
}

// DaysUntilExpiration returns the number of whole 24-hour periods between now
// and the certificate's NotAfter instant. The value is truncated toward zero,
// so it is 0 within a day of expiry on either side and negative once the
// certificate has been expired for a day or more.
func DaysUntilExpiration(cert *x509.Certificate) int {
	const day = 24 * time.Hour
	remaining := time.Until(cert.NotAfter)
	return int(remaining / day)
}

0 comments on commit ba1b099

Please sign in to comment.