Skip to content

Commit

Permalink
Monitor emit ETCD certificate expiration
Browse files Browse the repository at this point in the history
For clusters running a version earlier than 4.9, emit a daysUntilExpiration
metric for the etcd certificates present in the openshift-etcd namespace
so that SRE can be alerted.
  • Loading branch information
SrinivasAtmakuri committed Aug 28, 2023
1 parent e6442d7 commit 35cfd59
Show file tree
Hide file tree
Showing 4 changed files with 208 additions and 0 deletions.
1 change: 1 addition & 0 deletions pkg/monitor/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ func (mon *Monitor) Monitor(ctx context.Context) (errs []error) {
mon.emitHiveRegistrationStatus,
mon.emitOperatorFlagsAndSupportBanner,
mon.emitPucmState,
mon.emitEtcdCertificateExpiry,
mon.emitPrometheusAlerts, // at the end for now because it's the slowest/least reliable
} {
err = f(ctx)
Expand Down
66 changes: 66 additions & 0 deletions pkg/monitor/cluster/etcdcertificateexpiry.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package cluster

// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.

import (
"context"
"fmt"
"strings"

corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/Azure/ARO-RP/pkg/util/certificate"
utilpem "github.com/Azure/ARO-RP/pkg/util/pem"
"github.com/Azure/ARO-RP/pkg/util/version"
)

// emitEtcdCertificateExpiry emits the "certificate.expirationdate" gauge when
// at least one etcd peer or serving certificate stored in the openshift-etcd
// namespace is close to expiry.
//
// Only clusters running a version lower than 4.9 are inspected. A certificate
// counts as close to expiry when certificate.LessThanMinimumDuration reports
// so for certificate.DefaultMinDurationPercent. The gauge is emitted at most
// once per call; its "daysUntilExpiration" dimension carries the smallest
// number of days left among the certificates that are close to expiry (a
// negative value means that certificate has already expired).
//
// It returns an error if the cluster version cannot be read or parsed, if the
// secrets cannot be listed, or if a matching secret holds data that cannot be
// parsed as PEM.
func (mon *Monitor) emitEtcdCertificateExpiry(ctx context.Context) error {
	cv, err := mon.getClusterVersion(ctx)
	if err != nil {
		return err
	}
	v, err := version.ParseVersion(actualVersion(cv))
	if err != nil {
		return err
	}
	// etcd certificates are auto-rotated by the operator when close to expiry
	// on clusters running 4.9+, so there is nothing to report for them.
	if !v.Lt(version.NewVersion(4, 9)) {
		return nil
	}

	secretList, err := mon.cli.CoreV1().Secrets("openshift-etcd").List(ctx, metav1.ListOptions{})
	if err != nil {
		return err
	}

	nearExpiry := false
	minDaysUntilExpiration := 0
	for i := range secretList.Items {
		secret := &secretList.Items[i]

		// Both name patterns must be TLS secrets. The parentheses matter:
		// && binds tighter than ||, so without them the type check would
		// only apply to the "etcd-serving" secrets.
		isEtcdCert := strings.Contains(secret.Name, "etcd-peer") || strings.Contains(secret.Name, "etcd-serving")
		if !isEtcdCert || secret.Type != corev1.SecretTypeTLS {
			continue
		}

		_, certs, err := utilpem.Parse(secret.Data[corev1.TLSCertKey])
		if err != nil {
			return fmt.Errorf("parsing certificate in secret %q: %w", secret.Name, err)
		}
		// A secret without any certificate has nothing to report; skip it
		// rather than panic on certs[0].
		if len(certs) == 0 {
			continue
		}

		if !certificate.LessThanMinimumDuration(certs[0], certificate.DefaultMinDurationPercent) {
			continue
		}

		daysUntilExpiration := certificate.DaysUntilExpiration(certs[0])
		// Seed the minimum with the first near-expiry certificate, then keep
		// the smallest value seen.
		if !nearExpiry || daysUntilExpiration < minDaysUntilExpiration {
			minDaysUntilExpiration = daysUntilExpiration
		}
		nearExpiry = true
	}

	if nearExpiry {
		mon.emitGauge("certificate.expirationdate", 1, map[string]string{
			"daysUntilExpiration": fmt.Sprintf("%d", minDaysUntilExpiration),
			"namespace":           "openshift-etcd",
			"name":                "openshift-etcd-certificate",
		})
	}

	return nil
}
111 changes: 111 additions & 0 deletions pkg/monitor/cluster/etcdcertificateexpiry_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
package cluster

// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.

import (
"context"
"crypto/x509"
"encoding/pem"
"fmt"
"testing"
"time"

"github.com/golang/mock/gomock"
configv1 "github.com/openshift/api/config/v1"
configfake "github.com/openshift/client-go/config/clientset/versioned/fake"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes/fake"

mock_metrics "github.com/Azure/ARO-RP/pkg/util/mocks/metrics"
utiltls "github.com/Azure/ARO-RP/pkg/util/tls"
)

// TestEtcdCertificateExpiry checks that emitEtcdCertificateExpiry emits the
// "certificate.expirationdate" gauge for a pre-4.9 cluster whose etcd peer and
// serving secrets hold a certificate that is about to expire.
func TestEtcdCertificateExpiry(t *testing.T) {
	ctx := context.Background()

	// The certificate expires almost immediately, so it is reported as near
	// expiry with zero days remaining.
	expiration := time.Now().Add(time.Microsecond * 60)
	_, cert, err := utiltls.GenerateTestKeyAndCertificate("etcd-cert", nil, nil, false, false, tweakTemplateFn(expiration))
	if err != nil {
		t.Fatal(err)
	}
	certPEM := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: cert[0].Raw})

	for _, tt := range []struct {
		name                   string
		configcli              *configfake.Clientset
		cli                    *fake.Clientset
		minDaysUntilExpiration int
	}{
		{
			name: "emit etcd certificate expiry",
			configcli: configfake.NewSimpleClientset(
				&configv1.ClusterVersion{
					ObjectMeta: metav1.ObjectMeta{
						Name: "version",
					},
					Status: configv1.ClusterVersionStatus{
						History: []configv1.UpdateHistory{
							{
								State:   configv1.CompletedUpdate,
								Version: "4.8.1",
							},
						},
					},
				},
			),
			cli: fake.NewSimpleClientset(
				&corev1.Secret{
					ObjectMeta: metav1.ObjectMeta{
						Name:      "etcd-peer",
						Namespace: "openshift-etcd",
					},
					// The code under test checks for TLS secrets; without
					// the type the fixture may be skipped.
					Type: corev1.SecretTypeTLS,
					Data: map[string][]byte{
						corev1.TLSCertKey: certPEM,
					},
				},
				&corev1.Secret{
					ObjectMeta: metav1.ObjectMeta{
						Name:      "etcd-serving",
						Namespace: "openshift-etcd",
					},
					Type: corev1.SecretTypeTLS,
					Data: map[string][]byte{
						corev1.TLSCertKey: certPEM,
					},
				},
			),
			minDaysUntilExpiration: 0,
		},
	} {
		t.Run(tt.name, func(t *testing.T) {
			controller := gomock.NewController(t)
			defer controller.Finish()

			m := mock_metrics.NewMockEmitter(controller)
			mon := &Monitor{
				cli:       tt.cli,
				configcli: tt.configcli,
				m:         m,
			}

			m.EXPECT().EmitGauge("certificate.expirationdate", int64(1), map[string]string{
				"daysUntilExpiration": fmt.Sprintf("%d", tt.minDaysUntilExpiration),
				"namespace":           "openshift-etcd",
				"name":                "openshift-etcd-certificate",
			})

			// Use a subtest-local error instead of writing to the outer err.
			if err := mon.emitEtcdCertificateExpiry(ctx); err != nil {
				t.Fatal(err)
			}
		})
	}
}

// tweakTemplateFn returns a callback that overrides the NotAfter field of the
// certificate template it is given with the supplied expiration time.
func tweakTemplateFn(expiration time.Time) func(*x509.Certificate) {
	setNotAfter := func(tmpl *x509.Certificate) {
		tmpl.NotAfter = expiration
	}
	return setNotAfter
}
30 changes: 30 additions & 0 deletions pkg/util/certificate/certificate.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package certificate

// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.

import (
"crypto/x509"
"math"
"time"
)

// DefaultMinDurationPercent is the default fraction (0.20, i.e. 20%) of a
// certificate's total validity period used as the "minimum remaining
// duration" threshold by LessThanMinimumDuration.
const DefaultMinDurationPercent = 0.20

// LessThanMinimumDuration reports whether cert has less than
// minDurationPercent of its total validity period (NotBefore to NotAfter)
// remaining. minDurationPercent is a fraction, e.g. 0.20 for 20%.
func LessThanMinimumDuration(cert *x509.Certificate, minDurationPercent float64) bool {
	expiry := cert.NotAfter
	duration := expiry.Sub(cert.NotBefore)
	// Honour the caller-supplied fraction rather than a fixed default.
	minDuration := time.Duration(float64(duration.Nanoseconds()) * minDurationPercent)
	// Once the current time passes this instant, less than the requested
	// fraction of the validity period is left.
	replacementTime := expiry.Add(-minDuration)
	return time.Now().After(replacementTime)
}

// IsCertExpired reports whether cert's NotAfter time has already passed.
//
// The comparison is made against NotAfter directly rather than against a
// rounded number of days, so a certificate that is still valid for a few more
// hours is not reported as expired.
func IsCertExpired(cert *x509.Certificate) bool {
	return time.Now().After(cert.NotAfter)
}

// DaysUntilExpiration returns the number of days between now and cert's
// NotAfter time, rounded to the nearest whole day. The result is negative
// when NotAfter lies in the past.
func DaysUntilExpiration(cert *x509.Certificate) int {
	remaining := time.Until(cert.NotAfter)
	days := remaining.Hours() / 24
	return int(math.Round(days))
}

0 comments on commit 35cfd59

Please sign in to comment.