Skip to content

Commit

Permalink
Monitor emit ETCD certificate expiration
Browse files Browse the repository at this point in the history
For clusters running a version below 4.9, emit a daysUntilExpiration
metric for the etcd certificates present in the openshift-etcd namespace
so that SRE can be alerted.
  • Loading branch information
SrinivasAtmakuri committed Aug 31, 2023
1 parent dcaa1ad commit 082b398
Show file tree
Hide file tree
Showing 4 changed files with 198 additions and 0 deletions.
1 change: 1 addition & 0 deletions pkg/monitor/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ func (mon *Monitor) Monitor(ctx context.Context) (errs []error) {
mon.emitHiveRegistrationStatus,
mon.emitOperatorFlagsAndSupportBanner,
mon.emitPucmState,
mon.emitEtcdCertificateExpiry,
mon.emitPrometheusAlerts, // at the end for now because it's the slowest/least reliable
} {
err = f(ctx)
Expand Down
70 changes: 70 additions & 0 deletions pkg/monitor/cluster/etcdcertificateexpiry.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package cluster

// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.

import (
"context"
"fmt"
"math"
"strings"

corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

utilcert "github.com/Azure/ARO-RP/pkg/util/certificate"

Check failure on line 15 in pkg/monitor/cluster/etcdcertificateexpiry.go

View workflow job for this annotation

GitHub Actions / golangci-lint

import "github.com/Azure/ARO-RP/pkg/util/certificate" imported as "utilcert" but must be "utilcertificate" according to config (importas)
utilpem "github.com/Azure/ARO-RP/pkg/util/pem"
"github.com/Azure/ARO-RP/pkg/util/version"
)

// emitEtcdCertificateExpiry emits the "certificate.expirationdate" gauge when
// at least one etcd peer or serving certificate stored in the openshift-etcd
// namespace has less than utilcert.DefaultMinDurationPercent of its validity
// period remaining. The gauge's "daysUntilExpiration" dimension carries the
// smallest number of whole days until expiration among those certificates.
//
// Clusters at version 4.9 or later are skipped, and nothing is emitted when no
// matching certificate is near expiry.
//
// It returns an error if the cluster version cannot be fetched or parsed, if
// the secrets cannot be listed, or if a matching secret does not contain a
// parsable certificate.
func (mon *Monitor) emitEtcdCertificateExpiry(ctx context.Context) error {
	cv, err := mon.getClusterVersion(ctx)
	if err != nil {
		return err
	}
	v, err := version.ParseVersion(actualVersion(cv))
	if err != nil {
		return err
	}
	// etcd certificates are auto-rotated by the operator when close to expiry
	// for clusters running 4.9+, so there is nothing to report for them.
	if !v.Lt(version.NewVersion(4, 9)) {
		return nil
	}

	secretList, err := mon.cli.CoreV1().Secrets("openshift-etcd").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("type=%s", corev1.SecretTypeTLS)})
	if err != nil {
		return err
	}

	certNearExpiry := false
	minDaysUntilExpiration := math.MaxInt
	for _, secret := range secretList.Items {
		// Only the per-node peer and serving certificates are of interest.
		if !strings.Contains(secret.Name, "etcd-peer") && !strings.Contains(secret.Name, "etcd-serving") {
			continue
		}

		_, certs, err := utilpem.Parse(secret.Data[corev1.TLSCertKey])
		if err != nil {
			return err
		}
		// Guard the certs[0] accesses below: a secret whose data holds no
		// certificate block would otherwise cause an index-out-of-range panic.
		if len(certs) == 0 {
			return fmt.Errorf("secret %s/%s has no certificate under key %q", secret.Namespace, secret.Name, corev1.TLSCertKey)
		}

		if utilcert.LessThanMinimumDuration(certs[0], utilcert.DefaultMinDurationPercent) {
			certNearExpiry = true
			minDaysUntilExpiration = min(utilcert.DaysUntilExpiration(certs[0]), minDaysUntilExpiration)
		}
	}

	if certNearExpiry {
		mon.emitGauge("certificate.expirationdate", 1, map[string]string{
			"daysUntilExpiration": fmt.Sprintf("%d", minDaysUntilExpiration),
			"namespace":           "openshift-etcd",
			"name":                "openshift-etcd-certificate",
		})
	}

	return nil
}

// min returns the smaller of the two given ints; when they are equal that
// shared value is returned.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
100 changes: 100 additions & 0 deletions pkg/monitor/cluster/etcdcertificateexpiry_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
package cluster

// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.

import (
"context"
"crypto/x509"
"encoding/pem"
"fmt"
"testing"
"time"

"github.com/golang/mock/gomock"
configv1 "github.com/openshift/api/config/v1"
configfake "github.com/openshift/client-go/config/clientset/versioned/fake"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes/fake"

mock_metrics "github.com/Azure/ARO-RP/pkg/util/mocks/metrics"
utiltls "github.com/Azure/ARO-RP/pkg/util/tls"
)

// TestEtcdCertificateExpiry verifies that emitEtcdCertificateExpiry emits the
// "certificate.expirationdate" gauge for a 4.8 cluster whose openshift-etcd
// namespace holds an etcd peer certificate that is about to expire.
func TestEtcdCertificateExpiry(t *testing.T) {
	ctx := context.Background()
	// The certificate expires 60 microseconds from now, so it is treated as
	// near expiry and reports zero whole days remaining.
	expiration := time.Now().Add(time.Microsecond * 60)
	_, cert, err := utiltls.GenerateTestKeyAndCertificate("etcd-cert", nil, nil, false, false, tweakTemplateFn(expiration))
	if err != nil {
		t.Fatal(err)
	}

	for _, tt := range []struct {
		name                   string
		configcli              *configfake.Clientset
		cli                    *fake.Clientset
		minDaysUntilExpiration int
	}{
		{
			name: "emit etcd certificate expiry",
			configcli: configfake.NewSimpleClientset(
				&configv1.ClusterVersion{
					ObjectMeta: metav1.ObjectMeta{
						Name: "version",
					},
					Status: configv1.ClusterVersionStatus{
						History: []configv1.UpdateHistory{
							{
								State:   configv1.CompletedUpdate,
								Version: "4.8.1",
							},
						},
					},
				},
			),
			cli: fake.NewSimpleClientset(
				&corev1.Secret{
					ObjectMeta: metav1.ObjectMeta{
						Name:      "etcd-peer-master-0",
						Namespace: "openshift-etcd",
					},
					Data: map[string][]byte{
						corev1.TLSCertKey: pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: cert[0].Raw}),
					},
					Type: corev1.SecretTypeTLS,
				},
			),
			minDaysUntilExpiration: 0,
		},
	} {
		t.Run(tt.name, func(t *testing.T) {
			controller := gomock.NewController(t)
			defer controller.Finish()

			m := mock_metrics.NewMockEmitter(controller)
			mon := &Monitor{
				cli:       tt.cli,
				configcli: tt.configcli,
				m:         m,
			}

			m.EXPECT().EmitGauge("certificate.expirationdate", int64(1), map[string]string{
				"daysUntilExpiration": fmt.Sprintf("%d", tt.minDaysUntilExpiration),
				"namespace":           "openshift-etcd",
				"name":                "openshift-etcd-certificate",
			})

			// Keep the error local to the subtest instead of writing to the
			// enclosing function's err from inside the closure.
			if err := mon.emitEtcdCertificateExpiry(ctx); err != nil {
				t.Fatal(err)
			}
		})
	}
}

// tweakTemplateFn builds a certificate-template mutator that overrides the
// template's NotAfter field with the supplied expiration time.
func tweakTemplateFn(expiration time.Time) func(*x509.Certificate) {
	setNotAfter := func(tmpl *x509.Certificate) {
		tmpl.NotAfter = expiration
	}
	return setNotAfter
}
27 changes: 27 additions & 0 deletions pkg/util/certificate/certificate.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package certificate

// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.

import (
"crypto/x509"
"time"
)

// DefaultMinDurationPercent is the default fraction (0.20, i.e. 20%) of a
// certificate's total validity period intended to be passed to
// LessThanMinimumDuration as the near-expiry threshold.
const DefaultMinDurationPercent = 0.20

// LessThanMinimumDuration indicates whether the provided cert has less
// than the provided minimum percentage of its duration remaining.
//
// minDurationPercent is a fraction of the certificate's total validity period
// (NotAfter - NotBefore); for example 0.20 means "less than 20% remaining".
// The result is true once the current time is past NotAfter minus that
// fraction of the validity period.
func LessThanMinimumDuration(cert *x509.Certificate, minDurationPercent float64) bool {
	duration := cert.NotAfter.Sub(cert.NotBefore)
	// Honour the caller-supplied threshold rather than always falling back to
	// the package default.
	minDuration := time.Duration(float64(duration.Nanoseconds()) * minDurationPercent)
	return time.Now().After(cert.NotAfter.Add(-minDuration))
}

// IsCertExpired reports whether the certificate's NotAfter time lies strictly
// before the current time.
func IsCertExpired(cert *x509.Certificate) bool {
	now := time.Now()
	return cert.NotAfter.Before(now)
}

// DaysUntilExpiration returns the number of whole 24-hour days between now and
// the certificate's NotAfter time, truncated toward zero. The result is
// negative once the certificate has been expired for at least a full day.
func DaysUntilExpiration(cert *x509.Certificate) int {
	const day = 24 * time.Hour
	remaining := time.Until(cert.NotAfter)
	return int(remaining / day)
}

0 comments on commit 082b398

Please sign in to comment.