Skip to content

Commit

Permalink
create monitor for certificate expirations (#2976)
Browse files Browse the repository at this point in the history
  • Loading branch information
dem4gus authored Sep 15, 2023
1 parent 1d6d144 commit 2114a6c
Show file tree
Hide file tree
Showing 3 changed files with 337 additions and 0 deletions.
96 changes: 96 additions & 0 deletions pkg/monitor/cluster/certificateexpirationstatuses.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package cluster

import (
"context"
"crypto/x509"
"fmt"
"strings"
"time"

operatorv1 "github.com/openshift/api/operator/v1"
corev1 "k8s.io/api/core/v1"
kerrors "k8s.io/apimachinery/pkg/api/errors"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/Azure/ARO-RP/pkg/operator"
"github.com/Azure/ARO-RP/pkg/operator/controllers/genevalogging"
"github.com/Azure/ARO-RP/pkg/util/dns"
"github.com/Azure/ARO-RP/pkg/util/pem"
)

// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.
const (
// certificateExpirationMetricName is the gauge emitted once for every
// certificate inspected by emitCertificateExpirationStatuses.
certificateExpirationMetricName = "certificate.expirationdate"
// secretMissingMetricName is the gauge emitted when a secret that should
// hold a certificate is reported as not found.
secretMissingMetricName = "certificate.secretnotfound"
// ingressNamespace and ingressName form the object key of the
// IngressController that is read to discover the ingress certificate secret.
ingressNamespace = "openshift-ingress-operator"
ingressName = "default"
)

// emitCertificateExpirationStatuses emits one certificateExpirationMetricName
// gauge per monitored certificate. Each gauge carries the certificate subject
// common name, its NotAfter date (RFC 3339, UTC) and the whole number of days
// left until expiration as dimensions.
//
// The Geneva (MDSD) certificate is always inspected. When the cluster domain
// is a managed domain, the ingress and API server certificates are inspected
// as well. A secret that is reported as not found is surfaced through the
// secretMissingMetricName gauge rather than failing the run. Any other lookup
// or parsing error is returned immediately, before any expiration gauge has
// been emitted.
func (mon *Monitor) emitCertificateExpirationStatuses(ctx context.Context) error {
	// report NotAfter dates for Ingress and API (on managed domains), and Geneva (always)
	var certs []*x509.Certificate

	mdsdCert, err := mon.getCertificate(ctx, operator.Namespace, operator.SecretName, genevalogging.GenevaCertName)
	switch {
	case kerrors.IsNotFound(err):
		mon.emitGauge(secretMissingMetricName, int64(1), secretMissingMetric(operator.Namespace, operator.SecretName))
	case err != nil:
		return err
	default:
		certs = append(certs, mdsdCert)
	}

	if dns.IsManagedDomain(mon.oc.Properties.ClusterProfile.Domain) {
		ic := &operatorv1.IngressController{}
		err := mon.ocpclientset.Get(ctx, client.ObjectKey{
			Namespace: ingressNamespace,
			Name:      ingressName,
		}, ic)
		if err != nil {
			return err
		}

		// DefaultCertificate is a pointer and may be unset on the
		// IngressController; report that as an error instead of panicking
		// on the dereference below.
		if ic.Spec.DefaultCertificate == nil {
			return fmt.Errorf("ingresscontroller %s/%s has no default certificate configured", ingressNamespace, ingressName)
		}
		ingressSecretName := ic.Spec.DefaultCertificate.Name

		// secret with managed certificates is uuid + "-ingress" or "-apiserver"
		for _, secretName := range []string{ingressSecretName, strings.Replace(ingressSecretName, "-ingress", "-apiserver", 1)} {
			certificate, err := mon.getCertificate(ctx, operator.Namespace, secretName, corev1.TLSCertKey)
			switch {
			case kerrors.IsNotFound(err):
				mon.emitGauge(secretMissingMetricName, int64(1), secretMissingMetric(operator.Namespace, secretName))
			case err != nil:
				return err
			default:
				certs = append(certs, certificate)
			}
		}
	}

	for _, cert := range certs {
		// Integer division of durations: partial days are truncated.
		daysUntilExpiration := time.Until(cert.NotAfter) / (24 * time.Hour)
		mon.emitGauge(certificateExpirationMetricName, 1, map[string]string{
			"subject":             cert.Subject.CommonName,
			"expirationDate":      cert.NotAfter.UTC().Format(time.RFC3339),
			"daysUntilExpiration": fmt.Sprintf("%d", daysUntilExpiration),
		})
	}
	return nil
}

// getCertificate fetches the secret secretNamespace/secretName through the
// monitor's client and hands the bytes stored under secretKey to
// pem.ParseFirstCertificate. An error from the client lookup is returned
// unchanged, so callers can still classify it (for example as not found).
func (mon *Monitor) getCertificate(ctx context.Context, secretNamespace, secretName, secretKey string) (*x509.Certificate, error) {
	key := client.ObjectKey{
		Namespace: secretNamespace,
		Name:      secretName,
	}

	var secret corev1.Secret
	if err := mon.ocpclientset.Get(ctx, key, &secret); err != nil {
		return nil, err
	}

	certBytes := secret.Data[secretKey]
	return pem.ParseFirstCertificate(certBytes)
}

// secretMissingMetric builds the dimension map attached to the
// secretMissingMetricName gauge: the namespace and name of the secret that
// could not be found.
func secretMissingMetric(namespace, name string) map[string]string {
	dims := make(map[string]string, 2)
	dims["namespace"] = namespace
	dims["name"] = name
	return dims
}
233 changes: 233 additions & 0 deletions pkg/monitor/cluster/certificateexpirationstatuses_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
package cluster

import (
"context"
"crypto/x509"
"encoding/pem"
"testing"
"time"

"github.com/golang/mock/gomock"
operatorv1 "github.com/openshift/api/operator/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"

"github.com/Azure/ARO-RP/pkg/api"
mock_metrics "github.com/Azure/ARO-RP/pkg/util/mocks/metrics"
utiltls "github.com/Azure/ARO-RP/pkg/util/tls"
"github.com/Azure/ARO-RP/pkg/util/uuid"
utilerror "github.com/Azure/ARO-RP/test/util/error"
)

// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.
type certInfo struct {
	// secretName is the name of the secret the generated certificate is stored in.
	secretName string
	// certSubject is the subject passed to the test certificate generator.
	certSubject string
}

const (
// managedDomainName is the cluster domain used by the managed-domain test
// cases; the tests expect dns.IsManagedDomain to accept it.
managedDomainName = "contoso.aroapp.io"
// unmanagedDomainName is the cluster domain used by the test cases in which
// only the Geneva certificate is expected to be reported.
unmanagedDomainName = "aro.contoso.com"
)

// TestEmitCertificateExpirationStatuses runs emitCertificateExpirationStatuses
// against a fake client and a mock metrics emitter, and checks which gauges
// are emitted for managed and unmanaged domains and for missing secrets.
func TestEmitCertificateExpirationStatuses(t *testing.T) {
// Test certificates expire five days after this instant. By the time the
// code under test computes the remaining time it is slightly less than five
// days, so the truncated day count expected below is "4".
expiration := time.Now().Add(time.Hour * 24 * 5)
expirationString := expiration.UTC().Format(time.RFC3339)
clusterID := uuid.DefaultGenerator.Generate()

for _, tt := range []struct {
name string
// domain is set as the cluster profile domain of the monitored cluster.
domain string
// certsPresent lists the secrets (with generated certificates) that exist
// in the fake client.
certsPresent []certInfo
// wantExpirations holds the dimension maps expected on
// certificateExpirationMetricName gauges.
wantExpirations []map[string]string
// wantWarning holds the dimension maps expected on
// secretMissingMetricName gauges.
wantWarning []map[string]string
// wantErr is the error message handed to utilerror.AssertErrorMessage;
// presumably the empty string means no error is expected.
wantErr string
}{
{
name: "only emits MDSD status for unmanaged domain",
domain: unmanagedDomainName,
certsPresent: []certInfo{{"cluster", "geneva.certificate"}},
wantExpirations: []map[string]string{
{
"subject": "geneva.certificate",
"expirationDate": expirationString,
"daysUntilExpiration": "4",
},
},
},
{
name: "includes ingress and API status for managed domain",
domain: managedDomainName,
certsPresent: []certInfo{
{"cluster", "geneva.certificate"},
{clusterID + "-ingress", managedDomainName},
{clusterID + "-apiserver", "api." + managedDomainName},
},
wantExpirations: []map[string]string{
{
"subject": "geneva.certificate",
"expirationDate": expirationString,
"daysUntilExpiration": "4",
},
{
"subject": "contoso.aroapp.io",
"expirationDate": expirationString,
"daysUntilExpiration": "4",
},
{
"subject": "api.contoso.aroapp.io",
"expirationDate": expirationString,
"daysUntilExpiration": "4",
},
},
},
{
// No secrets exist at all, so the "cluster" secret lookup is not found.
name: "emits warning metric when cluster secret has been deleted",
domain: unmanagedDomainName,
wantWarning: []map[string]string{
{
"namespace": "openshift-azure-operator",
"name": "cluster",
},
},
},
{
// The "-apiserver" secret is deliberately left out of certsPresent.
name: "emits warning metric when managed domain secret has been deleted",
domain: managedDomainName,
certsPresent: []certInfo{
{"cluster", "geneva.certificate"},
{clusterID + "-ingress", managedDomainName},
},
wantExpirations: []map[string]string{
{
"subject": "geneva.certificate",
"expirationDate": expirationString,
"daysUntilExpiration": "4",
},
{
"subject": "contoso.aroapp.io",
"expirationDate": expirationString,
"daysUntilExpiration": "4",
},
},
wantWarning: []map[string]string{
{
"namespace": "openshift-azure-operator",
"name": clusterID + "-apiserver",
},
},
},
} {
t.Run(tt.name, func(t *testing.T) {
ctx := context.Background()

var secrets []client.Object
secretsFromCertInfo, err := generateTestSecrets(tt.certsPresent, tweakTemplateFn(expiration))
if err != nil {
t.Fatal(err)
}
secrets = append(secrets, secretsFromCertInfo...)

// Register every expected gauge on the mock before invoking the code
// under test.
m := mock_metrics.NewMockEmitter(gomock.NewController(t))
for _, w := range tt.wantWarning {
m.EXPECT().EmitGauge(secretMissingMetricName, int64(1), w)
}
for _, g := range tt.wantExpirations {
m.EXPECT().EmitGauge(certificateExpirationMetricName, int64(1), g)
}

mon := buildMonitor(m, tt.domain, clusterID, secrets...)

err = mon.emitCertificateExpirationStatuses(ctx)

utilerror.AssertErrorMessage(t, err, tt.wantErr)
})
}

// Here the "cluster" secret exists but carries no data, so the lookup
// succeeds and the failure is expected to come from certificate parsing.
t.Run("returns error when secret is present but certificate data has been deleted", func(t *testing.T) {
var secrets []client.Object
data := map[string][]byte{}
s := buildSecret("cluster", data)
secrets = append(secrets, s)

ctx := context.Background()
// No EXPECT calls are registered on this mock emitter.
m := mock_metrics.NewMockEmitter(gomock.NewController(t))
mon := buildMonitor(m, managedDomainName, clusterID, secrets...)

// NOTE(review): this message presumably originates in the project's pem
// helper — confirm.
wantErr := "unable to find certificate"
err := mon.emitCertificateExpirationStatuses(ctx)
utilerror.AssertErrorMessage(t, err, wantErr)
})
}

// tweakTemplateFn returns a callback that overwrites the NotAfter field of the
// certificate template it is given with expiration.
func tweakTemplateFn(expiration time.Time) func(*x509.Certificate) {
	setNotAfter := func(tmpl *x509.Certificate) {
		tmpl.NotAfter = expiration
	}
	return setNotAfter
}

// generateTestSecrets creates one secret per entry of certsInfo. Each secret
// holds a freshly generated, PEM-encoded test certificate whose template has
// been adjusted by tweakTemplateFn. The first generation error aborts the
// whole call.
func generateTestSecrets(certsInfo []certInfo, tweakTemplateFn func(*x509.Certificate)) ([]client.Object, error) {
	var objs []client.Object
	for i := range certsInfo {
		info := certsInfo[i]

		_, chain, err := utiltls.GenerateTestKeyAndCertificate(info.certSubject, nil, nil, false, false, tweakTemplateFn)
		if err != nil {
			return nil, err
		}

		// The "cluster" secret stores its certificate under a different data
		// key than the other secrets.
		var dataKey string
		switch info.secretName {
		case "cluster":
			dataKey = "gcscert.pem"
		default:
			dataKey = "tls.crt"
		}

		block := &pem.Block{
			Type:  "CERTIFICATE",
			Bytes: chain[0].Raw,
		}
		objs = append(objs, buildSecret(info.secretName, map[string][]byte{
			dataKey: pem.EncodeToMemory(block),
		}))
	}
	return objs, nil
}

// buildSecret returns a secret named secretName in the
// "openshift-azure-operator" namespace carrying data as its payload.
func buildSecret(secretName string, data map[string][]byte) *corev1.Secret {
	meta := metav1.ObjectMeta{
		Namespace: "openshift-azure-operator",
		Name:      secretName,
	}
	secret := corev1.Secret{
		ObjectMeta: meta,
		Data:       data,
	}
	return &secret
}

// buildMonitor assembles a Monitor for tests. Its client is a fake client
// preloaded with a "default" IngressController, whose default certificate
// points at id + "-ingress", plus the given secrets. Metrics go to m and the
// monitored cluster reports domain as its cluster profile domain.
func buildMonitor(m *mock_metrics.MockEmitter, domain, id string, secrets ...client.Object) *Monitor {
	defaultCertRef := &corev1.LocalObjectReference{Name: id + "-ingress"}
	ic := &operatorv1.IngressController{
		ObjectMeta: metav1.ObjectMeta{
			Namespace: "openshift-ingress-operator",
			Name:      "default",
		},
		Spec: operatorv1.IngressControllerSpec{
			DefaultCertificate: defaultCertRef,
		},
	}

	builder := fake.NewClientBuilder()
	builder = builder.WithObjects(ic)
	builder = builder.WithObjects(secrets...)
	fakeClient := builder.Build()

	cluster := &api.OpenShiftCluster{
		Properties: api.OpenShiftClusterProperties{
			ClusterProfile: api.ClusterProfile{Domain: domain},
		},
	}

	return &Monitor{
		ocpclientset: fakeClient,
		m:            m,
		oc:           cluster,
	}
}
8 changes: 8 additions & 0 deletions pkg/monitor/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ type Monitor struct {
m metrics.Emitter
arocli aroclient.Interface

ocpclientset client.Client
hiveclientset client.Client

// access below only via the helper functions in cache.go
Expand Down Expand Up @@ -91,6 +92,11 @@ func NewMonitor(log *logrus.Entry, restConfig *rest.Config, oc *api.OpenShiftClu
return nil, err
}

ocpclientset, err := client.New(restConfig, client.Options{})
if err != nil {
return nil, err
}

hiveclientset, err := getHiveClientSet(hiveRestConfig)
if err != nil {
log.Error(err)
Expand All @@ -110,6 +116,7 @@ func NewMonitor(log *logrus.Entry, restConfig *rest.Config, oc *api.OpenShiftClu
mcocli: mcocli,
arocli: arocli,
m: m,
ocpclientset: ocpclientset,
hiveclientset: hiveclientset,
}, nil
}
Expand Down Expand Up @@ -175,6 +182,7 @@ func (mon *Monitor) Monitor(ctx context.Context) (errs []error) {
mon.emitHiveRegistrationStatus,
mon.emitOperatorFlagsAndSupportBanner,
mon.emitPucmState,
mon.emitCertificateExpirationStatuses,
mon.emitPrometheusAlerts, // at the end for now because it's the slowest/least reliable
} {
err = f(ctx)
Expand Down

0 comments on commit 2114a6c

Please sign in to comment.