Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

create monitor for certificate expirations #2976

Merged
merged 27 commits into from
Sep 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
e42117a
create monitor for certificate expirations
dem4gus Jun 21, 2023
3add7e7
check for err in test setup
dem4gus Jul 5, 2023
1554e65
only get first certificate in chain
dem4gus Jul 5, 2023
01c2d95
remove monitor for certificate number
dem4gus Jul 5, 2023
38492d2
refactor tests for readability
dem4gus Jul 10, 2023
fa71f5c
add tests for error states
dem4gus Jul 11, 2023
353944b
condense error tests into testing struct
dem4gus Jul 11, 2023
091ac73
remove bool from test struct
dem4gus Jul 12, 2023
e81af7f
use error test util
dem4gus Jul 12, 2023
89feb4c
extract test bootstrapping
dem4gus Jul 12, 2023
5d47c65
more graceful error handling
dem4gus Jul 26, 2023
6bbc532
fix managed secret names
dem4gus Jul 26, 2023
7d9f065
add explanation comment
dem4gus Jul 26, 2023
f78b9f8
Update pkg/monitor/cluster/certificateexpirationstatuses.go
dem4gus Jul 27, 2023
8999e67
Update pkg/monitor/cluster/certificateexpirationstatuses.go
dem4gus Jul 27, 2023
8a25481
Update pkg/monitor/cluster/certificateexpirationstatuses.go
dem4gus Jul 27, 2023
0b3882d
Update pkg/monitor/cluster/certificateexpirationstatuses.go
dem4gus Jul 27, 2023
70c6375
match k8s parameter order convention
dem4gus Jul 27, 2023
57efed9
use generic client
dem4gus Jul 27, 2023
78d7042
Revert "Update pkg/monitor/cluster/certificateexpirationstatuses.go"
dem4gus Jul 27, 2023
c9694e8
report better information for missing secrets
dem4gus Jul 28, 2023
bb255ec
include days until certificate expiration
dem4gus Aug 1, 2023
93285b7
use built-in to calculate duration
dem4gus Aug 30, 2023
a79f7fc
use constants for ingress name and namespace
dem4gus Sep 15, 2023
e7bc84b
rename struct member
dem4gus Sep 15, 2023
00e0728
use util package
dem4gus Sep 15, 2023
b7ee4ad
return struct literals
dem4gus Sep 15, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions pkg/monitor/cluster/certificateexpirationstatuses.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package cluster

import (
"context"
"crypto/x509"
"fmt"
"strings"
"time"

operatorv1 "github.com/openshift/api/operator/v1"
corev1 "k8s.io/api/core/v1"
kerrors "k8s.io/apimachinery/pkg/api/errors"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/Azure/ARO-RP/pkg/operator"
"github.com/Azure/ARO-RP/pkg/operator/controllers/genevalogging"
"github.com/Azure/ARO-RP/pkg/util/dns"
"github.com/Azure/ARO-RP/pkg/util/pem"
)

// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.
const (
	// certificateExpirationMetricName is the gauge emitted once per
	// certificate that could be read; the expiry details travel in its
	// dimensions.
	certificateExpirationMetricName = "certificate.expirationdate"

	// secretMissingMetricName is the gauge emitted when a secret that should
	// hold a certificate cannot be found.
	secretMissingMetricName = "certificate.secretnotfound"
)

// Namespace and name used to look up the IngressController whose default
// certificate names the managed-domain secrets.
const (
	ingressNamespace = "openshift-ingress-operator"
	ingressName      = "default"
)

func (mon *Monitor) emitCertificateExpirationStatuses(ctx context.Context) error {
// report NotAfter dates for Ingress and API (on managed domains), and Geneva (always)
var certs []*x509.Certificate

mdsdCert, err := mon.getCertificate(ctx, operator.Namespace, operator.SecretName, genevalogging.GenevaCertName)
if kerrors.IsNotFound(err) {
mon.emitGauge(secretMissingMetricName, int64(1), secretMissingMetric(operator.Namespace, operator.SecretName))
} else if err != nil {
return err
} else {
certs = append(certs, mdsdCert)
}
bennerv marked this conversation as resolved.
Show resolved Hide resolved

if dns.IsManagedDomain(mon.oc.Properties.ClusterProfile.Domain) {
ic := &operatorv1.IngressController{}
err := mon.ocpclientset.Get(ctx, client.ObjectKey{
Namespace: ingressNamespace,
Name: ingressName,
}, ic)
if err != nil {
s-amann marked this conversation as resolved.
Show resolved Hide resolved
return err
}
ingressSecretName := ic.Spec.DefaultCertificate.Name

// secret with managed certificates is uuid + "-ingress" or "-apiserver"
for _, secretName := range []string{ingressSecretName, strings.Replace(ingressSecretName, "-ingress", "-apiserver", 1)} {
certificate, err := mon.getCertificate(ctx, operator.Namespace, secretName, corev1.TLSCertKey)
if kerrors.IsNotFound(err) {
mon.emitGauge(secretMissingMetricName, int64(1), secretMissingMetric(operator.Namespace, secretName))
} else if err != nil {
return err
} else {
certs = append(certs, certificate)
}
}
}

for _, cert := range certs {
daysUntilExpiration := time.Until(cert.NotAfter) / (24 * time.Hour)
mon.emitGauge(certificateExpirationMetricName, 1, map[string]string{
"subject": cert.Subject.CommonName,
"expirationDate": cert.NotAfter.UTC().Format(time.RFC3339),
"daysUntilExpiration": fmt.Sprintf("%d", daysUntilExpiration),
})
}
return nil
}

// getCertificate fetches the secret secretNamespace/secretName through the
// monitor's cluster client and passes the bytes stored under secretKey to
// pem.ParseFirstCertificate.
//
// An error from the client (including a not-found error) is returned
// unchanged so callers can classify it; otherwise the parser's result and
// error are returned as-is.
func (mon *Monitor) getCertificate(ctx context.Context, secretNamespace, secretName, secretKey string) (*x509.Certificate, error) {
	key := client.ObjectKey{
		Namespace: secretNamespace,
		Name:      secretName,
	}

	var secret corev1.Secret
	if err := mon.ocpclientset.Get(ctx, key, &secret); err != nil {
		return nil, err
	}

	// A missing key yields a nil slice, which is handed to the parser as-is.
	certData := secret.Data[secretKey]
	return pem.ParseFirstCertificate(certData)
}

// secretMissingMetric builds the dimensions attached to the
// secret-not-found gauge: the namespace and name of the missing secret.
func secretMissingMetric(namespace, name string) map[string]string {
	dims := make(map[string]string, 2)
	dims["namespace"] = namespace
	dims["name"] = name
	return dims
}
233 changes: 233 additions & 0 deletions pkg/monitor/cluster/certificateexpirationstatuses_test.go
dem4gus marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
package cluster

import (
"context"
"crypto/x509"
"encoding/pem"
"testing"
"time"

"github.com/golang/mock/gomock"
operatorv1 "github.com/openshift/api/operator/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"

"github.com/Azure/ARO-RP/pkg/api"
mock_metrics "github.com/Azure/ARO-RP/pkg/util/mocks/metrics"
utiltls "github.com/Azure/ARO-RP/pkg/util/tls"
"github.com/Azure/ARO-RP/pkg/util/uuid"
utilerror "github.com/Azure/ARO-RP/test/util/error"
)

// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.
type certInfo struct {
	// secretName is the name of the secret the generated certificate is
	// stored in.
	secretName string
	// certSubject is the subject handed to the test certificate generator;
	// the tests expect it back as the "subject" metric dimension.
	certSubject string
}

// managedDomainName is the cluster domain used by the cases that expect the
// ingress and API server certificates to be checked as well.
const managedDomainName = "contoso.aroapp.io"

// unmanagedDomainName is the cluster domain used by the cases that expect
// only the Geneva certificate to be checked.
const unmanagedDomainName = "aro.contoso.com"

func TestEmitCertificateExpirationStatuses(t *testing.T) {
expiration := time.Now().Add(time.Hour * 24 * 5)
expirationString := expiration.UTC().Format(time.RFC3339)
clusterID := uuid.DefaultGenerator.Generate()

for _, tt := range []struct {
name string
domain string
certsPresent []certInfo
wantExpirations []map[string]string
wantWarning []map[string]string
wantErr string
}{
dem4gus marked this conversation as resolved.
Show resolved Hide resolved
{
name: "only emits MDSD status for unmanaged domain",
domain: unmanagedDomainName,
certsPresent: []certInfo{{"cluster", "geneva.certificate"}},
wantExpirations: []map[string]string{
{
"subject": "geneva.certificate",
"expirationDate": expirationString,
"daysUntilExpiration": "4",
},
},
},
{
name: "includes ingress and API status for managed domain",
domain: managedDomainName,
certsPresent: []certInfo{
{"cluster", "geneva.certificate"},
{clusterID + "-ingress", managedDomainName},
{clusterID + "-apiserver", "api." + managedDomainName},
},
wantExpirations: []map[string]string{
{
"subject": "geneva.certificate",
"expirationDate": expirationString,
"daysUntilExpiration": "4",
},
{
"subject": "contoso.aroapp.io",
"expirationDate": expirationString,
"daysUntilExpiration": "4",
},
{
"subject": "api.contoso.aroapp.io",
"expirationDate": expirationString,
"daysUntilExpiration": "4",
},
},
},
{
name: "emits warning metric when cluster secret has been deleted",
domain: unmanagedDomainName,
wantWarning: []map[string]string{
{
"namespace": "openshift-azure-operator",
"name": "cluster",
},
},
},
{
name: "emits warning metric when managed domain secret has been deleted",
domain: managedDomainName,
certsPresent: []certInfo{
{"cluster", "geneva.certificate"},
{clusterID + "-ingress", managedDomainName},
},
wantExpirations: []map[string]string{
{
"subject": "geneva.certificate",
"expirationDate": expirationString,
"daysUntilExpiration": "4",
},
{
"subject": "contoso.aroapp.io",
"expirationDate": expirationString,
"daysUntilExpiration": "4",
},
},
wantWarning: []map[string]string{
{
"namespace": "openshift-azure-operator",
"name": clusterID + "-apiserver",
},
},
},
} {
t.Run(tt.name, func(t *testing.T) {
ctx := context.Background()

var secrets []client.Object
secretsFromCertInfo, err := generateTestSecrets(tt.certsPresent, tweakTemplateFn(expiration))
if err != nil {
t.Fatal(err)
}
secrets = append(secrets, secretsFromCertInfo...)

m := mock_metrics.NewMockEmitter(gomock.NewController(t))
for _, w := range tt.wantWarning {
m.EXPECT().EmitGauge(secretMissingMetricName, int64(1), w)
}
for _, g := range tt.wantExpirations {
m.EXPECT().EmitGauge(certificateExpirationMetricName, int64(1), g)
}

mon := buildMonitor(m, tt.domain, clusterID, secrets...)

err = mon.emitCertificateExpirationStatuses(ctx)

utilerror.AssertErrorMessage(t, err, tt.wantErr)
})
}

t.Run("returns error when secret is present but certificate data has been deleted", func(t *testing.T) {
var secrets []client.Object
data := map[string][]byte{}
s := buildSecret("cluster", data)
secrets = append(secrets, s)

ctx := context.Background()
m := mock_metrics.NewMockEmitter(gomock.NewController(t))
mon := buildMonitor(m, managedDomainName, clusterID, secrets...)

wantErr := "unable to find certificate"
err := mon.emitCertificateExpirationStatuses(ctx)
utilerror.AssertErrorMessage(t, err, wantErr)
})
}

func tweakTemplateFn(expiration time.Time) func(*x509.Certificate) {
return func(template *x509.Certificate) {
template.NotAfter = expiration
}
}

// generateTestSecrets creates one secret per entry of certsInfo, each holding
// a freshly generated test certificate (PEM encoded) for the entry's subject.
// tweakTemplateFn is forwarded to the certificate generator. The certificate
// is stored under "gcscert.pem" for the secret named "cluster" and under
// "tls.crt" for every other secret. The first generation error aborts and is
// returned.
func generateTestSecrets(certsInfo []certInfo, tweakTemplateFn func(*x509.Certificate)) ([]client.Object, error) {
	var objs []client.Object
	for i := range certsInfo {
		info := certsInfo[i]

		_, chain, err := utiltls.GenerateTestKeyAndCertificate(info.certSubject, nil, nil, false, false, tweakTemplateFn)
		if err != nil {
			return nil, err
		}

		var dataKey string
		switch info.secretName {
		case "cluster":
			dataKey = "gcscert.pem"
		default:
			dataKey = "tls.crt"
		}

		// Only the first certificate returned by the generator is stored.
		block := &pem.Block{
			Type:  "CERTIFICATE",
			Bytes: chain[0].Raw,
		}
		payload := map[string][]byte{
			dataKey: pem.EncodeToMemory(block),
		}
		objs = append(objs, buildSecret(info.secretName, payload))
	}
	return objs, nil
}

// buildSecret returns a secret named secretName in the
// "openshift-azure-operator" namespace carrying data as its payload.
func buildSecret(secretName string, data map[string][]byte) *corev1.Secret {
	secret := &corev1.Secret{Data: data}
	secret.ObjectMeta = metav1.ObjectMeta{
		Name:      secretName,
		Namespace: "openshift-azure-operator",
	}
	return secret
}

// buildMonitor assembles a Monitor for tests. Its cluster client is a fake
// client seeded with the given secrets plus a "default" IngressController in
// "openshift-ingress-operator" whose default certificate is id + "-ingress".
// m receives the emitted metrics and domain becomes the cluster profile
// domain.
func buildMonitor(m *mock_metrics.MockEmitter, domain, id string, secrets ...client.Object) *Monitor {
	defaultCert := &corev1.LocalObjectReference{Name: id + "-ingress"}
	ic := &operatorv1.IngressController{
		ObjectMeta: metav1.ObjectMeta{
			Namespace: "openshift-ingress-operator",
			Name:      "default",
		},
		Spec: operatorv1.IngressControllerSpec{
			DefaultCertificate: defaultCert,
		},
	}

	builder := fake.NewClientBuilder()
	builder = builder.WithObjects(ic)
	builder = builder.WithObjects(secrets...)

	cluster := &api.OpenShiftCluster{}
	cluster.Properties.ClusterProfile.Domain = domain

	return &Monitor{
		ocpclientset: builder.Build(),
		m:            m,
		oc:           cluster,
	}
}
8 changes: 8 additions & 0 deletions pkg/monitor/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ type Monitor struct {
m metrics.Emitter
arocli aroclient.Interface

ocpclientset client.Client
hiveclientset client.Client

// access below only via the helper functions in cache.go
Expand Down Expand Up @@ -91,6 +92,11 @@ func NewMonitor(log *logrus.Entry, restConfig *rest.Config, oc *api.OpenShiftClu
return nil, err
}

ocpclientset, err := client.New(restConfig, client.Options{})
if err != nil {
return nil, err
}

hiveclientset, err := getHiveClientSet(hiveRestConfig)
if err != nil {
log.Error(err)
Expand All @@ -110,6 +116,7 @@ func NewMonitor(log *logrus.Entry, restConfig *rest.Config, oc *api.OpenShiftClu
mcocli: mcocli,
arocli: arocli,
m: m,
ocpclientset: ocpclientset,
hiveclientset: hiveclientset,
}, nil
}
Expand Down Expand Up @@ -175,6 +182,7 @@ func (mon *Monitor) Monitor(ctx context.Context) (errs []error) {
mon.emitHiveRegistrationStatus,
mon.emitOperatorFlagsAndSupportBanner,
mon.emitPucmState,
mon.emitCertificateExpirationStatuses,
mon.emitPrometheusAlerts, // at the end for now because it's the slowest/least reliable
} {
err = f(ctx)
Expand Down
Loading