Skip to content

Commit

Permalink
Metrics for SyncSet and SelectorSyncSets
Browse files Browse the repository at this point in the history
  • Loading branch information
rhamitarora committed Aug 27, 2024
1 parent e079d17 commit 8d1a6e9
Show file tree
Hide file tree
Showing 16 changed files with 837 additions and 16 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ require (
github.com/openshift/api v0.0.0-20240103200955-7ca3a4634e46
github.com/openshift/client-go v0.0.0-20221019143426-16aed247da5c
github.com/openshift/cloud-credential-operator v0.0.0-00010101000000-000000000000
github.com/openshift/hive/apis v0.0.0-20240812130639-bdf9d08a060a
github.com/openshift/hive/apis v0.0.0-20240821011206-1ec27ad45d5a
github.com/openshift/library-go v0.0.0-20220525173854-9b950a41acdc
github.com/openshift/machine-config-operator v0.0.1-0.20230519222939-1abc13efbb0d
github.com/pires/go-proxyproto v0.6.2
Expand Down
15 changes: 15 additions & 0 deletions pkg/hive/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"sort"

hivev1 "github.com/openshift/hive/apis/hive/v1"
hivev1alpha1 "github.com/openshift/hive/apis/hiveinternal/v1alpha1"
"github.com/sirupsen/logrus"
corev1 "k8s.io/api/core/v1"
kerrors "k8s.io/apimachinery/pkg/api/errors"
Expand Down Expand Up @@ -42,6 +43,7 @@ type ClusterManager interface {
IsClusterInstallationComplete(ctx context.Context, doc *api.OpenShiftClusterDocument) (bool, error)
GetClusterDeployment(ctx context.Context, doc *api.OpenShiftClusterDocument) (*hivev1.ClusterDeployment, error)
ResetCorrelationData(ctx context.Context, doc *api.OpenShiftClusterDocument) error
GetClusterSyncforClusterDeployment(ctx context.Context, doc *api.OpenShiftClusterDocument) (*hivev1alpha1.ClusterSync, error)
}

type clusterManager struct {
Expand Down Expand Up @@ -262,3 +264,16 @@ func (hr *clusterManager) installLogsForLatestDeployment(ctx context.Context, cd

return latestProvision.Spec.InstallLog, nil
}

// GetClusterSyncforClusterDeployment fetches the Hive ClusterSync object that
// lives in the cluster's Hive namespace (taken from the document's HiveProfile)
// under the name ClusterDeploymentName. Any error from the Hive client is
// returned unchanged, together with a nil ClusterSync.
func (hr *clusterManager) GetClusterSyncforClusterDeployment(ctx context.Context, doc *api.OpenShiftClusterDocument) (*hivev1alpha1.ClusterSync, error) {
	key := client.ObjectKey{
		Namespace: doc.OpenShiftCluster.Properties.HiveProfile.Namespace,
		Name:      ClusterDeploymentName,
	}

	clusterSync := &hivev1alpha1.ClusterSync{}
	if getErr := hr.hiveClientset.Get(ctx, key, clusterSync); getErr != nil {
		return nil, getErr
	}

	return clusterSync, nil
}
50 changes: 50 additions & 0 deletions pkg/hive/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"testing"

hivev1 "github.com/openshift/hive/apis/hive/v1"
hivev1alpha1 "github.com/openshift/hive/apis/hiveinternal/v1alpha1"
"github.com/sirupsen/logrus"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -548,3 +549,52 @@ func TestGetClusterDeployment(t *testing.T) {
})
}
}

// TestGetClusterSyncforClusterDeployment checks that the cluster manager
// returns the ClusterSync stored in the cluster's Hive namespace under
// ClusterDeploymentName when it is present in the (fake) Hive client.
func TestGetClusterSyncforClusterDeployment(t *testing.T) {
	fakeNamespace := "aro-00000000-0000-0000-0000-000000000000"
	doc := &api.OpenShiftClusterDocument{
		OpenShiftCluster: &api.OpenShiftCluster{
			Properties: api.OpenShiftClusterProperties{
				HiveProfile: api.HiveProfile{
					Namespace: fakeNamespace,
				},
			},
		},
	}

	cs := &hivev1alpha1.ClusterSync{
		ObjectMeta: metav1.ObjectMeta{
			Name:      ClusterDeploymentName,
			Namespace: fakeNamespace,
		},
	}

	for _, tt := range []struct {
		name    string
		wantErr string
	}{
		{name: "syncset exists and returned"},
		{name: "selectorsyncsets exists and returned"},
	} {
		t.Run(tt.name, func(t *testing.T) {
			// The ClusterSync is only seeded when the case expects success.
			fakeClientBuilder := fake.NewClientBuilder()
			if tt.wantErr == "" {
				fakeClientBuilder = fakeClientBuilder.WithRuntimeObjects(cs)
			}
			c := clusterManager{
				hiveClientset: fakeClientBuilder.Build(),
				log:           logrus.NewEntry(logrus.StandardLogger()),
			}

			result, err := c.GetClusterSyncforClusterDeployment(context.Background(), doc)
			if err != nil && err.Error() != tt.wantErr ||
				err == nil && tt.wantErr != "" {
				t.Fatalf("unexpected error: got %v, want %q", err, tt.wantErr)
			}
			if tt.wantErr != "" {
				// The expected error was returned; there is no object to inspect.
				return
			}

			if result == nil {
				t.Fatal("expected a cluster sync, got nil")
			}
			// A mismatch in either field is a failure; the two checks must be
			// joined with ||, otherwise an object that is wrong in only one
			// field slips through.
			if result.Name != cs.Name || result.Namespace != cs.Namespace {
				t.Fatal("Unexpected cluster sync returned", result)
			}
		})
	}
}
30 changes: 18 additions & 12 deletions pkg/monitor/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client/apiutil"

"github.com/Azure/ARO-RP/pkg/api"
"github.com/Azure/ARO-RP/pkg/hive"
"github.com/Azure/ARO-RP/pkg/metrics"
"github.com/Azure/ARO-RP/pkg/monitor/dimension"
"github.com/Azure/ARO-RP/pkg/monitor/emitter"
Expand Down Expand Up @@ -60,10 +61,12 @@ type Monitor struct {
arodl *appsv1.DeploymentList
}

wg *sync.WaitGroup
wg *sync.WaitGroup
hiveClusterManager hive.ClusterManager
doc *api.OpenShiftClusterDocument
}

func NewMonitor(log *logrus.Entry, restConfig *rest.Config, oc *api.OpenShiftCluster, m metrics.Emitter, hiveRestConfig *rest.Config, hourlyRun bool, wg *sync.WaitGroup) (*Monitor, error) {
func NewMonitor(log *logrus.Entry, restConfig *rest.Config, oc *api.OpenShiftCluster, doc *api.OpenShiftClusterDocument, m metrics.Emitter, hiveRestConfig *rest.Config, hourlyRun bool, wg *sync.WaitGroup, hiveClusterManager hive.ClusterManager) (*Monitor, error) {
r, err := azure.ParseResourceID(oc.ID)
if err != nil {
return nil, err
Expand Down Expand Up @@ -126,16 +129,18 @@ func NewMonitor(log *logrus.Entry, restConfig *rest.Config, oc *api.OpenShiftClu
oc: oc,
dims: dims,

restconfig: restConfig,
cli: cli,
configcli: configcli,
maocli: maocli,
mcocli: mcocli,
arocli: arocli,
m: m,
ocpclientset: ocpclientset,
hiveclientset: hiveclientset,
wg: wg,
restconfig: restConfig,
cli: cli,
configcli: configcli,
maocli: maocli,
mcocli: mcocli,
arocli: arocli,
m: m,
ocpclientset: ocpclientset,
hiveclientset: hiveclientset,
wg: wg,
hiveClusterManager: hiveClusterManager,
doc: doc,
}, nil
}

Expand Down Expand Up @@ -208,6 +213,7 @@ func (mon *Monitor) Monitor(ctx context.Context) (errs []error) {
mon.emitJobConditions,
mon.emitSummary,
mon.emitHiveRegistrationStatus,
mon.emitSyncSetStatus,
mon.emitOperatorFlagsAndSupportBanner,
mon.emitMaintenanceState,
mon.emitCertificateExpirationStatuses,
Expand Down
43 changes: 43 additions & 0 deletions pkg/monitor/cluster/syncsetstatus.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package cluster

// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.

import (
"context"
)

// emitSyncSetStatus looks up the Hive ClusterSync for the monitored cluster
// and emits gauges describing its SyncSets and SelectorSyncSets:
//
//   - "syncsets.count" / "selectorsyncsets.count": number of entries in the
//     respective status list (only emitted when the list is non-nil);
//   - "hive.syncsets" / "hive.selectorsyncsets": one gauge of value 1 per
//     entry, with the name, result, timestamps and failure message as
//     dimensions.
//
// It returns nil without emitting anything when no Hive cluster manager is
// configured, and returns the lookup error when the ClusterSync cannot be
// fetched.
func (mon *Monitor) emitSyncSetStatus(ctx context.Context) error {
	// The manager is handed to NewMonitor by the caller and may be nil (for
	// example when no Hive manager could be constructed); calling a method on
	// a nil interface would panic.
	if mon.hiveClusterManager == nil {
		return nil
	}

	cs, err := mon.hiveClusterManager.GetClusterSyncforClusterDeployment(ctx, mon.doc)
	if err != nil {
		// Surface the failure to the caller instead of dropping it silently.
		return err
	}
	if cs == nil {
		return nil
	}

	if cs.Status.SyncSets != nil {
		mon.emitGauge("syncsets.count", int64(len(cs.Status.SyncSets)), nil)

		for _, s := range cs.Status.SyncSets {
			// FirstSuccessTime is optional: it stays nil until the sync set
			// has been applied successfully at least once.
			firstSuccessTime := ""
			if s.FirstSuccessTime != nil {
				firstSuccessTime = s.FirstSuccessTime.String()
			}

			mon.emitGauge("hive.syncsets", 1, map[string]string{
				"name":               s.Name,
				"result":             string(s.Result),
				"firstSuccessTime":   firstSuccessTime,
				"lastTransitionTime": s.LastTransitionTime.String(),
				"failureMessage":     s.FailureMessage,
			})
		}
	}

	if cs.Status.SelectorSyncSets != nil {
		mon.emitGauge("selectorsyncsets.count", int64(len(cs.Status.SelectorSyncSets)), nil)

		for _, s := range cs.Status.SelectorSyncSets {
			// Same optional-timestamp handling as for SyncSets above.
			firstSuccessTime := ""
			if s.FirstSuccessTime != nil {
				firstSuccessTime = s.FirstSuccessTime.String()
			}

			mon.emitGauge("hive.selectorsyncsets", 1, map[string]string{
				"name":               s.Name,
				"result":             string(s.Result),
				"firstSuccessTime":   firstSuccessTime,
				"lastTransitionTime": s.LastTransitionTime.String(),
				"failureMessage":     s.FailureMessage,
			})
		}
	}

	return nil
}
11 changes: 10 additions & 1 deletion pkg/monitor/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ import (
"k8s.io/client-go/rest"

"github.com/Azure/ARO-RP/pkg/api"
"github.com/Azure/ARO-RP/pkg/env"
"github.com/Azure/ARO-RP/pkg/hive"
"github.com/Azure/ARO-RP/pkg/monitor/azure/nsg"
"github.com/Azure/ARO-RP/pkg/monitor/cluster"
"github.com/Azure/ARO-RP/pkg/monitor/dimension"
Expand Down Expand Up @@ -281,9 +283,16 @@ func (mon *monitor) workOne(ctx context.Context, log *logrus.Entry, doc *api.Ope
var monitors []monitoring.Monitor
var wg sync.WaitGroup

_env, err := env.NewEnv(ctx, log, env.COMPONENT_MONITOR)
if err != nil {
log.Error(err)
return
}
hiveClusterManager, _ := hive.NewFromConfig(log, _env, restConfig)

nsgMon := nsg.NewMonitor(log, doc.OpenShiftCluster, mon.env, sub.ID, sub.Subscription.Properties.TenantID, mon.clusterm, dims, &wg, nsgMonTicker.C)

c, err := cluster.NewMonitor(log, restConfig, doc.OpenShiftCluster, mon.clusterm, hiveRestConfig, hourlyRun, &wg)
c, err := cluster.NewMonitor(log, restConfig, doc.OpenShiftCluster, doc, mon.clusterm, hiveRestConfig, hourlyRun, &wg, hiveClusterManager)
if err != nil {
log.Error(err)
mon.m.EmitGauge("monitor.cluster.failedworker", 1, map[string]string{
Expand Down
16 changes: 16 additions & 0 deletions pkg/util/mocks/hive/hive.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions pkg/util/scheme/scheme.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
securityv1 "github.com/openshift/api/security/v1"
cloudcredentialv1 "github.com/openshift/cloud-credential-operator/pkg/apis/cloudcredential/v1"
hivev1 "github.com/openshift/hive/apis/hive/v1"
hivev1alpha1 "github.com/openshift/hive/apis/hiveinternal/v1alpha1"
mcv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
appsv1 "k8s.io/api/apps/v1"
Expand Down Expand Up @@ -51,6 +52,7 @@ func init() {
utilruntime.Must(operatorv1.AddToScheme(scheme.Scheme))
utilruntime.Must(cloudcredentialv1.AddToScheme(scheme.Scheme))
utilruntime.Must(hivev1.AddToScheme(scheme.Scheme))
utilruntime.Must(hivev1alpha1.AddToScheme(scheme.Scheme))
utilruntime.Must(imageregistryv1.AddToScheme(scheme.Scheme))
utilruntime.Must(templatesv1.AddToScheme(scheme.Scheme))
}
4 changes: 3 additions & 1 deletion test/e2e/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ var _ = Describe("Monitor", func() {
wg.Add(1)
mon, err := cluster.NewMonitor(log, clients.RestConfig, &api.OpenShiftCluster{
ID: resourceIDFromEnv(),
}, &noop.Noop{}, nil, true, &wg)
}, &api.OpenShiftClusterDocument{
ID: resourceIDFromEnv(),
}, &noop.Noop{}, nil, true, &wg, nil)
Expect(err).NotTo(HaveOccurred())

By("running the monitor once")
Expand Down
Loading

0 comments on commit 8d1a6e9

Please sign in to comment.