Skip to content

Commit

Permalink
Merge pull request #18 from appuio/add-upgrading-metrics
Browse files Browse the repository at this point in the history
Add metrics if cluster and/or config pools are upgrading
  • Loading branch information
bastjan authored Jun 7, 2023
2 parents d897859 + ae56c04 commit 9d950e1
Show file tree
Hide file tree
Showing 3 changed files with 204 additions and 0 deletions.
92 changes: 92 additions & 0 deletions controllers/upgrading_metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
package controllers

import (
"context"
"fmt"

configv1 "github.com/openshift/api/config/v1"
machineconfigurationv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"
"github.com/prometheus/client_golang/prometheus"
"k8s.io/apimachinery/pkg/util/sets"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/appuio/openshift-upgrade-controller/pkg/clusterversion"
"github.com/appuio/openshift-upgrade-controller/pkg/healthcheck"
)

//+kubebuilder:rbac:groups=config.openshift.io,resources=clusterversions,verbs=get;list;watch;update;patch
//+kubebuilder:rbac:groups=machineconfiguration.openshift.io,resources=machineconfigpools,verbs=get;list;watch

// Descriptors of the metrics emitted by ClusterUpgradingMetric.
var (
	// clusterUpgradingDesc describes the label-less, cluster-wide upgrading metric.
	clusterUpgradingDesc = prometheus.NewDesc(
		MetricsNamespace+"_cluster_upgrading",
		"Set to 1 if the cluster is currently upgrading, 0 otherwise.",
		[]string{},
		nil,
	)

	// poolsUpgradingDesc describes the per-pool upgrading metric; the "pool"
	// label carries the machine config pool name.
	poolsUpgradingDesc = prometheus.NewDesc(
		MetricsNamespace+"_machine_config_pools_upgrading",
		"Set to 1 if a machine config pool in the cluster is currently upgrading, 0 otherwise.",
		[]string{"pool"},
		nil,
	)
)

// ClusterUpgradingMetric is a Prometheus collector that reports whether the
// cluster and each of its machine config pools are currently upgrading.
type ClusterUpgradingMetric struct {
	// Client is used to read the machine config pools and the cluster version.
	client.Client

	// ManagedUpstreamClusterVersionName is the name of the ClusterVersion
	// object that is fetched to determine whether the cluster is upgrading.
	ManagedUpstreamClusterVersionName string
}

// Compile-time check that *ClusterUpgradingMetric satisfies prometheus.Collector.
var _ prometheus.Collector = (*ClusterUpgradingMetric)(nil)

// Describe implements prometheus.Collector.
// It pushes the fixed set of metric descriptors of this collector, cluster
// descriptor first, onto the given channel.
func (*ClusterUpgradingMetric) Describe(ch chan<- *prometheus.Desc) {
	for _, desc := range []*prometheus.Desc{clusterUpgradingDesc, poolsUpgradingDesc} {
		ch <- desc
	}
}

// Collect implements prometheus.Collector.
// It sends one upgrading metric per machine config pool and a single metric
// that is 1 if the cluster version is upgrading or at least one machine config
// pool is updating, 0 otherwise.
//
// If the machine config pools cannot be listed, both metrics are reported as
// invalid and collection stops. If the cluster version cannot be fetched, the
// pool metrics are still sent and only the cluster metric is reported as
// invalid.
func (m *ClusterUpgradingMetric) Collect(ch chan<- prometheus.Metric) {
	ctx := context.Background()

	var mcpl machineconfigurationv1.MachineConfigPoolList
	if err := m.List(ctx, &mcpl); err != nil {
		err = fmt.Errorf("failed to list machine config pools: %w", err)
		ch <- prometheus.NewInvalidMetric(clusterUpgradingDesc, err)
		ch <- prometheus.NewInvalidMetric(poolsUpgradingDesc, err)
		// Both metrics depend on the pool list. Continuing would emit a second,
		// misleading sample for the cluster metric computed from an empty list.
		return
	}

	poolsUpdating := healthcheck.MachineConfigPoolsUpdating(mcpl)
	updating := sets.NewString()
	for _, p := range poolsUpdating {
		updating.Insert(p.Name)
	}
	// Emit a sample for every pool, not only the updating ones, so that idle
	// pools are reported with an explicit 0.
	for _, mcp := range mcpl.Items {
		ch <- prometheus.MustNewConstMetric(
			poolsUpgradingDesc,
			prometheus.GaugeValue,
			boolToFloat64(updating.Has(mcp.Name)),
			mcp.Name,
		)
	}

	var cv configv1.ClusterVersion
	if err := m.Get(ctx, client.ObjectKey{Name: m.ManagedUpstreamClusterVersionName}, &cv); err != nil {
		err = fmt.Errorf("failed to get cluster version %q: %w", m.ManagedUpstreamClusterVersionName, err)
		ch <- prometheus.NewInvalidMetric(clusterUpgradingDesc, err)
		return
	}
	ch <- prometheus.MustNewConstMetric(
		clusterUpgradingDesc,
		prometheus.GaugeValue,
		boolToFloat64(clusterversion.IsUpgrading(cv) || len(poolsUpdating) > 0),
	)
}

// boolToFloat64 maps true to 1 and false to 0 for use as a metric value.
func boolToFloat64(b bool) float64 {
	var v float64
	if b {
		v = 1
	}
	return v
}
105 changes: 105 additions & 0 deletions controllers/upgrading_metrics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
package controllers

import (
"context"
"fmt"
"io"
"strings"
"testing"

configv1 "github.com/openshift/api/config/v1"
machineconfigurationv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"
"github.com/prometheus/client_golang/prometheus/testutil"
"github.com/stretchr/testify/require"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Test_ClusterUpgradingMetric collects from ClusterUpgradingMetric in three
// consecutive states and compares the output with the expected exposition:
//  1. cluster version progressing, both pools fully updated;
//  2. cluster version not progressing, worker pool one machine short;
//  3. cluster version not progressing, both pools fully updated.
//
// The phases mutate the same fixture objects and push them through the client,
// so their order matters.
func Test_ClusterUpgradingMetric(t *testing.T) {
// Restrict the comparison to the two metrics under test.
expectedMetricNames := []string{
"openshift_upgrade_controller_cluster_upgrading",
"openshift_upgrade_controller_machine_config_pools_upgrading",
}

// Cluster version that starts out with Progressing=True.
version := &configv1.ClusterVersion{
ObjectMeta: metav1.ObjectMeta{
Name: "version",
},
Status: configv1.ClusterVersionStatus{
Conditions: []configv1.ClusterOperatorStatusCondition{
{
Type: configv1.OperatorProgressing,
Status: configv1.ConditionTrue,
},
},
},
}
// Both pools start with all three machines updated.
masterPool := &machineconfigurationv1.MachineConfigPool{
ObjectMeta: metav1.ObjectMeta{
Name: "master",
},
Status: machineconfigurationv1.MachineConfigPoolStatus{
MachineCount: 3,
UpdatedMachineCount: 3,
},
}
workerPool := &machineconfigurationv1.MachineConfigPool{
ObjectMeta: metav1.ObjectMeta{
Name: "worker",
},
Status: machineconfigurationv1.MachineConfigPoolStatus{
MachineCount: 3,
UpdatedMachineCount: 3,
},
}
// NOTE(review): controllerClient is defined elsewhere; presumably it returns
// a fake client seeded with the given objects — confirm.
c := controllerClient(t, version, masterPool, workerPool)
subject := &ClusterUpgradingMetric{
Client: c,

ManagedUpstreamClusterVersionName: "version",
}

// Phase 1: only the cluster version reports progress.
require.NoError(t,
testutil.CollectAndCompare(subject, expectedMetrics(true, false, false), expectedMetricNames...),
"upgrading should be true if cluster version is progressing",
)

// Phase 2: the cluster version stops progressing, but the worker pool now has
// one machine that is not updated; the cluster metric must stay at 1.
version.Status.Conditions[0].Status = configv1.ConditionFalse
require.NoError(t, c.Status().Update(context.Background(), version))
workerPool.Status.UpdatedMachineCount = workerPool.Status.MachineCount - 1
require.NoError(t, c.Status().Update(context.Background(), workerPool))

require.NoError(t,
testutil.CollectAndCompare(subject, expectedMetrics(true, false, true), expectedMetricNames...),
"upgrading should be true if cluster version is progressing or a machine config pool is not fully upgraded",
)

// Phase 3: the worker pool catches up, so nothing is upgrading anymore.
workerPool.Status.UpdatedMachineCount = workerPool.Status.MachineCount
require.NoError(t, c.Status().Update(context.Background(), workerPool))

require.NoError(t,
testutil.CollectAndCompare(subject, expectedMetrics(false, false, false), expectedMetricNames...),
"upgrading should be false if cluster version is not progressing and all machine config pools are fully upgraded",
)
}

// expectedMetrics renders the Prometheus text exposition expected from the
// collector: the cluster metric followed by the per-pool metrics for the
// "master" and "worker" pools. Each flag is written as 1 (true) or 0 (false).
func expectedMetrics(upgrading, masterUpgrading, workerUpgrading bool) io.Reader {
	const exposition = `
# HELP openshift_upgrade_controller_cluster_upgrading Set to 1 if the cluster is currently upgrading, 0 otherwise.
# TYPE openshift_upgrade_controller_cluster_upgrading gauge
openshift_upgrade_controller_cluster_upgrading %d
# HELP openshift_upgrade_controller_machine_config_pools_upgrading Set to 1 if a machine config pool in the cluster is currently upgrading, 0 otherwise.
# TYPE openshift_upgrade_controller_machine_config_pools_upgrading gauge
openshift_upgrade_controller_machine_config_pools_upgrading{pool="master"} %d
openshift_upgrade_controller_machine_config_pools_upgrading{pool="worker"} %d
`
	cluster := b2i(upgrading)
	master := b2i(masterUpgrading)
	worker := b2i(workerUpgrading)

	rendered := fmt.Sprintf(exposition, cluster, master, worker)
	return strings.NewReader(rendered)
}

// b2i converts a bool to the integer used in the expected metrics text:
// 1 for true, 0 for false.
func b2i(b bool) int {
	n := 0
	if b {
		n = 1
	}
	return n
}
7 changes: 7 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
"sigs.k8s.io/controller-runtime/pkg/metrics"

managedupgradev1beta1 "github.com/appuio/openshift-upgrade-controller/api/v1beta1"
"github.com/appuio/openshift-upgrade-controller/controllers"
Expand Down Expand Up @@ -105,6 +106,12 @@ func main() {
os.Exit(1)
}

metrics.Registry.MustRegister(&controllers.ClusterUpgradingMetric{
Client: mgr.GetClient(),

ManagedUpstreamClusterVersionName: managedUpstreamClusterVersionName,
})

if err = (&controllers.NodeReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Expand Down

0 comments on commit 9d950e1

Please sign in to comment.