From 4a2f9550a75857a29dd4761cbf06634249a36e56 Mon Sep 17 00:00:00 2001 From: yati1998 Date: Wed, 12 Jun 2024 16:24:13 +0530 Subject: [PATCH] Add group snapshot controller metrics this commit adds metrics for volumegroupsnapshot Signed-off-by: yati1998 --- .../groupsnapshot_controller_helper.go | 98 +++++++++++- pkg/metrics/metrics.go | 4 + pkg/metrics/metrics_group.go | 92 ++++++++++++ pkg/metrics/metrics_test.go | 140 ++++++++++++++++++ 4 files changed, 331 insertions(+), 3 deletions(-) create mode 100644 pkg/metrics/metrics_group.go diff --git a/pkg/common-controller/groupsnapshot_controller_helper.go b/pkg/common-controller/groupsnapshot_controller_helper.go index c4b2dec69..c539d5480 100644 --- a/pkg/common-controller/groupsnapshot_controller_helper.go +++ b/pkg/common-controller/groupsnapshot_controller_helper.go @@ -32,6 +32,7 @@ import ( crdv1alpha1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumegroupsnapshot/v1alpha1" crdv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" + "github.com/kubernetes-csi/external-snapshotter/v8/pkg/metrics" "github.com/kubernetes-csi/external-snapshotter/v8/pkg/utils" ) @@ -263,6 +264,14 @@ func (ctrl *csiSnapshotCommonController) deleteGroupSnapshot(groupSnapshot *crdv _ = ctrl.snapshotStore.Delete(groupSnapshot) klog.V(4).Infof("group snapshot %q deleted", utils.GroupSnapshotKey(groupSnapshot)) + driverName, err := ctrl.getGroupSnapshotDriverName(groupSnapshot) + if err != nil { + klog.Errorf("failed to getGroupSnapshotDriverName while recording metrics for group snapshot %q: %v", utils.GroupSnapshotKey(groupSnapshot), err) + } else { + deleteOperationKey := metrics.NewOperationKey(metrics.DeleteGroupSnapshotOperationName, groupSnapshot.UID) + ctrl.metricsManager.RecordVolumeGroupSnapshotMetrics(deleteOperationKey, metrics.NewSnapshotOperationStatus(metrics.SnapshotStatusTypeSuccess), driverName) + } + groupSnapshotContentName := "" if groupSnapshot.Status != nil && 
groupSnapshot.Status.BoundVolumeGroupSnapshotContentName != nil { groupSnapshotContentName = *groupSnapshot.Status.BoundVolumeGroupSnapshotContentName @@ -376,9 +385,28 @@ func (ctrl *csiSnapshotCommonController) getGroupSnapshotContentFromStore(conten func (ctrl *csiSnapshotCommonController) syncUnreadyGroupSnapshot(groupSnapshot *crdv1alpha1.VolumeGroupSnapshot) error { uniqueGroupSnapshotName := utils.GroupSnapshotKey(groupSnapshot) klog.V(5).Infof("syncUnreadyGroupSnapshot %s", uniqueGroupSnapshotName) - /* - TODO: Add metrics - */ + driverName, err := ctrl.getGroupSnapshotDriverName(groupSnapshot) + if err != nil { + klog.Errorf("failed to getGroupSnapshotDriverName while recording metrics for groupsnapshot %q: %s", utils.GroupSnapshotKey(groupSnapshot), err) + } + + groupSnapshotProvisionType := metrics.DynamicGroupSnapshotType + if groupSnapshot.Spec.Source.VolumeGroupSnapshotContentName != nil { + groupSnapshotProvisionType = metrics.PreProvisionedGroupSnapshotType + } + + // Start metrics operations for volumegroupsnapshot + if !utils.IsGroupSnapshotCreated(groupSnapshot) { + // Only start CreateGroupSnapshot operation if the groupsnapshot has not been cut + ctrl.metricsManager.OperationStart( + metrics.NewOperationKey(metrics.CreateGroupSnapshotOperationName, groupSnapshot.UID), + metrics.NewOperationValue(driverName, groupSnapshotProvisionType), + ) + } + ctrl.metricsManager.OperationStart( + metrics.NewOperationKey(metrics.CreateGroupSnapshotAndReadyOperationName, groupSnapshot.UID), + metrics.NewOperationValue(driverName, groupSnapshotProvisionType), + ) // Pre-provisioned snapshot if groupSnapshot.Spec.Source.VolumeGroupSnapshotContentName != nil { @@ -664,12 +692,20 @@ func (ctrl *csiSnapshotCommonController) updateGroupSnapshotStatus(groupSnapshot groupSnapshotClone := groupSnapshotObj.DeepCopy() groupSnapshotClone.Status = newStatus + // We need to record metrics before updating the status due to a bug causing cache entries after a failed 
UpdateStatus call. + // The driver name is read from the group snapshot content, and the + // CreateGroupSnapshot operation key is built up front so the metric can be + // emitted as soon as the status transition checked below is detected. + driverName := groupSnapshotContent.Spec.Driver + createOperationKey := metrics.NewOperationKey(metrics.CreateGroupSnapshotOperationName, groupSnapshot.UID) + // Must meet the following criteria to emit a successful CreateGroupSnapshot status // 1. Previous status was nil OR Previous status had a nil CreationTime // 2. New status must be non-nil with a non-nil CreationTime if !utils.IsGroupSnapshotCreated(groupSnapshotObj) && utils.IsGroupSnapshotCreated(groupSnapshotClone) { msg := fmt.Sprintf("GroupSnapshot %s was successfully created by the CSI driver.", utils.GroupSnapshotKey(groupSnapshot)) ctrl.eventRecorder.Event(groupSnapshot, v1.EventTypeNormal, "GroupSnapshotCreated", msg) + ctrl.metricsManager.RecordVolumeGroupSnapshotMetrics(createOperationKey, metrics.NewSnapshotOperationStatus(metrics.SnapshotStatusTypeSuccess), driverName) } // Must meet the following criteria to emit a successful CreateGroupSnapshotAndReady status @@ -678,6 +714,8 @@ func (ctrl *csiSnapshotCommonController) updateGroupSnapshotStatus(groupSnapshot if !utils.IsGroupSnapshotReady(groupSnapshotObj) && utils.IsGroupSnapshotReady(groupSnapshotClone) { msg := fmt.Sprintf("GroupSnapshot %s is ready to use.", utils.GroupSnapshotKey(groupSnapshot)) ctrl.eventRecorder.Event(groupSnapshot, v1.EventTypeNormal, "GroupSnapshotReady", msg) + createAndReadyOperation := metrics.NewOperationKey(metrics.CreateGroupSnapshotAndReadyOperationName, groupSnapshot.UID) + ctrl.metricsManager.RecordVolumeGroupSnapshotMetrics(createAndReadyOperation, metrics.NewSnapshotOperationStatus(metrics.SnapshotStatusTypeSuccess), driverName) } newGroupSnapshotObj, err := ctrl.clientset.GroupsnapshotV1alpha1().VolumeGroupSnapshots(groupSnapshotClone.Namespace).UpdateStatus(context.TODO(), 
groupSnapshotClone, metav1.UpdateOptions{}) @@ -1126,6 +1164,21 @@ func (ctrl *csiSnapshotCommonController) addGroupSnapshotFinalizer(groupSnapshot func (ctrl *csiSnapshotCommonController) processGroupSnapshotWithDeletionTimestamp(groupSnapshot *crdv1alpha1.VolumeGroupSnapshot) error { klog.V(5).Infof("processGroupSnapshotWithDeletionTimestamp VolumeGroupSnapshot[%s]: %s", utils.GroupSnapshotKey(groupSnapshot), utils.GetGroupSnapshotStatusForLogging(groupSnapshot)) + driverName, err := ctrl.getGroupSnapshotDriverName(groupSnapshot) + if err != nil { + klog.Errorf("failed to getGroupSnapshotDriverName while recording metrics for group snapshot %q: %v", utils.GroupSnapshotKey(groupSnapshot), err) + } + + groupSnapshotProvisionType := metrics.DynamicGroupSnapshotType + if groupSnapshot.Spec.Source.VolumeGroupSnapshotContentName != nil { + groupSnapshotProvisionType = metrics.PreProvisionedGroupSnapshotType + } + + // Processing delete, start operation metric + deleteOperationKey := metrics.NewOperationKey(metrics.DeleteGroupSnapshotOperationName, groupSnapshot.UID) + deleteOperationValue := metrics.NewOperationValue(driverName, groupSnapshotProvisionType) + ctrl.metricsManager.OperationStart(deleteOperationKey, deleteOperationValue) + var groupSnapshotContentName string if groupSnapshot.Status != nil && groupSnapshot.Status.BoundVolumeGroupSnapshotContentName != nil { groupSnapshotContentName = *groupSnapshot.Status.BoundVolumeGroupSnapshotContentName @@ -1297,3 +1350,42 @@ func (ctrl *csiSnapshotCommonController) removeGroupSnapshotFinalizer(groupSnaps klog.V(5).Infof("Removed protection finalizer from volume group snapshot %s", utils.GroupSnapshotKey(groupSnapshot)) return nil } + +// getGroupSnapshotDriverName is a helper function to get driver from the VolumeGroupSnapshot. +// We try to get the driverName in multiple ways, as snapshot controller metrics depend on the correct driverName. 
+func (ctrl *csiSnapshotCommonController) getGroupSnapshotDriverName(vgs *crdv1alpha1.VolumeGroupSnapshot) (string, error) { + klog.V(5).Infof("getGroupSnapshotDriverName: VolumeGroupSnapshot[%s]", vgs.Name) + var driverName string + + // Pre-Provisioned groupsnapshots have contentName as source + var contentName string + if vgs.Spec.Source.VolumeGroupSnapshotContentName != nil { + contentName = *vgs.Spec.Source.VolumeGroupSnapshotContentName + } + + // Get Driver name from GroupSnapshotContent if we found a contentName + if contentName != "" { + content, err := ctrl.groupSnapshotContentLister.Get(contentName) + if err != nil { + klog.Errorf("getGroupSnapshotDriverName: failed to get groupSnapshotContent %q: %v", contentName, err) + } else { + driverName = content.Spec.Driver + } + + if driverName != "" { + return driverName, nil + } + } + + // Dynamic groupsnapshots will have a groupsnapshotclass with a driver. NOTE(review): getSnapshotClass presumably resolves a VolumeSnapshotClass rather than a VolumeGroupSnapshotClass - confirm this is the intended lookup helper. + if vgs.Spec.VolumeGroupSnapshotClassName != nil { + class, err := ctrl.getSnapshotClass(*vgs.Spec.VolumeGroupSnapshotClassName) + if err != nil { + klog.Errorf("getGroupSnapshotDriverName: failed to get groupsnapshotClass %q: %v", *vgs.Spec.VolumeGroupSnapshotClassName, err) + } else { + driverName = class.Driver + } + } + + return driverName, nil +} diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 82188e450..1e3569b87 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -108,6 +108,10 @@ type MetricsManager interface { // "Unknown" status of the passed-in operation is assumed. RecordMetrics(op OperationKey, status OperationStatus, driverName string) + // RecordVolumeGroupSnapshotMetrics records a metric for operations related to + // VolumeGroupSnapshot + RecordVolumeGroupSnapshotMetrics(op OperationKey, status OperationStatus, driverName string) + // GetRegistry() returns the metrics.KubeRegistry used by this metrics manager. 
GetRegistry() k8smetrics.KubeRegistry } diff --git a/pkg/metrics/metrics_group.go b/pkg/metrics/metrics_group.go new file mode 100644 index 000000000..5c5a6fb2a --- /dev/null +++ b/pkg/metrics/metrics_group.go @@ -0,0 +1,92 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "time" +) + +const ( + // CreateGroupSnapshotOperationName is the operation that tracks how long the controller takes to create a groupsnapshot. + // Specifically, the operation metric is emitted based on the following timestamps: + // - Start_time: controller notices the first time that there is a new VolumeGroupSnapshot CR to dynamically provision a groupsnapshot + // - End_time: controller notices that the CR has a status with CreationTime field set to be non-nil + CreateGroupSnapshotOperationName = "CreateGroupSnapshot" + + // CreateGroupSnapshotAndReadyOperationName is the operation that tracks how long the controller takes to create a groupsnapshot and for it to be ready. 
+ // Specifically, the operation metric is emitted based on the following timestamps: + // - Start_time: controller notices the first time that there is a new VolumeGroupSnapshot CR (both dynamic and pre-provisioned cases) + // - End_time: controller notices that the CR has a status with Ready To Use field set to be true + CreateGroupSnapshotAndReadyOperationName = "CreateGroupSnapshotAndReady" + + // DeleteGroupSnapshotOperationName is the operation that tracks how long a groupsnapshot deletion takes. + // Specifically, the operation metric is emitted based on the following timestamps: + // - Start_time: controller notices the first time that there is a deletion timestamp placed on the VolumeGroupSnapshot CR and the CR is ready to be deleted. + // Note that if the CR is being used by a PVC for rehydration, the controller should *NOT* set the start_time. + // - End_time: controller removed all finalizers on the VolumeGroupSnapshot CR such that the CR is ready to be removed in the API server. + DeleteGroupSnapshotOperationName = "DeleteGroupSnapshot" + // DynamicGroupSnapshotType represents a groupsnapshot that is being dynamically provisioned + DynamicGroupSnapshotType = snapshotProvisionType("dynamic") + // PreProvisionedGroupSnapshotType represents a groupsnapshot that is pre-provisioned + PreProvisionedGroupSnapshotType = snapshotProvisionType("pre-provisioned") +) + +// RecordVolumeGroupSnapshotMetrics emits the latency metric for a cached VolumeGroupSnapshot operation and removes that operation from the cache. +func (opMgr *operationMetricsManager) RecordVolumeGroupSnapshotMetrics(opKey OperationKey, opStatus OperationStatus, driverName string) { + opMgr.mu.Lock() + defer opMgr.mu.Unlock() + opVal, exists := opMgr.cache[opKey] + if !exists { + // the operation has not been cached, return directly + return + } + status := string(SnapshotStatusTypeUnknown) + if opStatus != nil { + status = opStatus.String() + } + + // if we do not know the driverName while recording metrics, + // refer to the cached version instead. 
+ if driverName == "" || driverName == unknownDriverName { + driverName = opVal.Driver + } + + operationDuration := time.Since(opVal.startTime).Seconds() + opMgr.opLatencyMetrics.WithLabelValues(driverName, opKey.Name, opVal.SnapshotType, status).Observe(operationDuration) + + // Report cancel metrics if we are deleting an unfinished VolumeGroupSnapshot + if opKey.Name == DeleteGroupSnapshotOperationName { + // check if we have a CreateGroupSnapshot operation pending for this + createKey := NewOperationKey(CreateGroupSnapshotOperationName, opKey.ResourceID) + obj, exists := opMgr.cache[createKey] + if exists { + // record a cancel metric if found + opMgr.recordCancelMetricLocked(obj, createKey, operationDuration) + } + + // check if we have a CreateGroupSnapshotAndReady operation pending for this + createAndReadyKey := NewOperationKey(CreateGroupSnapshotAndReadyOperationName, opKey.ResourceID) + obj, exists = opMgr.cache[createAndReadyKey] + if exists { + // record a cancel metric if found + opMgr.recordCancelMetricLocked(obj, createAndReadyKey, operationDuration) + } + } + + delete(opMgr.cache, opKey) + opMgr.opInFlight.Set(float64(len(opMgr.cache))) +} diff --git a/pkg/metrics/metrics_test.go b/pkg/metrics/metrics_test.go index be38a4baa..bd0883e52 100644 --- a/pkg/metrics/metrics_test.go +++ b/pkg/metrics/metrics_test.go @@ -738,3 +738,143 @@ func TestProcessStartTimeMetricExist(t *testing.T) { t.Fatalf("Metrics does not contain %v. 
Scraped content: %v", processStartTimeMetric, metricsFamilies) } + +func TestRecordVolumeGroupSnapshotMetrics(t *testing.T) { + mgr, srv := initMgr() + srvAddr := "http://" + srv.Addr + httpPattern + defer shutdown(srv) + // add an operation + opKey := OperationKey{ + Name: "op1", + ResourceID: types.UID("uid1"), + } + opVal := NewOperationValue("driver1", DynamicSnapshotType) + mgr.OperationStart(opKey, opVal) + // should create a Success data point with latency ~ 1100ms + time.Sleep(1100 * time.Millisecond) + success := &fakeOpStatus{ + statusCode: 0, + } + mgr.RecordVolumeGroupSnapshotMetrics(opKey, success, "driver") + + // add another operation metric + opKey.Name = "op2" + opKey.ResourceID = types.UID("uid2") + mgr.OperationStart(opKey, opVal) + // should create a Failure data point with latency ~ 100ms + time.Sleep(100 * time.Millisecond) + failure := &fakeOpStatus{ + statusCode: 1, + } + mgr.RecordVolumeGroupSnapshotMetrics(opKey, failure, "driver2") + + expected := + `# HELP snapshot_controller_operation_total_seconds [ALPHA] Total number of seconds spent by the controller on an operation from end to end +# TYPE snapshot_controller_operation_total_seconds histogram +snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="op1",operation_status="Success",snapshot_type="dynamic",le="0.1"} 0 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="op1",operation_status="Success",snapshot_type="dynamic",le="0.25"} 0 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="op1",operation_status="Success",snapshot_type="dynamic",le="0.5"} 0 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="op1",operation_status="Success",snapshot_type="dynamic",le="1"} 0 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="op1",operation_status="Success",snapshot_type="dynamic",le="2.5"} 1 
+snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="op1",operation_status="Success",snapshot_type="dynamic",le="5"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="op1",operation_status="Success",snapshot_type="dynamic",le="10"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="op1",operation_status="Success",snapshot_type="dynamic",le="15"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="op1",operation_status="Success",snapshot_type="dynamic",le="30"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="op1",operation_status="Success",snapshot_type="dynamic",le="60"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="op1",operation_status="Success",snapshot_type="dynamic",le="120"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="op1",operation_status="Success",snapshot_type="dynamic",le="300"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="op1",operation_status="Success",snapshot_type="dynamic",le="600"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver",operation_name="op1",operation_status="Success",snapshot_type="dynamic",le="+Inf"} 1 +snapshot_controller_operation_total_seconds_sum{driver_name="driver",operation_name="op1",operation_status="Success",snapshot_type="dynamic"} 1.1 +snapshot_controller_operation_total_seconds_count{driver_name="driver",operation_name="op1",operation_status="Success",snapshot_type="dynamic"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="dynamic",le="0.1"} 0 
+snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="dynamic",le="0.25"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="dynamic",le="0.5"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="dynamic",le="1"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="dynamic",le="2.5"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="dynamic",le="5"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="dynamic",le="10"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="dynamic",le="15"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="dynamic",le="30"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="dynamic",le="60"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="dynamic",le="120"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="dynamic",le="300"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="dynamic",le="600"} 1 
+snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="dynamic",le="+Inf"} 1 +snapshot_controller_operation_total_seconds_sum{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="dynamic"} 0.1 +snapshot_controller_operation_total_seconds_count{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="dynamic"} 1 +` + if err := verifyMetric(expected, srvAddr); err != nil { + t.Errorf("failed testing [%v]", err) + } +} + +func TestRecordVolumeGroupSnapshotMetricsForPreProvisioned(t *testing.T) { + mgr, srv := initMgr() + srvAddr := "http://" + srv.Addr + httpPattern + defer shutdown(srv) + // add an operation + opKey := OperationKey{ + Name: "op1", + ResourceID: types.UID("uid1"), + } + opVal := NewOperationValue("driver", PreProvisionedGroupSnapshotType) + mgr.OperationStart(opKey, opVal) + // should create a Success data point with latency ~ 1100ms + time.Sleep(1100 * time.Millisecond) + success := &fakeOpStatus{ + statusCode: 0, + } + mgr.RecordVolumeGroupSnapshotMetrics(opKey, success, "driver1") + + // add another operation metric + opKey.Name = "op2" + opKey.ResourceID = types.UID("uid2") + mgr.OperationStart(opKey, opVal) + // should create a Failure data point with latency ~ 100ms + time.Sleep(100 * time.Millisecond) + failure := &fakeOpStatus{ + statusCode: 1, + } + mgr.RecordVolumeGroupSnapshotMetrics(opKey, failure, "driver2") + + expected := + `# HELP snapshot_controller_operation_total_seconds [ALPHA] Total number of seconds spent by the controller on an operation from end to end +# TYPE snapshot_controller_operation_total_seconds histogram +snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="pre-provisioned",le="0.1"} 0 
+snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="pre-provisioned",le="0.25"} 0 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="pre-provisioned",le="0.5"} 0 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="pre-provisioned",le="1"} 0 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="pre-provisioned",le="2.5"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="pre-provisioned",le="5"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="pre-provisioned",le="10"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="pre-provisioned",le="15"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="pre-provisioned",le="30"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="pre-provisioned",le="60"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="pre-provisioned",le="120"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="pre-provisioned",le="300"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="pre-provisioned",le="600"} 1 
+snapshot_controller_operation_total_seconds_bucket{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="pre-provisioned",le="+Inf"} 1 +snapshot_controller_operation_total_seconds_sum{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="pre-provisioned"} 1.1 +snapshot_controller_operation_total_seconds_count{driver_name="driver1",operation_name="op1",operation_status="Success",snapshot_type="pre-provisioned"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="pre-provisioned",le="0.1"} 0 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="pre-provisioned",le="0.25"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="pre-provisioned",le="0.5"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="pre-provisioned",le="1"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="pre-provisioned",le="2.5"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="pre-provisioned",le="5"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="pre-provisioned",le="10"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="pre-provisioned",le="15"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="pre-provisioned",le="30"} 1 
+snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="pre-provisioned",le="60"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="pre-provisioned",le="120"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="pre-provisioned",le="300"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="pre-provisioned",le="600"} 1 +snapshot_controller_operation_total_seconds_bucket{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="pre-provisioned",le="+Inf"} 1 +snapshot_controller_operation_total_seconds_sum{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="pre-provisioned"} 0.1 +snapshot_controller_operation_total_seconds_count{driver_name="driver2",operation_name="op2",operation_status="Failure",snapshot_type="pre-provisioned"} 1 +` + if err := verifyMetric(expected, srvAddr); err != nil { + t.Errorf("failed testing [%v]", err) + } +}