Skip to content

Commit 8675e14

Browse files
fix: Fix karpenter_nodeclaims_drifted metric for v0.32.x (#880)
1 parent c79a02d commit 8675e14

File tree

5 files changed

+93
-4
lines changed

5 files changed

+93
-4
lines changed

pkg/controllers/nodeclaim/disruption/drift.go

+1-1
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,7 @@ func (d *Drift) Reconcile(ctx context.Context, nodePool *v1beta1.NodePool, nodeC
8686
Reason: string(driftedReason),
8787
})
8888
if !hasDriftedCondition {
89-
logging.FromContext(ctx).Debugf("marking drifted")
89+
logging.FromContext(ctx).With("reason", string(driftedReason)).Debugf("marking drifted")
9090
nodeclaimutil.DisruptedCounter(nodeClaim, metrics.DriftReason).Inc()
9191
nodeclaimutil.DriftedCounter(nodeClaim, string(driftedReason)).Inc()
9292
}

pkg/controllers/nodeclaim/disruption/nodeclaim_drift_test.go

+53
Original file line number | Diff line number | Diff line change
@@ -56,6 +56,59 @@ var _ = Describe("NodeClaim/Drift", func() {
5656
// NodeClaims are required to be launched before they can be evaluated for drift
5757
nodeClaim.StatusConditions().MarkTrue(v1beta1.Launched)
5858
})
59+
Context("Metrics", func() {
60+
It("should fire a karpenter_nodeclaims_drifted metric when drifted", func() {
61+
cp.Drifted = "CloudProviderDrifted"
62+
ExpectApplied(ctx, env.Client, nodePool, nodeClaim)
63+
ExpectReconcileSucceeded(ctx, nodeClaimDisruptionController, client.ObjectKeyFromObject(nodeClaim))
64+
65+
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
66+
Expect(nodeClaim.StatusConditions().GetCondition(v1beta1.Drifted).IsTrue()).To(BeTrue())
67+
metric, found := FindMetricWithLabelValues("karpenter_nodeclaims_drifted", map[string]string{
68+
"type": "CloudProviderDrifted",
69+
"nodepool": nodePool.Name,
70+
})
71+
Expect(found).To(BeTrue())
72+
Expect(metric.GetCounter().GetValue()).To(BeNumerically("==", 1))
73+
})
74+
It("should pass-through the correct drifted type value through the karpenter_nodeclaims_drifted metric", func() {
75+
cp.Drifted = "drifted"
76+
nodePool.Spec.Template.Spec.Requirements = []v1.NodeSelectorRequirement{
77+
{
78+
Key: v1.LabelInstanceTypeStable,
79+
Operator: v1.NodeSelectorOpDoesNotExist,
80+
},
81+
}
82+
ExpectApplied(ctx, env.Client, nodePool, nodeClaim)
83+
ExpectReconcileSucceeded(ctx, nodeClaimDisruptionController, client.ObjectKeyFromObject(nodeClaim))
84+
85+
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
86+
Expect(nodeClaim.StatusConditions().GetCondition(v1beta1.Drifted).IsTrue()).To(BeTrue())
87+
Expect(nodeClaim.StatusConditions().GetCondition(v1beta1.Drifted).Reason).To(Equal(string(disruption.RequirementsDrifted)))
88+
89+
metric, found := FindMetricWithLabelValues("karpenter_nodeclaims_drifted", map[string]string{
90+
"type": "RequirementsDrifted",
91+
"nodepool": nodePool.Name,
92+
})
93+
Expect(found).To(BeTrue())
94+
Expect(metric.GetCounter().GetValue()).To(BeNumerically("==", 1))
95+
})
96+
It("should fire a karpenter_nodeclaims_disrupted metric when drifted", func() {
97+
cp.Drifted = "drifted"
98+
ExpectApplied(ctx, env.Client, nodePool, nodeClaim)
99+
ExpectReconcileSucceeded(ctx, nodeClaimDisruptionController, client.ObjectKeyFromObject(nodeClaim))
100+
101+
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
102+
Expect(nodeClaim.StatusConditions().GetCondition(v1beta1.Drifted).IsTrue()).To(BeTrue())
103+
104+
metric, found := FindMetricWithLabelValues("karpenter_nodeclaims_disrupted", map[string]string{
105+
"type": "drift",
106+
"nodepool": nodePool.Name,
107+
})
108+
Expect(found).To(BeTrue())
109+
Expect(metric.GetCounter().GetValue()).To(BeNumerically("==", 1))
110+
})
111+
})
59112
It("should detect drift", func() {
60113
cp.Drifted = "drifted"
61114
ExpectApplied(ctx, env.Client, nodePool, nodeClaim)

pkg/controllers/nodeclaim/disruption/nodeclaim_emptiness_test.go

+18-1
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,24 @@ var _ = Describe("NodeClaim/Emptiness", func() {
4949
},
5050
})
5151
})
52-
52+
Context("Metrics", func() {
53+
It("should fire a karpenter_nodeclaims_disrupted metric when empty", func() {
54+
ExpectApplied(ctx, env.Client, nodePool, nodeClaim, node)
55+
ExpectMakeNodeClaimsInitialized(ctx, env.Client, nodeClaim)
56+
57+
ExpectReconcileSucceeded(ctx, nodeClaimDisruptionController, client.ObjectKeyFromObject(nodeClaim))
58+
59+
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
60+
Expect(nodeClaim.StatusConditions().GetCondition(v1beta1.Empty).IsTrue()).To(BeTrue())
61+
62+
metric, found := FindMetricWithLabelValues("karpenter_nodeclaims_disrupted", map[string]string{
63+
"type": "emptiness",
64+
"nodepool": nodePool.Name,
65+
})
66+
Expect(found).To(BeTrue())
67+
Expect(metric.GetCounter().GetValue()).To(BeNumerically("==", 1))
68+
})
69+
})
5370
It("should mark NodeClaims as empty", func() {
5471
ExpectApplied(ctx, env.Client, nodePool, nodeClaim, node)
5572
ExpectMakeNodeClaimsInitialized(ctx, env.Client, nodeClaim)

pkg/controllers/nodeclaim/disruption/nodeclaim_expiration_test.go

+20-1
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,26 @@ var _ = Describe("NodeClaim/Expiration", func() {
4343
},
4444
})
4545
})
46-
46+
Context("Metrics", func() {
47+
It("should fire a karpenter_nodeclaims_disrupted metric when expired", func() {
48+
nodePool.Spec.Disruption.ExpireAfter.Duration = lo.ToPtr(time.Second * 30)
49+
ExpectApplied(ctx, env.Client, nodePool, nodeClaim)
50+
51+
// step forward to make the node expired
52+
fakeClock.Step(60 * time.Second)
53+
ExpectReconcileSucceeded(ctx, nodeClaimDisruptionController, client.ObjectKeyFromObject(nodeClaim))
54+
55+
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
56+
Expect(nodeClaim.StatusConditions().GetCondition(v1beta1.Expired).IsTrue()).To(BeTrue())
57+
58+
metric, found := FindMetricWithLabelValues("karpenter_nodeclaims_disrupted", map[string]string{
59+
"type": "expiration",
60+
"nodepool": nodePool.Name,
61+
})
62+
Expect(found).To(BeTrue())
63+
Expect(metric.GetCounter().GetValue()).To(BeNumerically("==", 1))
64+
})
65+
})
4766
It("should remove the status condition from the NodeClaims when expiration is disabled", func() {
4867
nodePool.Spec.Disruption.ExpireAfter.Duration = nil
4968
nodeClaim.StatusConditions().MarkTrue(v1beta1.Expired)

pkg/utils/nodeclaim/nodeclaim.go

+1-1
Original file line number | Diff line number | Diff line change
@@ -435,7 +435,7 @@ func DriftedCounter(nodeClaim *v1beta1.NodeClaim, driftType string) prometheus.C
435435
metrics.ProvisionerLabel: nodeClaim.Labels[v1alpha5.ProvisionerNameLabelKey],
436436
})
437437
}
438-
return metrics.NodeClaimsDisruptedCounter.With(prometheus.Labels{
438+
return metrics.NodeClaimsDriftedCounter.With(prometheus.Labels{
439439
metrics.TypeLabel: driftType,
440440
metrics.NodePoolLabel: nodeClaim.Labels[v1beta1.NodePoolLabelKey],
441441
})

0 commit comments

Comments
 (0)