Skip to content

Commit 396a348

Browse files
Furst RomanFurst Roman
authored and committed
propagate current redis cluster state to redisfailover crd
1 parent 632aa3d commit 396a348

File tree

11 files changed

+171
-12
lines changed

11 files changed

+171
-12
lines changed

api/redisfailover/v1/defaults.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ const (
77
defaultExporterImage = "quay.io/oliver006/redis_exporter:v1.43.0"
88
defaultImage = "redis:6.2.6-alpine"
99
defaultRedisPort = 6379
10+
HealthyState = "Healthy"
11+
NotHealthyState = "NotHealthy"
1012
)
1113

1214
var (

api/redisfailover/v1/types.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ import (
1717
type RedisFailover struct {
1818
metav1.TypeMeta `json:",inline"`
1919
metav1.ObjectMeta `json:"metadata,omitempty"`
20-
Spec RedisFailoverSpec `json:"spec"`
20+
Spec RedisFailoverSpec `json:"spec"`
21+
Status RedisFailoverStatus `json:"status,omitempty"`
2122
}
2223

2324
// RedisFailoverSpec represents a Redis failover spec
@@ -198,3 +199,9 @@ type RedisFailoverList struct {
198199

199200
Items []RedisFailover `json:"items"`
200201
}
202+
203+
type RedisFailoverStatus struct {
204+
State string `json:"state,omitempty"`
205+
LastChanged string `json:"lastChanged,omitempty"`
206+
Message string `json:"message,omitempty"`
207+
}

api/redisfailover/v1/validate.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,10 @@ func (r *RedisFailover) Validate() error {
6161
r.Spec.Sentinel.CustomConfig = defaultSentinelCustomConfig
6262
}
6363

64+
r.Status = RedisFailoverStatus{
65+
State: HealthyState,
66+
}
67+
6468
return nil
6569
}
6670

api/redisfailover/v1/validate_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,11 @@ func TestValidate(t *testing.T) {
120120
},
121121
BootstrapNode: test.expectedBootstrapNode,
122122
},
123+
Status: RedisFailoverStatus{
124+
State: HealthyState,
125+
LastChanged: "",
126+
Message: "",
127+
},
123128
}
124129
assert.Equal(expectedRF, rf)
125130
} else {

charts/redisoperator/crds/databases.spotahome.com_redisfailovers.yaml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12387,6 +12387,19 @@ spec:
1238712387
type: array
1238812388
type: object
1238912389
type: object
12390+
status:
12391+
description: CRD status defined by redisfailover cluster state
12392+
properties:
12393+
state:
12394+
description: state of redis failover cluster
12395+
type: string
12396+
lastChanged:
12397+
description: timestamp of last state change
12398+
type: string
12399+
message:
12400+
description: message for current state if needed
12401+
type: string
12402+
type: object
1239012403
required:
1239112404
- spec
1239212405
type: object

mocks/operator/redisfailover/RedisFailover.go

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

mocks/service/k8s/Services.go

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

operator/redisfailover/checker.go

Lines changed: 121 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
package redisfailover
22

33
import (
4+
"context"
45
"errors"
6+
"github.com/spotahome/redis-operator/service/k8s"
7+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
58
"strconv"
69
"time"
710

@@ -85,6 +88,15 @@ func (r *RedisFailoverHandler) UpdateRedisesPods(rf *redisfailoverv1.RedisFailov
8588
// CheckAndHeal runs verification checks to ensure the RedisFailover is in an expected and healthy state.
8689
// If the checks do not match up to expectations, an attempt will be made to "heal" the RedisFailover into a healthy state.
8790
func (r *RedisFailoverHandler) CheckAndHeal(rf *redisfailoverv1.RedisFailover) error {
91+
92+
oldState := rf.Status.State
93+
94+
rf.Status = redisfailoverv1.RedisFailoverStatus{
95+
State: redisfailoverv1.HealthyState,
96+
}
97+
98+
defer updateStatus(r.k8sservice, rf, oldState)
99+
88100
if rf.Bootstrapping() {
89101
return r.checkAndHealBootstrapMode(rf)
90102
}
@@ -99,19 +111,33 @@ func (r *RedisFailoverHandler) CheckAndHeal(rf *redisfailoverv1.RedisFailover) e
99111
// Sentinel knows the correct slave number
100112

101113
if !r.rfChecker.IsRedisRunning(rf) {
102-
setRedisCheckerMetrics(r.mClient, "redis", rf.Namespace, rf.Name, metrics.REDIS_REPLICA_MISMATCH, metrics.NOT_APPLICABLE, errors.New("not all replicas running"))
114+
errorMsg := "not all replicas running"
115+
rf.Status = redisfailoverv1.RedisFailoverStatus{
116+
State: redisfailoverv1.NotHealthyState,
117+
Message: errorMsg,
118+
}
119+
setRedisCheckerMetrics(r.mClient, "redis", rf.Namespace, rf.Name, metrics.REDIS_REPLICA_MISMATCH, metrics.NOT_APPLICABLE, errors.New(errorMsg))
103120
r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Debugf("Number of redis mismatch, waiting for redis statefulset reconcile")
104121
return nil
105122
}
106123

107124
if !r.rfChecker.IsSentinelRunning(rf) {
108-
setRedisCheckerMetrics(r.mClient, "sentinel", rf.Namespace, rf.Name, metrics.SENTINEL_REPLICA_MISMATCH, metrics.NOT_APPLICABLE, errors.New("not all replicas running"))
125+
errorMsg := "not all replicas running"
126+
rf.Status = redisfailoverv1.RedisFailoverStatus{
127+
State: redisfailoverv1.NotHealthyState,
128+
Message: errorMsg,
129+
}
130+
setRedisCheckerMetrics(r.mClient, "sentinel", rf.Namespace, rf.Name, metrics.SENTINEL_REPLICA_MISMATCH, metrics.NOT_APPLICABLE, errors.New(errorMsg))
109131
r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Debugf("Number of sentinel mismatch, waiting for sentinel deployment reconcile")
110132
return nil
111133
}
112134

113135
nMasters, err := r.rfChecker.GetNumberMasters(rf)
114136
if err != nil {
137+
rf.Status = redisfailoverv1.RedisFailoverStatus{
138+
State: redisfailoverv1.NotHealthyState,
139+
Message: "unable to get number of masters",
140+
}
115141
return err
116142
}
117143

@@ -125,7 +151,12 @@ func (r *RedisFailoverHandler) CheckAndHeal(rf *redisfailoverv1.RedisFailover) e
125151
err = r.rfHealer.SetOldestAsMaster(rf)
126152
setRedisCheckerMetrics(r.mClient, "redis", rf.Namespace, rf.Name, metrics.NO_MASTER, metrics.NOT_APPLICABLE, err)
127153
if err != nil {
128-
r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Errorf("Error in Setting oldest Pod as master")
154+
errorMsg := "Error in Setting oldest Pod as master"
155+
rf.Status = redisfailoverv1.RedisFailoverStatus{
156+
State: redisfailoverv1.NotHealthyState,
157+
Message: errorMsg,
158+
}
159+
r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Errorf(errorMsg)
129160
return err
130161
}
131162
return nil
@@ -138,6 +169,10 @@ func (r *RedisFailoverHandler) CheckAndHeal(rf *redisfailoverv1.RedisFailover) e
138169
r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Warningf("Number of Masters running is 0")
139170
maxUptime, err := r.rfChecker.GetMaxRedisPodTime(rf)
140171
if err != nil {
172+
rf.Status = redisfailoverv1.RedisFailoverStatus{
173+
State: redisfailoverv1.NotHealthyState,
174+
Message: "unable to get Redis POD time",
175+
}
141176
return err
142177
}
143178

@@ -150,13 +185,22 @@ func (r *RedisFailoverHandler) CheckAndHeal(rf *redisfailoverv1.RedisFailover) e
150185
err2 := r.rfHealer.SetOldestAsMaster(rf)
151186
setRedisCheckerMetrics(r.mClient, "redis", rf.Namespace, rf.Name, metrics.NO_MASTER, metrics.NOT_APPLICABLE, err2)
152187
if err2 != nil {
153-
r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Errorf("Error in Setting oldest Pod as master")
188+
errorMsg := "Error in Setting oldest Pod as master"
189+
rf.Status = redisfailoverv1.RedisFailoverStatus{
190+
State: redisfailoverv1.NotHealthyState,
191+
Message: errorMsg,
192+
}
193+
r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Errorf(errorMsg)
154194
return err2
155195
}
156196
} else {
157197
// Sentinels have a quorum to perform a failover, but check whether the redis instances still have the local host IP (first boot) as master
158198
status, err2 := r.rfChecker.CheckIfMasterLocalhost(rf)
159199
if err2 != nil {
200+
rf.Status = redisfailoverv1.RedisFailoverStatus{
201+
State: redisfailoverv1.NotHealthyState,
202+
Message: "unable to check if master localhost",
203+
}
160204
r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Errorf("CheckIfMasterLocalhost failed retry later")
161205
return err2
162206
} else if status {
@@ -165,7 +209,12 @@ func (r *RedisFailoverHandler) CheckAndHeal(rf *redisfailoverv1.RedisFailover) e
165209
err3 := r.rfHealer.SetOldestAsMaster(rf)
166210
setRedisCheckerMetrics(r.mClient, "redis", rf.Namespace, rf.Name, metrics.NO_MASTER, metrics.NOT_APPLICABLE, err3)
167211
if err3 != nil {
168-
r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Errorf("Error in Setting oldest Pod as master")
212+
errorMsg := "Error in Setting oldest Pod as master"
213+
rf.Status = redisfailoverv1.RedisFailoverStatus{
214+
State: redisfailoverv1.NotHealthyState,
215+
Message: errorMsg,
216+
}
217+
r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Errorf(errorMsg)
169218
return err3
170219
}
171220

@@ -183,11 +232,20 @@ func (r *RedisFailoverHandler) CheckAndHeal(rf *redisfailoverv1.RedisFailover) e
183232
setRedisCheckerMetrics(r.mClient, "redis", rf.Namespace, rf.Name, metrics.NUMBER_OF_MASTERS, metrics.NOT_APPLICABLE, nil)
184233
default:
185234
setRedisCheckerMetrics(r.mClient, "redis", rf.Namespace, rf.Name, metrics.NUMBER_OF_MASTERS, metrics.NOT_APPLICABLE, errors.New("multiple masters detected"))
186-
return errors.New("more than one master, fix manually")
235+
errorMsg := "more than one master, fix manually"
236+
rf.Status = redisfailoverv1.RedisFailoverStatus{
237+
State: redisfailoverv1.NotHealthyState,
238+
Message: errorMsg,
239+
}
240+
return errors.New(errorMsg)
187241
}
188242

189243
master, err := r.rfChecker.GetMasterIP(rf)
190244
if err != nil {
245+
rf.Status = redisfailoverv1.RedisFailoverStatus{
246+
State: redisfailoverv1.NotHealthyState,
247+
Message: "unable to get master IP",
248+
}
191249
return err
192250
}
193251

@@ -196,23 +254,38 @@ func (r *RedisFailoverHandler) CheckAndHeal(rf *redisfailoverv1.RedisFailover) e
196254
if err != nil {
197255
r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Warningf("Slave not associated to master: %s", err.Error())
198256
if err = r.rfHealer.SetMasterOnAll(master, rf); err != nil {
257+
rf.Status = redisfailoverv1.RedisFailoverStatus{
258+
State: redisfailoverv1.NotHealthyState,
259+
}
199260
return err
200261
}
201262
}
202263

203264
err = r.applyRedisCustomConfig(rf)
204265
setRedisCheckerMetrics(r.mClient, "redis", rf.Namespace, rf.Name, metrics.APPLY_REDIS_CONFIG, metrics.NOT_APPLICABLE, err)
205266
if err != nil {
267+
rf.Status = redisfailoverv1.RedisFailoverStatus{
268+
State: redisfailoverv1.NotHealthyState,
269+
Message: "unable to apply custom config",
270+
}
206271
return err
207272
}
208273

209274
err = r.UpdateRedisesPods(rf)
210275
if err != nil {
276+
rf.Status = redisfailoverv1.RedisFailoverStatus{
277+
State: redisfailoverv1.NotHealthyState,
278+
Message: "unable to update redis PODs",
279+
}
211280
return err
212281
}
213282

214283
sentinels, err := r.rfChecker.GetSentinelsIPs(rf)
215284
if err != nil {
285+
rf.Status = redisfailoverv1.RedisFailoverStatus{
286+
State: redisfailoverv1.NotHealthyState,
287+
Message: "unable to get sentinels IPs",
288+
}
216289
return err
217290
}
218291

@@ -223,6 +296,9 @@ func (r *RedisFailoverHandler) CheckAndHeal(rf *redisfailoverv1.RedisFailover) e
223296
if err != nil {
224297
r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Warningf("Fixing sentinel not monitoring expected master: %s", err.Error())
225298
if err := r.rfHealer.NewSentinelMonitor(sip, master, rf); err != nil {
299+
rf.Status = redisfailoverv1.RedisFailoverStatus{
300+
State: redisfailoverv1.NotHealthyState,
301+
}
226302
return err
227303
}
228304
}
@@ -233,37 +309,62 @@ func (r *RedisFailoverHandler) CheckAndHeal(rf *redisfailoverv1.RedisFailover) e
233309
func (r *RedisFailoverHandler) checkAndHealBootstrapMode(rf *redisfailoverv1.RedisFailover) error {
234310

235311
if !r.rfChecker.IsRedisRunning(rf) {
236-
setRedisCheckerMetrics(r.mClient, "redis", rf.Namespace, rf.Name, metrics.REDIS_REPLICA_MISMATCH, metrics.NOT_APPLICABLE, errors.New("not all replicas running"))
312+
errorMsg := "not all replicas running"
313+
r.k8sservice.UpdateRedisFailoverStatus(context.Background(), rf.Namespace, rf, metav1.UpdateOptions{})
314+
setRedisCheckerMetrics(r.mClient, "redis", rf.Namespace, rf.Name, metrics.REDIS_REPLICA_MISMATCH, metrics.NOT_APPLICABLE, errors.New(errorMsg))
237315
r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Debugf("Number of redis mismatch, waiting for redis statefulset reconcile")
238316
return nil
239317
}
240318

241319
err := r.UpdateRedisesPods(rf)
242320
if err != nil {
243-
return err
321+
rf.Status = redisfailoverv1.RedisFailoverStatus{
322+
State: redisfailoverv1.NotHealthyState,
323+
Message: "unable to update Redis PODs",
324+
}
244325
}
245326
err = r.applyRedisCustomConfig(rf)
246327
setRedisCheckerMetrics(r.mClient, "redis", rf.Namespace, rf.Name, metrics.APPLY_REDIS_CONFIG, metrics.NOT_APPLICABLE, err)
247328
if err != nil {
329+
rf.Status = redisfailoverv1.RedisFailoverStatus{
330+
State: redisfailoverv1.NotHealthyState,
331+
Message: "unable to set Redis custom config",
332+
}
248333
return err
249334
}
250335

251336
bootstrapSettings := rf.Spec.BootstrapNode
252337
err = r.rfHealer.SetExternalMasterOnAll(bootstrapSettings.Host, bootstrapSettings.Port, rf)
253338
setRedisCheckerMetrics(r.mClient, "redis", rf.Namespace, rf.Name, metrics.APPLY_EXTERNAL_MASTER, metrics.NOT_APPLICABLE, err)
254339
if err != nil {
340+
rf.Status = redisfailoverv1.RedisFailoverStatus{
341+
State: redisfailoverv1.NotHealthyState,
342+
Message: "unable to set external master to all",
343+
}
255344
return err
256345
}
257346

258347
if rf.SentinelsAllowed() {
259348
if !r.rfChecker.IsSentinelRunning(rf) {
260-
setRedisCheckerMetrics(r.mClient, "sentinel", rf.Namespace, rf.Name, metrics.SENTINEL_REPLICA_MISMATCH, metrics.NOT_APPLICABLE, errors.New("not all replicas running"))
349+
errorMsg := "not all replicas running"
350+
rf.Status = redisfailoverv1.RedisFailoverStatus{
351+
State: redisfailoverv1.NotHealthyState,
352+
Message: errorMsg,
353+
}
354+
r.k8sservice.UpdateRedisFailoverStatus(context.Background(), rf.Namespace, rf, metav1.UpdateOptions{})
355+
setRedisCheckerMetrics(r.mClient, "sentinel", rf.Namespace, rf.Name, metrics.SENTINEL_REPLICA_MISMATCH, metrics.NOT_APPLICABLE, errors.New(errorMsg))
261356
r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Debugf("Number of sentinel mismatch, waiting for sentinel deployment reconcile")
262357
return nil
358+
} else {
359+
r.k8sservice.UpdateRedisFailoverStatus(context.Background(), rf.Namespace, rf, metav1.UpdateOptions{})
263360
}
264361

265362
sentinels, err := r.rfChecker.GetSentinelsIPs(rf)
266363
if err != nil {
364+
rf.Status = redisfailoverv1.RedisFailoverStatus{
365+
State: redisfailoverv1.NotHealthyState,
366+
Message: "unable to get sentinels IPs",
367+
}
267368
return err
268369
}
269370
for _, sip := range sentinels {
@@ -272,6 +373,10 @@ func (r *RedisFailoverHandler) checkAndHealBootstrapMode(rf *redisfailoverv1.Red
272373
if err != nil {
273374
r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Warningf("Fixing sentinel not monitoring expected master: %s", err.Error())
274375
if err := r.rfHealer.NewSentinelMonitorWithPort(sip, bootstrapSettings.Host, bootstrapSettings.Port, rf); err != nil {
376+
rf.Status = redisfailoverv1.RedisFailoverStatus{
377+
State: redisfailoverv1.NotHealthyState,
378+
Message: "unable to check sentinel monitor",
379+
}
275380
return err
276381
}
277382
}
@@ -346,3 +451,10 @@ func setRedisCheckerMetrics(metricsClient metrics.Recorder, mode /* redis or sen
346451
}
347452
}
348453
}
454+
455+
func updateStatus(k8sservice k8s.Services, rf *redisfailoverv1.RedisFailover, oldState string) {
456+
if oldState != rf.Status.State {
457+
rf.Status.LastChanged = time.Now().Format(time.RFC3339)
458+
}
459+
k8sservice.UpdateRedisFailoverStatus(context.Background(), rf.Namespace, rf, metav1.UpdateOptions{})
460+
}

operator/redisfailover/checker_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package redisfailover_test
33
import (
44
"errors"
55
"fmt"
6+
v1 "github.com/spotahome/redis-operator/api/redisfailover/v1"
67
"testing"
78
"time"
89

@@ -420,8 +421,10 @@ func TestCheckAndHeal(t *testing.T) {
420421

421422
if expErr {
422423
assert.Error(err)
424+
assert.Equal(v1.NotHealthyState, rf.Status.State)
423425
} else {
424426
assert.NoError(err)
427+
assert.Equal(v1.HealthyState, rf.Status.State)
425428
}
426429
mrfc.AssertExpectations(t)
427430
mrfh.AssertExpectations(t)

0 commit comments

Comments
 (0)