Skip to content

Commit dcf1dbf

Browse files
author
Simon Plourde
authored
Bugfix for sensuctl cluster health (#3469)
Signed-off-by: Eric Chlebek <[email protected]>
1 parent 285e09a commit dcf1dbf

File tree

3 files changed

+37
-8
lines changed

3 files changed

+37
-8
lines changed

backend/apid/routers/health.go

+3 -3
Original file line number | Diff line number | Diff line change
@@ -40,9 +40,9 @@ func (r *HealthRouter) health(w http.ResponseWriter, req *http.Request) {
4040
}
4141
ctx := req.Context()
4242
if timeout > 0 {
43-
tctx, cancel := context.WithTimeout(ctx, time.Duration(timeout)*time.Second)
44-
defer cancel()
45-
ctx = tctx
43+
// We're storing the timeout as a value so it can be used by several
44+
// contexts in GetClusterHealth, which is a concurrent gatherer.
45+
ctx = context.WithValue(ctx, "timeout", time.Duration(timeout)*time.Second)
4646
}
4747
clusterHealth := r.controller.GetClusterHealth(ctx)
4848
_ = json.NewEncoder(w).Encode(clusterHealth)

backend/store/etcd/health_store.go

+26 -5
Original file line number | Diff line number | Diff line change
@@ -69,10 +69,22 @@ func (s *Store) getHealth(ctx context.Context, id uint64, name string, urls []st
6969
func (s *Store) GetClusterHealth(ctx context.Context, cluster clientv3.Cluster, etcdClientTLSConfig *tls.Config) *corev2.HealthResponse {
7070
healthResponse := &corev2.HealthResponse{}
7171

72+
var timeout time.Duration
73+
if val := ctx.Value("timeout"); val != nil {
74+
timeout, _ = val.(time.Duration)
75+
}
76+
7277
// Do a get op against every cluster member. Collect the memberIDs and
7378
// op errors into a response map, and return this map as etcd health
7479
// information.
75-
mList, err := cluster.MemberList(ctx)
80+
tctx := ctx
81+
if timeout > 0 {
82+
var cancel context.CancelFunc
83+
tctx, cancel = context.WithTimeout(ctx, timeout)
84+
defer cancel()
85+
}
86+
87+
mList, err := cluster.MemberList(tctx)
7688
if err != nil {
7789
logger.WithError(err).Error("could not get the cluster member list")
7890
healthResponse.ClusterHealth = []*corev2.ClusterHealth{&corev2.ClusterHealth{
@@ -96,10 +108,13 @@ func (s *Store) GetClusterHealth(ctx context.Context, cluster clientv3.Cluster,
96108
for _, member := range mList.Members {
97109
go func(id uint64, name string, urls []string) {
98110
defer wg.Done()
99-
select {
100-
case healths <- s.getHealth(ctx, id, name, urls, etcdClientTLSConfig):
101-
case <-ctx.Done():
111+
tctx := ctx
112+
if timeout > 0 {
113+
var cancel context.CancelFunc
114+
tctx, cancel = context.WithTimeout(ctx, timeout)
115+
defer cancel()
102116
}
117+
healths <- s.getHealth(tctx, id, name, urls, etcdClientTLSConfig)
103118
}(member.ID, member.Name, member.ClientURLs)
104119
}
105120

@@ -112,7 +127,13 @@ func (s *Store) GetClusterHealth(ctx context.Context, cluster clientv3.Cluster,
112127
return healthResponse.ClusterHealth[i].Name < healthResponse.ClusterHealth[j].Name
113128
})
114129

115-
alarmResponse, err := s.client.Maintenance.AlarmList(ctx)
130+
if timeout > 0 {
131+
var cancel context.CancelFunc
132+
tctx, cancel = context.WithTimeout(ctx, timeout)
133+
defer cancel()
134+
}
135+
136+
alarmResponse, err := s.client.Maintenance.AlarmList(tctx)
116137
if err != nil {
117138
logger.WithError(err).Error("failed to fetch etcd alarm list")
118139
} else {

backend/store/etcd/health_store_test.go

+8
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@ import (
66
"context"
77
"crypto/tls"
88
"testing"
9+
"time"
910

1011
"github.com/coreos/etcd/clientv3"
1112
"github.com/sensu/sensu-go/backend/store"
@@ -18,3 +19,10 @@ func TestGetClusterHealth(t *testing.T) {
1819
assert.Empty(t, healthResult.ClusterHealth[0].Err)
1920
})
2021
}
22+
23+
func TestGetClusterHealthTimeout(t *testing.T) {
24+
testWithEtcdClient(t, func(store store.Store, client *clientv3.Client) {
25+
result := store.GetClusterHealth(context.WithValue(context.Background(), "timeout", time.Nanosecond), client.Cluster, (*tls.Config)(nil))
26+
assert.NotEmpty(t, result.ClusterHealth[0].Err)
27+
})
28+
}

0 commit comments

Comments
 (0)