From 5a9b293ac12ff294c184d23bdca8823eb32e44a0 Mon Sep 17 00:00:00 2001
From: Spencer Amann
Date: Tue, 11 Jul 2023 08:56:51 -0400
Subject: [PATCH] check ping if healthz returns non 200

---
Review notes (this section is ignored by git am):
- A failing ping check now also emits monitor.clustererrors, matching how a
  failing healthz check is reported in the same function.
- The post-image blob ids on the index lines predate these review edits.

 pkg/monitor/cluster/cluster.go |  9 ++++++++-
 pkg/monitor/cluster/healthz.go | 19 +++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/pkg/monitor/cluster/cluster.go b/pkg/monitor/cluster/cluster.go
index b4e934bcf53..806d666b129 100644
--- a/pkg/monitor/cluster/cluster.go
+++ b/pkg/monitor/cluster/cluster.go
@@ -137,7 +137,7 @@ func (mon *Monitor) Monitor(ctx context.Context) (errs []error) {
 		})
 	}
 
-	// If API is not returning 200, don't need to run the next checks
+	// If API is not returning 200, fall back to checking ping and short-circuit the rest of the checks
 	statusCode, err := mon.emitAPIServerHealthzCode(ctx)
 	if err != nil {
 		errs = append(errs, err)
@@ -146,6 +146,13 @@ func (mon *Monitor) Monitor(ctx context.Context) (errs []error) {
 		mon.emitGauge("monitor.clustererrors", 1, map[string]string{"monitor": friendlyFuncName})
 	}
 	if statusCode != http.StatusOK {
+		// Record the ping endpoint's status code as well, then skip the remaining checks.
+		if err := mon.emitAPIServerPingCode(ctx); err != nil {
+			errs = append(errs, err)
+			friendlyFuncName := steps.FriendlyName(mon.emitAPIServerPingCode)
+			mon.log.Printf("%s: %s", friendlyFuncName, err)
+			mon.emitGauge("monitor.clustererrors", 1, map[string]string{"monitor": friendlyFuncName})
+		}
 		return
 	}
 	for _, f := range []func(context.Context) error{
diff --git a/pkg/monitor/cluster/healthz.go b/pkg/monitor/cluster/healthz.go
index 2cd271aa23a..436f063c95e 100644
--- a/pkg/monitor/cluster/healthz.go
+++ b/pkg/monitor/cluster/healthz.go
@@ -23,3 +23,22 @@ func (mon *Monitor) emitAPIServerHealthzCode(ctx context.Context) (int, error) {
 
 	return statusCode, err
 }
+
+// emitAPIServerPingCode requests /healthz/ping from the API server and emits
+// the apiserver.healthz.ping.code gauge tagged with the recorded status code.
+// The gauge is emitted whether or not the request returned an error.
+func (mon *Monitor) emitAPIServerPingCode(ctx context.Context) error {
+	var statusCode int
+	err := mon.cli.Discovery().RESTClient().
+		Get().
+		AbsPath("/healthz/ping").
+		Do(ctx).
+		StatusCode(&statusCode).
+		Error()
+
+	mon.emitGauge("apiserver.healthz.ping.code", 1, map[string]string{
+		"code": strconv.FormatInt(int64(statusCode), 10),
+	})
+
+	return err
+}