Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor tests for cluster health collector #440

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Refactor tests for cluster health collector
This adds a more robust test for the cluster health collector by abstracting the parsing of a response into a single function. Fixtures have been added for multiple elasticsearch versions to ensure compatibility. This is in preparation for a larger refactoring and will ensure that future changes maintain the desired functionality.

Signed-off-by: Joe Adams <github@joeadams.io>
sysadmind committed Jun 15, 2021
commit f52ecc1b57a189274527e1e5d2d20d7a914e7829
81 changes: 41 additions & 40 deletions collector/cluster_health.go
Original file line number Diff line number Diff line change
@@ -16,6 +16,7 @@ package collector
import (
"encoding/json"
"fmt"
"io"
"io/ioutil"
"net/http"
"net/url"
@@ -54,9 +55,6 @@ type ClusterHealth struct {
client *http.Client
url *url.URL

up prometheus.Gauge
totalScrapes, jsonParseFailures prometheus.Counter

metrics []*clusterHealthMetric
statusMetric *clusterHealthStatusMetric
}
@@ -70,19 +68,6 @@ func NewClusterHealth(logger log.Logger, client *http.Client, url *url.URL) *Clu
client: client,
url: url,

up: prometheus.NewGauge(prometheus.GaugeOpts{
Name: prometheus.BuildFQName(namespace, subsystem, "up"),
Help: "Was the last scrape of the ElasticSearch cluster health endpoint successful.",
}),
totalScrapes: prometheus.NewCounter(prometheus.CounterOpts{
Name: prometheus.BuildFQName(namespace, subsystem, "total_scrapes"),
Help: "Current total ElasticSearch cluster health scrapes.",
}),
jsonParseFailures: prometheus.NewCounter(prometheus.CounterOpts{
Name: prometheus.BuildFQName(namespace, subsystem, "json_parse_failures"),
Help: "Number of errors while parsing JSON.",
}),

metrics: []*clusterHealthMetric{
{
Type: prometheus.GaugeValue,
@@ -229,10 +214,18 @@ func (c *ClusterHealth) Describe(ch chan<- *prometheus.Desc) {
ch <- metric.Desc
}
ch <- c.statusMetric.Desc
}

ch <- c.up.Desc()
ch <- c.totalScrapes.Desc()
ch <- c.jsonParseFailures.Desc()
// parseClusterHealth reads the entire cluster health API response body from r
// and unmarshals it into a clusterHealthResponse. On a read or JSON decode
// failure the (possibly zero-valued) response is returned along with the error.
//
// NOTE(review): ioutil.ReadAll is deprecated since Go 1.16; switch to
// io.ReadAll once no other code in this file depends on the ioutil import.
func parseClusterHealth(r io.Reader) (clusterHealthResponse, error) {
	var chr clusterHealthResponse

	body, err := ioutil.ReadAll(r)
	if err != nil {
		return chr, err
	}

	return chr, json.Unmarshal(body, &chr)
}

func (c *ClusterHealth) fetchAndDecodeClusterHealth() (clusterHealthResponse, error) {
@@ -260,40 +253,48 @@ func (c *ClusterHealth) fetchAndDecodeClusterHealth() (clusterHealthResponse, er
return chr, fmt.Errorf("HTTP Request failed with code %d", res.StatusCode)
}

bts, err := ioutil.ReadAll(res.Body)
chr, err = parseClusterHealth(res.Body)

return chr, nil
}

func (c *ClusterHealth) Update(ch chan<- prometheus.Metric) error {
clusterHealthResp, err := c.fetchAndDecodeClusterHealth()
if err != nil {
c.jsonParseFailures.Inc()
return chr, err
_ = level.Warn(c.logger).Log(
"msg", "failed to fetch and decode cluster health",
"err", err,
)
return err
}

if err := json.Unmarshal(bts, &chr); err != nil {
c.jsonParseFailures.Inc()
return chr, err
for _, metric := range c.metrics {
ch <- prometheus.MustNewConstMetric(
metric.Desc,
metric.Type,
metric.Value(clusterHealthResp),
clusterHealthResp.ClusterName,
)
}

return chr, nil
for _, color := range colors {
ch <- prometheus.MustNewConstMetric(
c.statusMetric.Desc,
c.statusMetric.Type,
c.statusMetric.Value(clusterHealthResp, color),
clusterHealthResp.ClusterName, color,
)
}

return nil
}

// Collect collects ClusterHealth metrics.
func (c *ClusterHealth) Collect(ch chan<- prometheus.Metric) {
var err error
c.totalScrapes.Inc()
defer func() {
ch <- c.up
ch <- c.totalScrapes
ch <- c.jsonParseFailures
}()

clusterHealthResp, err := c.fetchAndDecodeClusterHealth()
if err != nil {
c.up.Set(0)
_ = level.Warn(c.logger).Log(
"msg", "failed to fetch and decode cluster health",
"err", err,
)
return
}
c.up.Set(1)

for _, metric := range c.metrics {
ch <- prometheus.MustNewConstMetric(
35 changes: 35 additions & 0 deletions collector/cluster_health_test.go
Original file line number Diff line number Diff line change
@@ -18,6 +18,8 @@ import (
"net/http"
"net/http/httptest"
"net/url"
"os"
"reflect"
"testing"

"github.com/go-kit/kit/log"
@@ -71,3 +73,36 @@ func TestClusterHealth(t *testing.T) {
}
}
}

// Test_parseClusterHealth checks that the cluster health fixture for every
// supported Elasticsearch version decodes into the same expected response
// struct, guarding cross-version compatibility of the parser.
func Test_parseClusterHealth(t *testing.T) {
	want := clusterHealthResponse{
		ClusterName: "elasticsearch",
		Status:      "yellow",

		NumberOfNodes:               1,
		NumberOfDataNodes:           1,
		ActivePrimaryShards:         30,
		ActiveShards:                30,
		UnassignedShards:            30,
		TaskMaxWaitingInQueueMillis: 12,
		ActiveShardsPercentAsNumber: 50,
	}

	for _, version := range testElasticsearchVersions {
		version := version // pin loop variable for the subtest closure (pre-Go 1.22)
		t.Run(fmt.Sprintf("version %s", version), func(t *testing.T) {
			fixture, err := os.Open(fmt.Sprintf("../fixtures/clusterhealth/%s.json", version))
			if err != nil {
				t.Fatalf("failed to open fixture file: %v", err)
			}
			defer fixture.Close()

			parsed, err := parseClusterHealth(fixture)
			if err != nil {
				t.Errorf("parseClusterHealth() error = %v", err)
				return
			}
			if !reflect.DeepEqual(parsed, want) {
				t.Errorf("parseClusterHealth() = %v, want %v", parsed, want)
			}
		})
	}
}
17 changes: 17 additions & 0 deletions fixtures/clusterhealth/5.4.2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"cluster_name": "elasticsearch",
"status": "yellow",
"timed_out": false,
"number_of_nodes": 1,
"number_of_data_nodes": 1,
"active_primary_shards": 30,
"active_shards": 30,
"relocating_shards": 0,
"initializing_shards": 0,
"unassigned_shards": 30,
"delayed_unassigned_shards": 0,
"number_of_pending_tasks": 0,
"number_of_in_flight_fetch": 0,
"task_max_waiting_in_queue_millis": 12,
"active_shards_percent_as_number": 50
}
17 changes: 17 additions & 0 deletions fixtures/clusterhealth/5.6.16.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"cluster_name": "elasticsearch",
"status": "yellow",
"timed_out": false,
"number_of_nodes": 1,
"number_of_data_nodes": 1,
"active_primary_shards": 30,
"active_shards": 30,
"relocating_shards": 0,
"initializing_shards": 0,
"unassigned_shards": 30,
"delayed_unassigned_shards": 0,
"number_of_pending_tasks": 0,
"number_of_in_flight_fetch": 0,
"task_max_waiting_in_queue_millis": 12,
"active_shards_percent_as_number": 50
}
17 changes: 17 additions & 0 deletions fixtures/clusterhealth/6.5.4.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"cluster_name": "elasticsearch",
"status": "yellow",
"timed_out": false,
"number_of_nodes": 1,
"number_of_data_nodes": 1,
"active_primary_shards": 30,
"active_shards": 30,
"relocating_shards": 0,
"initializing_shards": 0,
"unassigned_shards": 30,
"delayed_unassigned_shards": 0,
"number_of_pending_tasks": 0,
"number_of_in_flight_fetch": 0,
"task_max_waiting_in_queue_millis": 12,
"active_shards_percent_as_number": 50
}
17 changes: 17 additions & 0 deletions fixtures/clusterhealth/6.8.8.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"cluster_name": "elasticsearch",
"status": "yellow",
"timed_out": false,
"number_of_nodes": 1,
"number_of_data_nodes": 1,
"active_primary_shards": 30,
"active_shards": 30,
"relocating_shards": 0,
"initializing_shards": 0,
"unassigned_shards": 30,
"delayed_unassigned_shards": 0,
"number_of_pending_tasks": 0,
"number_of_in_flight_fetch": 0,
"task_max_waiting_in_queue_millis": 12,
"active_shards_percent_as_number": 50
}
17 changes: 17 additions & 0 deletions fixtures/clusterhealth/7.13.1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"cluster_name": "elasticsearch",
"status": "yellow",
"timed_out": false,
"number_of_nodes": 1,
"number_of_data_nodes": 1,
"active_primary_shards": 30,
"active_shards": 30,
"relocating_shards": 0,
"initializing_shards": 0,
"unassigned_shards": 30,
"delayed_unassigned_shards": 0,
"number_of_pending_tasks": 0,
"number_of_in_flight_fetch": 0,
"task_max_waiting_in_queue_millis": 12,
"active_shards_percent_as_number": 50
}
17 changes: 17 additions & 0 deletions fixtures/clusterhealth/7.3.0.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"cluster_name": "elasticsearch",
"status": "yellow",
"timed_out": false,
"number_of_nodes": 1,
"number_of_data_nodes": 1,
"active_primary_shards": 30,
"active_shards": 30,
"relocating_shards": 0,
"initializing_shards": 0,
"unassigned_shards": 30,
"delayed_unassigned_shards": 0,
"number_of_pending_tasks": 0,
"number_of_in_flight_fetch": 0,
"task_max_waiting_in_queue_millis": 12,
"active_shards_percent_as_number": 50
}
17 changes: 17 additions & 0 deletions fixtures/clusterhealth/7.6.2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"cluster_name": "elasticsearch",
"status": "yellow",
"timed_out": false,
"number_of_nodes": 1,
"number_of_data_nodes": 1,
"active_primary_shards": 30,
"active_shards": 30,
"relocating_shards": 0,
"initializing_shards": 0,
"unassigned_shards": 30,
"delayed_unassigned_shards": 0,
"number_of_pending_tasks": 0,
"number_of_in_flight_fetch": 0,
"task_max_waiting_in_queue_millis": 12,
"active_shards_percent_as_number": 50
}