From f89a6816a873ebeeeb8e6bb3095330e5c006cb00 Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Wed, 8 May 2024 13:06:03 +0100 Subject: [PATCH 1/2] Scrape more metrics from more places Signed-off-by: Michel Hollands --- .../templates/agent/_helpers-agent.tpl | 9 ++++ .../templates/agent/config.yaml | 4 +- charts/meta-monitoring/values.yaml | 44 +++++++++++++++++++ 3 files changed, 55 insertions(+), 2 deletions(-) diff --git a/charts/meta-monitoring/templates/agent/_helpers-agent.tpl b/charts/meta-monitoring/templates/agent/_helpers-agent.tpl index d8d6642..48d84ab 100644 --- a/charts/meta-monitoring/templates/agent/_helpers-agent.tpl +++ b/charts/meta-monitoring/templates/agent/_helpers-agent.tpl @@ -9,6 +9,15 @@ {{- define "agent.all_namespaces" -}} {{- $list := list }} {{- range .Values.namespacesToMonitor }} +{{- $list = append $list (printf "\"%s\"" .) }} +{{- end }} +{{- $list = append $list (printf "\"%s\"" .Release.Namespace) }} +{{- join ", " $list }} +{{- end }} + +{{- define "agent.all_namespaces_bar" -}} +{{- $list := list }} +{{- range .Values.namespacesToMonitor }} {{- $list = append $list (printf "%s" .) }} {{- end }} {{- $list = append $list .Release.Namespace }} diff --git a/charts/meta-monitoring/templates/agent/config.yaml b/charts/meta-monitoring/templates/agent/config.yaml index 55e6963..b356c95 100644 --- a/charts/meta-monitoring/templates/agent/config.yaml +++ b/charts/meta-monitoring/templates/agent/config.yaml @@ -93,7 +93,7 @@ data: role = "pod" namespaces { own_namespace = true - names = [ {{ include "agent.namespaces" . }} ] + names = [ {{ include "agent.all_namespaces" . }} ] } } @@ -143,7 +143,7 @@ data: rule { source_labels = ["namespace"] - regex = "{{ include "agent.all_namespaces" . }}" + regex = "{{ include "agent.all_namespaces_bar" . }}" action = "keep" } diff --git a/charts/meta-monitoring/values.yaml b/charts/meta-monitoring/values.yaml index 476cbb6..f100220 100644 --- a/charts/meta-monitoring/values.yaml +++ b/charts/meta-monitoring/values.yaml @@ -75,9 +75,12 @@ logs: metrics: # The list of metrics to retain for logging dashboards retain: + - agent_build_info - agent_config_last_load_success_timestamp_seconds - agent_config_last_load_successful - agent_config_load_failures_total + - agent_wal_samples_appended_total + - agent_wal_storage_active_series - container_cpu_usage_seconds_total - container_fs_writes_bytes_total - container_memory_working_set_bytes @@ -94,6 +97,7 @@ metrics: - cortex_prometheus_rule_group_last_evaluation_timestamp_seconds - cortex_prometheus_rule_group_iterations_missed_total - go_gc_duration_seconds + - go_gc_duration_seconds_count - go_goroutines - go_memstats_heap_inuse_bytes - kubelet_volume_stats_used_bytes @@ -130,6 +134,7 @@ metrics: - loki_compactor_oldest_pending_delete_request_age_seconds - loki_compactor_pending_delete_requests_count - loki_discarded_samples_total + - loki_discarded_bytes_total - loki_distributor_bytes_received_total - loki_distributor_lines_received_total - loki_distributor_structured_metadata_bytes_received_total @@ -169,7 +174,46 @@ metrics: - loki_write_sent_entries_total - node_disk_read_bytes_total - node_disk_written_bytes_total + - process_start_time_seconds + - prometheus_remote_storage_enqueue_retries_total + - prometheus_remote_storage_highest_timestamp_in_seconds + - prometheus_remote_storage_queue_highest_sent_timestamp_seconds + - prometheus_remote_storage_samples_dropped_total + - prometheus_remote_storage_samples_failed_total + - prometheus_remote_storage_samples_pending + - prometheus_remote_storage_samples_retried_total + - prometheus_remote_storage_samples_total + - prometheus_remote_storage_sent_batch_duration_seconds_bucket + - prometheus_remote_storage_sent_batch_duration_seconds_count + - prometheus_remote_storage_sent_batch_duration_seconds_sum + - prometheus_remote_storage_shard_capacity + - prometheus_remote_storage_shards + - prometheus_remote_storage_shards_desired + - prometheus_remote_storage_shards_max + - prometheus_remote_storage_shards_min + - prometheus_remote_storage_succeeded_samples_total + - prometheus_sd_discovered_targets + - prometheus_target_interval_length_seconds_count + - prometheus_target_interval_length_seconds_sum + - prometheus_target_scrapes_exceeded_sample_limit_total + - prometheus_target_scrapes_sample_duplicate_timestamp_total + - prometheus_target_scrapes_sample_out_of_bounds_total + - prometheus_target_scrapes_sample_out_of_order_total + - prometheus_target_sync_length_seconds_sum + - prometheus_wal_watcher_current_segment - promtail_custom_bad_words_total + - promtail_dropped_bytes_total + - promtail_files_active_total + - promtail_read_bytes_total + - promtail_read_lines_total + - promtail_request_duration_seconds_bucket + - promtail_sent_entries_total + - traces_exporter_sent_spans + - traces_exporter_send_failed_spans + - traces_loadbalancer_backend_outcome + - traces_loadbalancer_num_backends + - traces_receiver_accepted_spans + - traces_receiver_refused_spans # Additional metrics to retain extraMetrics: [] # Set enabled = true to add the default logs dashboards to the local Grafana From b99d816057ffa43ee96ea95d3b4d7909e4a6c6d3 Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Wed, 8 May 2024 15:59:22 +0100 Subject: [PATCH 2/2] Add Alloy dashboards and metrics Signed-off-by: Michel Hollands --- .../src/dashboards/agent-logs-pipeline.json | 1082 ------------ .../src/dashboards/agent-operational.json | 1189 ------------- .../src/dashboards/agent-remote-write.json | 1512 ----------------- .../dashboards/agent-tracing-pipeline.json | 1065 ------------ .../meta-monitoring/src/dashboards/agent.json | 786 --------- .../src/dashboards/alloy-cluster-node.json | 1171 +++++++++++++ .../dashboards/alloy-cluster-overview.json | 540 ++++++ .../src/dashboards/alloy-controller.json | 970 +++++++++++ .../src/dashboards/alloy-opentelemetry.json | 923 ++++++++++ .../src/dashboards/alloy-prometheus.json | 1337 +++++++++++++++ .../src/dashboards/alloy-resources.json | 840 +++++++++ .../templates/grafana/agent-dashboards-1.yaml | 19 - .../templates/grafana/alloy-dashboards-1.yaml | 21 + .../templates/grafana/dashboard.yaml | 6 +- .../templates/grafana/grafana-deployment.yaml | 8 +- .../templates/grafana/loki-dashboards-1.yaml | 2 - charts/meta-monitoring/values.yaml | 60 +- 17 files changed, 5863 insertions(+), 5668 deletions(-) delete mode 100644 charts/meta-monitoring/src/dashboards/agent-logs-pipeline.json delete mode 100644 charts/meta-monitoring/src/dashboards/agent-operational.json delete mode 100644 charts/meta-monitoring/src/dashboards/agent-remote-write.json delete mode 100644 charts/meta-monitoring/src/dashboards/agent-tracing-pipeline.json delete mode 100644 charts/meta-monitoring/src/dashboards/agent.json create mode 100644 charts/meta-monitoring/src/dashboards/alloy-cluster-node.json create mode 100644 charts/meta-monitoring/src/dashboards/alloy-cluster-overview.json create mode 100644 charts/meta-monitoring/src/dashboards/alloy-controller.json create mode 100644 charts/meta-monitoring/src/dashboards/alloy-opentelemetry.json create mode 100644 charts/meta-monitoring/src/dashboards/alloy-prometheus.json create mode 100644 charts/meta-monitoring/src/dashboards/alloy-resources.json delete mode 100644 charts/meta-monitoring/templates/grafana/agent-dashboards-1.yaml create mode 100644 charts/meta-monitoring/templates/grafana/alloy-dashboards-1.yaml diff --git a/charts/meta-monitoring/src/dashboards/agent-logs-pipeline.json b/charts/meta-monitoring/src/dashboards/agent-logs-pipeline.json deleted file mode 100644 index 305136a..0000000 --- a/charts/meta-monitoring/src/dashboards/agent-logs-pipeline.json +++ /dev/null @@ -1,1082 +0,0 @@ -{ - "__inputs": [ ], - "__requires": [ ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ ], - "refresh": "30s", - "rows": [ - { - "collapse": false, - "collapsed": false, - "height": 500, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { }, - "id": 2, - "interval": "1m", - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by($groupBy) (rate(promtail_dropped_bytes_total{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"}[$__rate_interval]))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{$groupBy}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Dropped bytes rate [B/s]", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 0, - "fillGradient": 0, - "gridPos": { }, - "id": 3, - "interval": "1m", - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by($groupBy) (rate(promtail_request_duration_seconds_bucket{status_code=~\"2..\", cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"}[$__rate_interval]))\n/\nsum by($groupBy) (rate(promtail_request_duration_seconds_bucket{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"}[$__rate_interval]))\n* 100\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{$groupBy}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Write requests success rate [%]", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "%", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "%", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Errors", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "height": 500, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 0, - "fillGradient": 0, - "gridPos": { }, - "id": 4, - "interval": "1m", - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(\n 0.990000, \n sum by (le, $groupBy)\n (rate(promtail_request_duration_seconds_bucket{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"}[$__rate_interval]))\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{$groupBy}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Write latencies p99 [s]", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 0, - "fillGradient": 0, - "gridPos": { }, - "id": 5, - "interval": "1m", - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(\n 0.900000, \n sum by (le, $groupBy)\n (rate(promtail_request_duration_seconds_bucket{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"}[$__rate_interval]))\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{$groupBy}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Write latencies p90 [s]", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 0, - "fillGradient": 0, - "gridPos": { }, - "id": 6, - "interval": "1m", - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(\n 0.500000, \n sum by (le, $groupBy)\n (rate(promtail_request_duration_seconds_bucket{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"}[$__rate_interval]))\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{$groupBy}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Write latencies p50 [s]", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 0, - "fillGradient": 0, - "gridPos": { }, - "id": 7, - "interval": "1m", - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "(sum by (le, $groupBy) (rate(promtail_request_duration_seconds_sum{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"}[$__rate_interval])))\n/\n(sum by (le, $groupBy) (rate(promtail_request_duration_seconds_count{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"}[$__rate_interval])))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{$groupBy}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Write latencies average [s]", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Latencies", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "height": 500, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { }, - "id": 8, - "interval": "1m", - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by($groupBy) (rate(promtail_read_bytes_total{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"}[$__rate_interval]))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{$groupBy}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Bytes read rate [B/s]", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { }, - "id": 9, - "interval": "1m", - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by($groupBy) (rate(promtail_read_lines_total{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"}[$__rate_interval]))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{$groupBy}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Lines read rate [lines/s]", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { }, - "id": 10, - "interval": "1m", - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by($groupBy) (promtail_files_active_total{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"})\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{$groupBy}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Active files count", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { }, - "id": 11, - "interval": "1m", - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by($groupBy) (rate(promtail_sent_entries_total{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"}[$__rate_interval]))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{$groupBy}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Entries sent rate [entries/s]", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Logs volume", - "titleSize": "h6", - "type": "row" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "grafana-agent-mixin" - ], - "templating": { - "list": [ - { - "hide": 0, - "label": null, - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "value": { - "selected": true, - "text": "All", - "value": "$__all" - } - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(agent_build_info, cluster)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "value": { - "selected": true, - "text": "All", - "value": "$__all" - } - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(agent_build_info, namespace)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "value": { - "selected": true, - "text": "All", - "value": "$__all" - } - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "container", - "options": [ ], - "query": "label_values(agent_build_info, container)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "value": { - "selected": true, - "text": "All", - "value": "$__all" - } - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "pod", - "options": [ ], - "query": "label_values(agent_build_info{container=~\"$container\"}, pod)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "pod", - "value": "pod" - }, - "hide": 0, - "includeAll": false, - "label": "", - "multi": false, - "name": "groupBy", - "options": [ - { - "text": "pod", - "value": "pod" - }, - { - "text": "cluster", - "value": "cluster" - }, - { - "text": "namespace", - "value": "namespace" - } - ], - "query": "pod,cluster,namespace", - "refresh": 0, - "type": "custom" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Agent Logs Pipeline", - "version": 0 -} diff --git a/charts/meta-monitoring/src/dashboards/agent-operational.json b/charts/meta-monitoring/src/dashboards/agent-operational.json deleted file mode 100644 index 6cc2a65..0000000 --- a/charts/meta-monitoring/src/dashboards/agent-operational.json +++ /dev/null @@ -1,1189 +0,0 @@ -{ - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ ], - "refresh": "30s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(go_gc_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\", pod=~\"$pod\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "GCs [count/s]", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\", pod=~\"$pod\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Go Heap In Use", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "go_goroutines{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\", pod=~\"$pod\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Goroutines", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\", pod=~\"$pod\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "CPU Usage [time/s]", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\", pod=~\"$pod\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Working Set Size", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(promtail_custom_bad_words_total{cluster=~\"$cluster\", exported_namespace=~\"$namespace\", exported_job=~\"$job\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{job}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Promtail Bad Words", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "General", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Received Bytes [B/s]", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Transmitted Bytes [B/s]", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "(sum by (pod) (avg_over_time(go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\", pod=~\"$pod\"}[1m])))\n/\n(sum by (pod) (agent_wal_storage_active_series{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\", pod=~\"$pod\"}))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Heap Used per Series per Pod", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "(sum by (container) (avg_over_time(go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\", pod=~\"$pod\"}[1m])))\n/\n(sum by (container) (agent_wal_storage_active_series{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\", pod=~\"$pod\"}))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{container}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Avg Heap Used per Series", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 11, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (pod) (agent_wal_storage_active_series{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\", pod=~\"$pod\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Series Count per Pod", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (instance_group_name) (agent_wal_storage_active_series{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\", pod=~\"$pod\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance_group_name}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Series per Config", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 13, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (container) (agent_wal_storage_active_series{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\", pod=~\"$pod\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{container}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Total Series", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Prometheus Read", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "grafana-agent-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data Source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(agent_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(agent_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "container", - "multi": true, - "name": "container", - "options": [ ], - "query": "label_values(agent_build_info{cluster=~\"$cluster\", namespace=\"$namespace\"}, container)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "pod", - "multi": true, - "name": "pod", - "options": [ ], - "query": "label_values(agent_build_info{cluster=~\"$cluster\", namespace=\"$namespace\", container=\"$container\"}, pod)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Agent Operational", - "uid": "", - "version": 0 -} diff --git a/charts/meta-monitoring/src/dashboards/agent-remote-write.json b/charts/meta-monitoring/src/dashboards/agent-remote-write.json deleted file mode 100644 index f222774..0000000 --- a/charts/meta-monitoring/src/dashboards/agent-remote-write.json +++ /dev/null @@ -1,1512 +0,0 @@ -{ - "__inputs": [ ], - "__requires": [ ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ ], - "refresh": "30s", - "rows": [ - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { }, - "id": 2, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "(\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}\n -\n ignoring(url, remote_name) group_right(pod)\n prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Highest Timestamp In vs. Highest Timestamp Sent", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { }, - "id": 3, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(prometheus_remote_storage_sent_batch_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[1m]) / rate(prometheus_remote_storage_sent_batch_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[1m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "mean {{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.99, rate(prometheus_remote_storage_sent_batch_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[1m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "p99 {{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", - "refId": "B" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Latency [1m]", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Timestamps", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { }, - "id": 4, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(agent_wal_samples_appended_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Rate in [5m]", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { }, - "id": 5, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m]) or rate(prometheus_remote_storage_samples_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Rate succeeded [5m]", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { }, - "id": 6, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_remote_storage_samples_pending{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Pending Samples", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { }, - "id": 7, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Dropped Samples", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { }, - "id": 8, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(prometheus_remote_storage_samples_failed_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Failed Samples", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { }, - "id": 9, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(prometheus_remote_storage_samples_retried_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Retried Samples", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Samples", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { }, - "id": 10, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "minSpan": 6, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_remote_storage_shards{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Current Shards", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { }, - "id": 11, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_remote_storage_shards_max{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Max Shards", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { }, - "id": 12, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_remote_storage_shards_min{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Min Shards", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { }, - "id": 13, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_remote_storage_shards_desired{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Desired Shards", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Shards", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { }, - "id": 14, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_remote_storage_shard_capacity{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Shard Capacity", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Shard Details", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { }, - "id": 15, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_wal_watcher_current_segment{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Remote Write Current Segment", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Segments", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { }, - "id": 16, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(prometheus_remote_storage_enqueue_retries_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{pod}}-{{instance_group_name}}-{{url}}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Enqueue Retries", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Misc. Rates", - "titleSize": "h6", - "type": "row" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "grafana-agent-mixin" - ], - "templating": { - "list": [ - { - "hide": 0, - "label": null, - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "value": { - "selected": true, - "text": "All", - "value": "$__all" - } - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(agent_build_info, cluster)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "value": { - "selected": true, - "text": "All", - "value": "$__all" - } - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(agent_build_info, namespace)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "value": { - "selected": true, - "text": "All", - "value": "$__all" - } - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "container", - "options": [ ], - "query": "label_values(agent_build_info, container)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "value": { - "selected": true, - "text": "All", - "value": "$__all" - } - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "pod", - "options": [ ], - "query": "label_values(agent_build_info{container=~\"$container\"}, pod)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "url", - "options": [ ], - "query": "label_values(prometheus_remote_storage_shards{cluster=~\"$cluster\", pod=~\"$pod\"}, url)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Agent Prometheus Remote Write", - "version": 0 -} diff --git a/charts/meta-monitoring/src/dashboards/agent-tracing-pipeline.json b/charts/meta-monitoring/src/dashboards/agent-tracing-pipeline.json deleted file mode 100644 index 3e9415b..0000000 --- a/charts/meta-monitoring/src/dashboards/agent-tracing-pipeline.json +++ /dev/null @@ -1,1065 +0,0 @@ -{ - "__inputs": [ ], - "__requires": [ ], - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ ], - "refresh": "30s", - "rows": [ - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 0, - "fillGradient": 0, - "gridPos": { }, - "id": 2, - "interval": "1m", - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(traces_receiver_accepted_spans{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\",receiver!=\"otlp/lb\"}[$__rate_interval])\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ pod }} - {{ receiver }}/{{ transport }}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Accepted spans", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 0, - "fillGradient": 0, - "gridPos": { }, - "id": 3, - "interval": "1m", - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(traces_receiver_refused_spans{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\",receiver!=\"otlp/lb\"}[$__rate_interval])\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ pod }} - {{ receiver }}/{{ transport }}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Refused spans", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 0, - "fillGradient": 0, - "gridPos": { }, - "id": 4, - "interval": "1m", - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(traces_exporter_sent_spans{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\",exporter!=\"otlp\"}[$__rate_interval])\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ pod }} - {{ exporter }}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Exported spans", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 0, - "fillGradient": 0, - "gridPos": { }, - "id": 5, - "interval": "1m", - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(traces_exporter_send_failed_spans{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\",exporter!=\"otlp\"}[$__rate_interval])\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ pod }} - {{ exporter }}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Exported failed spans", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { }, - "id": 6, - "interval": "1m", - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(traces_receiver_accepted_spans{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\",receiver!=\"otlp/lb\"}[$__rate_interval]))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Accepted", - "refId": "A" - }, - { - "expr": "sum(rate(traces_receiver_refused_spans{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\",receiver!=\"otlp/lb\"}[$__rate_interval]))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Refused", - "refId": "B" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Received spans", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { }, - "id": 7, - "interval": "1m", - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(traces_exporter_sent_spans{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\",exporter!=\"otlp\"}[$__rate_interval]))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Sent", - "refId": "A" - }, - { - "expr": "sum(rate(traces_exporter_send_failed_spans{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\",exporter!=\"otlp\"}[$__rate_interval]))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Send failed", - "refId": "B" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Exported spans", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Write / Read", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { }, - "id": 8, - "interval": "1m", - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "rate(traces_loadbalancer_backend_outcome{cluster=~\"$cluster\",namespace=~\"$namespace\",success=\"true\",container=~\"$container\",pod=~\"$pod\"}[$__rate_interval])\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ pod }}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Load-balanced spans", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 0, - "fillGradient": 0, - "gridPos": { }, - "id": 9, - "interval": "1m", - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "traces_loadbalancer_num_backends{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"}\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ pod }}", - "refId": "A" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Number of peers", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { }, - "id": 10, - "interval": "1m", - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(traces_receiver_accepted_spans{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\",receiver=\"otlp/lb\"}[$__rate_interval]))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Accepted", - "refId": "A" - }, - { - "expr": "sum(rate(traces_receiver_refused_spans{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\",receiver=\"otlp/lb\"}[$__rate_interval]))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Refused", - "refId": "B" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Received spans", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { }, - "id": 11, - "interval": "1m", - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(traces_exporter_sent_spans{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\",exporter=\"otlp\"}[$__rate_interval]))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Sent", - "refId": "A" - }, - { - "expr": "sum(rate(traces_exporter_send_failed_spans{cluster=~\"$cluster\",namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\",exporter=\"otlp\"}[$__rate_interval]))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Send failed", - "refId": "B" - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Exported spans", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Load balancing", - "titleSize": "h6", - "type": "row" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "grafana-agent-mixin" - ], - "templating": { - "list": [ - { - "hide": 0, - "label": null, - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "value": { - "selected": true, - "text": "All", - "value": "$__all" - } - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(agent_build_info, cluster)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "value": { - "selected": true, - "text": "All", - "value": "$__all" - } - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(agent_build_info, namespace)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "value": { - "selected": true, - "text": "All", - "value": "$__all" - } - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "container", - "options": [ ], - "query": "label_values(agent_build_info, container)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "value": { - "selected": true, - "text": "All", - "value": "$__all" - } - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "pod", - "options": [ ], - "query": "label_values(agent_build_info{container=~\"$container\"}, pod)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Agent Tracing Pipeline", - "version": 0 -} diff --git a/charts/meta-monitoring/src/dashboards/agent.json b/charts/meta-monitoring/src/dashboards/agent.json deleted file mode 100644 index 768fccb..0000000 --- a/charts/meta-monitoring/src/dashboards/agent.json +++ /dev/null @@ -1,786 +0,0 @@ -{ - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ ], - "refresh": "30s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Count", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ ], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Uptime", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Container", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "container", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Pod", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "pod", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "Version", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTargetBlank": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "version", - "thresholds": [ ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "count by (pod, container, version) (agent_build_info{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"})", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "max by (pod, container) (time() - process_start_time_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"})", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Agent Stats", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "transform": "table", - "type": "table", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Agent Stats", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(prometheus_target_sync_length_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m])) by (pod, scrape_job) * 1e3", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}}/{{scrape_job}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Target Sync", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (pod) (prometheus_sd_discovered_targets{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Targets", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Prometheus Discovery", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(prometheus_target_interval_length_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m])\n/\nrate(prometheus_target_interval_length_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m])\n* 1e3\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}} {{interval}} configured", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Average Scrape Interval Duration", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (job) (rate(prometheus_target_scrapes_exceeded_sample_limit_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[1m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "exceeded sample limit: {{job}}", - "legendLink": null, - "step": 10 - }, - { - "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[1m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "duplicate timestamp: {{job}}", - "legendLink": null, - "step": 10 - }, - { - "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_out_of_bounds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[1m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "out of bounds: {{job}}", - "legendLink": null, - "step": 10 - }, - { - "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_out_of_order_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[1m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "out of order: {{job}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Scrape failures", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ ], - "spaceLength": 10, - "span": 4, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (job, instance_group_name) (rate(agent_wal_samples_appended_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"$container\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{job}} {{instance_group_name}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Appended Samples", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Prometheus Retrieval", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "grafana-agent-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data Source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "cluster", - "multi": true, - "name": "cluster", - "options": [ ], - "query": "label_values(agent_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "namespace", - "multi": true, - "name": "namespace", - "options": [ ], - "query": "label_values(agent_build_info, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "container", - "multi": true, - "name": "container", - "options": [ ], - "query": "label_values(agent_build_info, container)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": "grafana-agent-.*", - "current": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "pod", - "multi": true, - "name": "pod", - "options": [ ], - "query": "label_values(agent_build_info{container=~\"$container\"}, pod)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Agent", - "uid": "", - "version": 0 -} diff --git a/charts/meta-monitoring/src/dashboards/alloy-cluster-node.json b/charts/meta-monitoring/src/dashboards/alloy-cluster-node.json new file mode 100644 index 0000000..5423937 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/alloy-cluster-node.json @@ -0,0 +1,1171 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + }, + { + "datasource": "$loki_datasource", + "enable": true, + "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"alloy\" | name_extracted=~\"alloy.*\"", + "iconColor": "rgba(0, 211, 255, 1)", + "instant": false, + "name": "Deployments", + "titleFormat": "{{cluster}}/{{namespace}}" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": 24, + "links": [ + { + "icon": "doc", + "targetBlank": true, + "title": "Documentation", + "tooltip": "Clustering documentation", + "type": "link", + "url": "https://grafana.com/docs/alloy/latest/reference/cli/run/#clustered-mode" + }, + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "alloy-mixin" + ], + "targetBlank": false, + "title": "Dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": "${datasource}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "title": "Node Info", + "type": "row" + }, + { + "datasource": "${datasource}", + "description": "Information about a specific cluster node.\n\n* Lamport clock time: The observed Lamport time on the specific node's clock used to provide partial ordering around gossip messages. Nodes should ideally be observing roughly the same time, meaning they are up-to-date on the cluster state. If a node is falling behind, it means that it has not recently processed the same number of messages and may have an outdated view of its peers.\n\n* Internal cluster state observers: The number of Observer functions that are registered to run whenever the node detects a cluster change.\n\n* Gossip health score: A health score assigned to this node by the memberlist implementation. The lower, the better.\n\n* Gossip protocol version: The protocol version used by nodes to communicate with one another. It should match across all nodes.\n", + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 2, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum(cluster_node_lamport_time{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "Lamport clock time" + }, + { + "datasource": "${datasource}", + "expr": "sum(cluster_node_update_observers{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "Internal cluster state observers" + }, + { + "datasource": "${datasource}", + "expr": "sum(cluster_node_gossip_health_score{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "Gossip health score" + }, + { + "datasource": "${datasource}", + "expr": "sum(cluster_node_gossip_proto_version{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "Gossip protocol version" + } + ], + "title": "Node Info", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "Value #(.*)", + "renamePattern": "$1" + } + }, + { + "id": "reduce", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "Field": "Metric", + "Max": "Value" + } + } + } + ], + "type": "table" + }, + { + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "rate(cluster_node_gossip_received_events_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", + "instant": false, + "legendFormat": "{{event}}", + "range": true, + "refId": "A" + } + ], + "title": "Gossip ops/s", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Known peers to the node (including the local node).\n", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "suffix:peers" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum(cluster_node_peers{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"})", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Known peers", + "type": "stat" + }, + { + "datasource": "${datasource}", + "description": "Known peers to the node by state (including the local node).\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "suffix:nodes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "cluster_node_peers{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", + "instant": false, + "legendFormat": "{{state}}", + "range": true, + "refId": "A" + } + ], + "title": "Peers by state", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 6, + "title": "Gossip Transport", + "type": "row" + }, + { + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": true, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 18 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "rate(cluster_transport_rx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", + "instant": false, + "legendFormat": "rx", + "range": true, + "refId": "A" + }, + { + "datasource": "${datasource}", + "expr": "-1 * rate(cluster_transport_tx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", + "instant": false, + "legendFormat": "tx", + "range": true, + "refId": "B" + } + ], + "title": "Transport bandwidth", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 18 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "1 - (\nrate(cluster_transport_tx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\nrate(cluster_transport_tx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n)\n", + "instant": false, + "legendFormat": "Tx success %", + "range": true, + "refId": "A" + }, + { + "datasource": "${datasource}", + "expr": "1 - (\n rate(cluster_transport_rx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\n rate(cluster_transport_rx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n )\n", + "instant": false, + "legendFormat": "Rx success %", + "range": true, + "refId": "B" + } + ], + "title": "Packet write success rate", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "The number of packets enqueued currently to be decoded or encoded and sent during communication with other nodes.\n\nThe incoming and outgoing packet queue should be as empty as possible; a growing queue means that Alloy cannot keep up with the number of messages required to have all nodes informed of cluster changes, and the nodes may not converge in a timely fashion.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "pkts" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 18 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "cluster_transport_tx_packet_queue_length{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", + "instant": false, + "legendFormat": "tx queue", + "range": true, + "refId": "A" + }, + { + "datasource": "${datasource}", + "expr": "cluster_transport_rx_packet_queue_length{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", + "instant": false, + "legendFormat": "rx queue", + "range": true, + "refId": "B" + } + ], + "title": "Pending packet queue", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": true, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 26 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "rate(cluster_transport_stream_rx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", + "instant": false, + "legendFormat": "rx", + "range": true, + "refId": "A" + }, + { + "datasource": "${datasource}", + "expr": "-1 * rate(cluster_transport_stream_tx_bytes_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])", + "instant": false, + "legendFormat": "tx", + "range": true, + "refId": "B" + } + ], + "title": "Stream bandwidth", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 26 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "1 - (\n rate(cluster_transport_stream_tx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\n rate(cluster_transport_stream_tx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n )\n", + "instant": false, + "legendFormat": "Tx success %", + "range": true, + "refId": "A" + }, + { + "datasource": "${datasource}", + "expr": "1 - (\n rate(cluster_transport_stream_rx_packets_failed_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) /\n rate(cluster_transport_stream_rx_packets_total{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n )\n", + "instant": false, + "legendFormat": "Rx success %", + "range": true, + "refId": "B" + } + ], + "title": "Stream write success rate", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "The number of open connections from this node to its peers.\n\nEach node picks up a subset of its peers to continuously gossip messages around cluster status using streaming HTTP/2 connections. This panel can be used to detect networking failures that result in cluster communication being disrupted and convergence taking longer than expected or outright failing.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 26 + }, + "id": 12, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "cluster_transport_streams{instance=\"$instance\", cluster=\"$cluster\", namespace=\"$namespace\"}", + "instant": false, + "legendFormat": "Open streams", + "range": true, + "refId": "A" + } + ], + "title": "Open transport streams", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [ + "alloy-mixin" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Mimir", + "value": "mimir_ds" + }, + "hide": 0, + "includeAll": false, + "label": "Data Source", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "Loki", + "value": "loki_ds" + }, + "hide": 0, + "includeAll": false, + "label": "Loki Data Source", + "multi": false, + "name": "loki_datasource", + "options": [], + "query": "loki", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "datasource" + }, + { + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": { + "uid": "${datasource}" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [], + "query": { + "query": "label_values(alloy_component_controller_running_components, cluster)\n", + "refId": "cluster" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "query" + }, + { + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": { + "uid": "${datasource}" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": { + "query": "label_values(alloy_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", + "refId": "namespace" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "query" + }, + { + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": { + "uid": "${datasource}" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "instance", + "multi": false, + "name": "instance", + "options": [], + "query": { + "query": "label_values(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", + "refId": "instance" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d", + "90d" + ] + }, + "timezone": "", + "title": "Alloy / Cluster Node", + "uid": "4047e755d822da63c8158cde32ae4dce", + "version": 0, + "weekStart": "" + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/alloy-cluster-overview.json b/charts/meta-monitoring/src/dashboards/alloy-cluster-overview.json new file mode 100644 index 0000000..a4f0bbe --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/alloy-cluster-overview.json @@ -0,0 +1,540 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + }, + { + "datasource": "$loki_datasource", + "enable": true, + "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"alloy\" | name_extracted=~\"alloy.*\"", + "iconColor": "rgba(0, 211, 255, 1)", + "instant": false, + "name": "Deployments", + "titleFormat": "{{cluster}}/{{namespace}}" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": 27, + "links": [ + { + "icon": "doc", + "targetBlank": true, + "title": "Documentation", + "tooltip": "Clustering documentation", + "type": "link", + "url": "https://grafana.com/docs/alloy/latest/reference/cli/run/#clustered-mode" + }, + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "alloy-mixin" + ], + "targetBlank": false, + "title": "Dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": "${datasource}", + "expr": "count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"})", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Nodes", + "type": "stat" + }, + { + "datasource": "${datasource}", + "description": "Nodes info.\n", + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Dashboard" + }, + "properties": [ + { + "id": "mappings", + "value": [ + { + "options": { + "1": { + "index": 0, + "text": "Link" + } + }, + "type": "value" + } + ] + }, + { + "id": "links", + "value": [ + { + "targetBlank": false, + "title": "Detail dashboard for node", + "url": "/d/4047e755d822da63c8158cde32ae4dce/alloy-cluster-node?var-instance=${__data.fields.instance}&var-datasource=${datasource}&var-loki_datasource=${loki_datasource}&var-cluster=${cluster}&var-namespace=${namespace}" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 16, + "x": 8, + "y": 0 + }, + "id": 2, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": "${datasource}", + "expr": "cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"}", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Node table", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": false, + "__name__": true, + "cluster": true, + "namespace": true, + "state": false + }, + "indexByName": {}, + "renameByName": { + "Value": "Dashboard", + "instance": "", + "state": "" + } + } + } + ], + "type": "table" + }, + { + "datasource": "${datasource}", + "description": "Whether the cluster state has converged.\n\nIt is normal for the cluster state to be diverged briefly as gossip events propagate. It is not normal for the cluster state to be diverged for a long period of time.\n\nThis will show one of the following:\n\n* Converged: Nodes are aware of all other nodes, with the correct states.\n* Not converged: A subset of nodes aren't aware of their peers, or don't have an updated view of peer states.\n", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "1": { + "color": "red", + "index": 1, + "text": "Not converged" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "green", + "index": 0, + "text": "Converged" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "suffix:nodes" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 9 + }, + "id": 3, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": "${datasource}", + "expr": "clamp((\n sum(stddev by (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"}) != 0) or\n (sum(abs(sum without (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) - scalar(count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"})) != 0))\n ),\n 1, 1\n)\n", + "format": "time_series", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Convergance state", + "type": "stat" + }, + { + "datasource": "${datasource}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "fillOpacity": 80, + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineWidth": 0, + "spanNulls": true + }, + "mappings": [ + { + "options": { + "0": { + "color": "green", + "text": "Yes" + } + }, + "type": "value" + }, + { + "options": { + "1": { + "color": "red", + "text": "No" + } + }, + "type": "value" + } + ], + "max": 1, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 16, + "x": 8, + "y": 9 + }, + "id": 4, + "options": { + "alignValue": "left", + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "mergeValues": true, + "rowHeight": 0.9, + "showValue": "auto", + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "ceil(clamp((\n sum(stddev by (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) or\n (sum(abs(sum without (state) (cluster_node_peers{cluster=\"$cluster\", namespace=\"$namespace\"})) - scalar(count(cluster_node_info{cluster=\"$cluster\", namespace=\"$namespace\"}))))\n ),\n 0, 1\n))\n", + "instant": false, + "legendFormat": "Converged", + "range": true, + "refId": "A" + } + ], + "title": "Convergance state timeline", + "type": "state-timeline" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [ + "alloy-mixin" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Mimir", + "value": "mimir_ds" + }, + "hide": 0, + "includeAll": false, + "label": "Data Source", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "Loki", + "value": "loki_ds" + }, + "hide": 0, + "includeAll": false, + "label": "Loki Data Source", + "multi": false, + "name": "loki_datasource", + "options": [], + "query": "loki", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "datasource" + }, + { + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": { + "uid": "${datasource}" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [], + "query": { + "query": "label_values(alloy_component_controller_running_components, cluster)\n", + "refId": "cluster" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "query" + }, + { + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": { + "uid": "${datasource}" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": { + "query": "label_values(alloy_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", + "refId": "namespace" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d", + "90d" + ] + }, + "timezone": "", + "title": "Alloy / Cluster Overview", + "uid": "", + "version": 0, + "weekStart": "" + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/alloy-controller.json b/charts/meta-monitoring/src/dashboards/alloy-controller.json new file mode 100644 index 0000000..8191007 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/alloy-controller.json @@ -0,0 +1,970 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + }, + { + "datasource": "$loki_datasource", + "enable": true, + "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"alloy\" | name_extracted=~\"alloy.*\"", + "iconColor": "rgba(0, 211, 255, 1)", + "instant": false, + "name": "Deployments", + "titleFormat": "{{cluster}}/{{namespace}}" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": 28, + "links": [ + { + "icon": "doc", + "targetBlank": true, + "title": "Documentation", + "tooltip": "Component controller documentation", + "type": "link", + "url": "https://grafana.com/docs/alloy/latest/concepts/component_controller/" + }, + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "alloy-mixin" + ], + "targetBlank": false, + "title": "Dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": "${datasource}", + "description": "The number of Alloy instances whose metrics are being sent and reported.\n", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "instances" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 10, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": "${datasource}", + "expr": "count(alloy_component_controller_evaluating{cluster=\"$cluster\", namespace=\"$namespace\"})", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Running instances", + "type": "stat" + }, + { + "datasource": "${datasource}", + "description": "Breakdown of components by health across all running instances.\n\n* Healthy: components have been evaluated completely and are reporting themselves as healthy.\n* Unhealthy: Components either could not be evaluated or are reporting themselves as unhealthy.\n* Unknown: A component has been created but has not yet been started.\n* Exited: A component has exited. It will not return to the running state.\n\nMore information on a component's health state can be retrieved using\nthe Alloy UI.\n\nNote that components may be in a degraded state even if they report\nthemselves as healthy. Use component-specific dashboards and alerts\nto observe detailed information about the behavior of a component.\n", + "fieldConfig": { + "defaults": { + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Unhealthy" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unknown" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "blue", + "value": 1 + } + ] + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Exited" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1 + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 14, + "x": 10, + "y": 0 + }, + "id": 4, + "options": { + "displayMode": "gradient", + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"healthy\"}) or vector(0)", + "instant": true, + "legendFormat": "Healthy", + "range": false, + "refId": "A" + }, + { + "datasource": "${datasource}", + "expr": "sum(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"unhealthy\"}) or vector(0)", + "instant": true, + "legendFormat": "Unhealthy", + "range": false, + "refId": "B" + }, + { + "datasource": "${datasource}", + "expr": "sum(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"unknown\"}) or vector(0)", + "instant": true, + "legendFormat": "Unknown", + "range": false, + "refId": "C" + }, + { + "datasource": "${datasource}", + "expr": "sum(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\", health_type=\"exited\"}) or vector(0)", + "instant": true, + "legendFormat": "Exited", + "range": false, + "refId": "D" + } + ], + "title": "Components by health", + "type": "bargauge" + }, + { + "datasource": "${datasource}", + "description": "The number of running components across all running instances.\n", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "components" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 10, + "x": 0, + "y": 4 + }, + "id": 2, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"})", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Running components", + "type": "stat" + }, + { + "datasource": "${datasource}", + "description": "The percentage of components which are in a healthy state.\n", + "fieldConfig": { + "defaults": { + "mappings": [], + "max": 1, + "min": 0, + "noValue": "No components", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 10, + "x": 0, + "y": 8 + }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": { + "valueSize": 80 + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\",health_type=\"healthy\"}) /\nsum(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"})\n", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Overall component health", + "type": "stat" + }, + { + "datasource": "${datasource}", + "description": "The frequency at which components get updated.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "points", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 3, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 12 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum by (instance) (rate(alloy_component_evaluation_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Component evaluation rate", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "The percentiles for how long it takes to complete component evaluations.\n\nComponent evaluations must complete for components to have the latest\narguments. The longer the evaluations take, the slower it will be to\nreconcile the state of components.\n\nIf evaluation is taking too long, consider sharding your components to\ndeal with smaller amounts of data and reuse data as much as possible.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 12 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "histogram_quantile(0.99, sum(rate(alloy_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\nor\nhistogram_quantile(0.99, sum by (le) (rate(alloy_component_evaluation_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n", + "instant": false, + "legendFormat": "99th percentile", + "range": true, + "refId": "A" + }, + { + "datasource": "${datasource}", + "expr": "histogram_quantile(0.50, sum(rate(alloy_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\nor\nhistogram_quantile(0.50, sum by (le) (rate(alloy_component_evaluation_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n", + "instant": false, + "legendFormat": "50th percentile", + "range": true, + "refId": "B" + }, + { + "datasource": "${datasource}", + "expr": "(\n histogram_sum(sum(rate(alloy_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))) /\n histogram_count(sum(rate(alloy_component_evaluation_seconds{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))\n)\nor\n(\n sum(rate(alloy_component_evaluation_seconds_sum{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])) /\n sum(rate(alloy_component_evaluation_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n)\n", + "instant": false, + "legendFormat": "Average", + "range": true, + "refId": "C" + } + ], + "title": "Component evaluation time", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "The percentage of time spent evaluating 'slow' components - components that took longer than 1 minute to evaluate.\n\nIdeally, no component should take more than 1 minute to evaluate. The components displayed in this chart\nmay be a sign of a problem with the pipeline.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 12 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum by (component_path, component_id) (rate(alloy_component_evaluation_slow_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n/ scalar(sum(rate(alloy_component_evaluation_seconds_sum{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))\n", + "instant": false, + "legendFormat": "{{component path}} {{component_id}}", + "range": true, + "refId": "A" + } + ], + "title": "Slow components evaluation times", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Detailed histogram view of how long component evaluations take.\n\nThe goal is to design your config so that evaluations take as little\ntime as possible; under 100ms is a good goal.\n", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 22 + }, + "id": 8, + "maxDataPoints": 30, + "options": { + "calculate": false, + "cellGap": 0, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Spectral", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 0.1 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": true + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum(increase(alloy_component_evaluation_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\nor ignoring (le)\nsum by (le) (increase(alloy_component_evaluation_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n", + "format": "heatmap", + "instant": false, + "legendFormat": "{{le}}", + "range": true, + "refId": "A" + } + ], + "title": "Component evaluation histogram", + "type": "heatmap" + }, + { + "datasource": "${datasource}", + "description": "Detailed histogram of how long components wait to be evaluated after their dependency is updated.\n\nThe goal is to design your config so that most of the time components do not\nqueue for long; under 10ms is a good goal.\n", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 22 + }, + "id": 9, + "maxDataPoints": 30, + "options": { + "calculate": false, + "cellGap": 0, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Spectral", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 0.1 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": true + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum(increase(alloy_component_dependencies_wait_seconds{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\nor ignoring (le)\nsum by (le) (increase(alloy_component_dependencies_wait_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n", + "format": "heatmap", + "instant": false, + "legendFormat": "{{le}}", + "range": true, + "refId": "A" + } + ], + "title": "Component dependency wait histogram", + "type": "heatmap" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [ + "alloy-mixin" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Mimir", + "value": "mimir_ds" + }, + "hide": 0, + "includeAll": false, + "label": "Data Source", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "Loki", + "value": "loki_ds" + }, + "hide": 0, + "includeAll": false, + "label": "Loki Data Source", + "multi": false, + "name": "loki_datasource", + "options": [], + "query": "loki", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "datasource" + }, + { + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": { + "uid": "${datasource}" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [], + "query": { + "query": "label_values(alloy_component_controller_running_components, cluster)\n", + "refId": "cluster" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "query" + }, + { + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": { + "uid": "${datasource}" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": { + "query": "label_values(alloy_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", + "refId": "namespace" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d", + "90d" + ] + }, + "timezone": "", + "title": "Alloy / Controller", + "uid": "bf9f456aad7108b2c808dbd9973e386f", + "version": 0, + "weekStart": "" + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/alloy-opentelemetry.json b/charts/meta-monitoring/src/dashboards/alloy-opentelemetry.json new file mode 100644 index 0000000..c8e837f --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/alloy-opentelemetry.json @@ -0,0 +1,923 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": 25, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "alloy-mixin" + ], + "targetBlank": false, + "title": "Dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": "${datasource}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "title": "Receivers for traces [otelcol.receiver]", + "type": "row" + }, + { + "datasource": "${datasource}", + "description": "Number of spans successfully pushed into the pipeline.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "hue", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 1 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "rate(receiver_accepted_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", + "instant": false, + "legendFormat": "{{ pod }} / {{ transport }}", + "range": true, + "refId": "A" + } + ], + "title": "Accepted spans", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Number of spans that could not be pushed into the pipeline.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "hue", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 1 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "rate(receiver_refused_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", + "instant": false, + "legendFormat": "{{ pod }} / {{ transport }}", + "range": true, + "refId": "A" + } + ], + "title": "Refused spans", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "The duration of inbound RPCs.\n", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 1 + }, + "id": 4, + "maxDataPoints": 30, + "options": { + "calculate": false, + "cellGap": 1, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Oranges", + "steps": 65 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": true + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false, + "unit": "ms" + } + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum by (le) (increase(rpc_server_duration_milliseconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", rpc_service=\"opentelemetry.proto.collector.trace.v1.TraceService\"}[$__rate_interval]))", + "format": "heatmap", + "instant": false, + "legendFormat": "{{le}}", + "range": true, + "refId": "A" + } + ], + "title": "RPC server duration", + "type": "heatmap" + }, + { + "datasource": "${datasource}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 5, + "title": "Batching of logs, metrics, and traces [otelcol.processor.batch]", + "type": "row" + }, + { + "datasource": "${datasource}", + "description": "Number of spans, metric datapoints, or log lines in a batch\n", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 12 + }, + "id": 6, + "maxDataPoints": 30, + "options": { + "calculate": false, + "cellGap": 1, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Oranges", + "steps": 65 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": true + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false, + "unit": "short" + } + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum by (le) (increase(processor_batch_batch_send_size_ratio_bucket{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "heatmap", + "instant": false, + "legendFormat": "{{le}}", + "range": true, + "refId": "A" + } + ], + "title": "Number of units in the batch", + "type": "heatmap" + }, + { + "datasource": "${datasource}", + "description": "Number of distinct metadata value combinations being processed\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 12 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "processor_batch_metadata_cardinality_ratio{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}\n", + "instant": false, + "legendFormat": "{{ pod }}", + "range": true, + "refId": "A" + } + ], + "title": "Distinct metadata values", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Number of times the batch was sent due to a timeout trigger\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 12 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "rate(processor_batch_timeout_trigger_send_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", + "instant": false, + "legendFormat": "{{ pod }}", + "range": true, + "refId": "A" + } + ], + "title": "Timeout trigger", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 22 + }, + "id": 9, + "title": "Exporters for traces [otelcol.exporter]", + "type": "row" + }, + { + "datasource": "${datasource}", + "description": "Number of spans successfully sent to destination.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "hue", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 23 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "rate(exporter_sent_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", + "instant": false, + "legendFormat": "{{ pod }}", + "range": true, + "refId": "A" + } + ], + "title": "Exported sent spans", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Number of spans in failed attempts to send to destination.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "hue", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 23 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "rate(exporter_send_failed_spans_ratio_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\"}[$__rate_interval])\n", + "instant": false, + "legendFormat": "{{ pod }}", + "range": true, + "refId": "A" + } + ], + "title": "Exported failed spans", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [ + "alloy-mixin" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Mimir", + "value": "mimir_ds" + }, + "hide": 0, + "includeAll": false, + "label": "Data Source", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "Loki", + "value": "loki_ds" + }, + "hide": 0, + "includeAll": false, + "label": "Loki Data Source", + "multi": false, + "name": "loki_datasource", + "options": [], + "query": "loki", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "datasource" + }, + { + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": { + "uid": "${datasource}" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [], + "query": { + "query": "label_values(alloy_component_controller_running_components, cluster)\n", + "refId": "cluster" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "query" + }, + { + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": { + "uid": "${datasource}" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": { + "query": "label_values(alloy_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", + "refId": "namespace" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "query" + }, + { + "allValue": ".*", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "uid": "${datasource}" + }, + "definition": "", + "hide": 0, + "includeAll": true, + "label": "instance", + "multi": true, + "name": "instance", + "options": [], + "query": { + "query": "label_values(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", + "refId": "instance" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d", + "90d" + ] + }, + "timezone": "", + "title": "Alloy / OpenTelemetry", + "uid": "9b6d37c8603e19e8922133984faad93d", + "version": 0, + "weekStart": "" + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/alloy-prometheus.json b/charts/meta-monitoring/src/dashboards/alloy-prometheus.json new file mode 100644 index 0000000..1f73748 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/alloy-prometheus.json @@ -0,0 +1,1337 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + }, + { + "datasource": "$loki_datasource", + "enable": true, + "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"alloy\" | name_extracted=~\"alloy.*\"", + "iconColor": "rgba(0, 211, 255, 1)", + "instant": false, + "name": "Deployments", + "titleFormat": "{{cluster}}/{{namespace}}" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": 29, + "links": [ + { + "icon": "doc", + "targetBlank": true, + "title": "Documentation", + "tooltip": "Component documentation", + "type": "link", + "url": "https://grafana.com/docs/alloy/latest/reference/components/prometheus.remote_write/" + }, + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "alloy-mixin" + ], + "targetBlank": false, + "title": "Dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "collapsed": false, + "datasource": "${datasource}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "title": "prometheus.scrape", + "type": "row" + }, + { + "datasource": "${datasource}", + "description": "Percentage of targets successfully scraped by prometheus.scrape\ncomponents.\n\nThis metric is calculated by dividing the number of targets\nsuccessfully scraped by the total number of targets scraped,\nacross all the namespaces in the selected cluster.\n\nLow success rates can indicate a problem with scrape targets,\nstale service discovery, or Alloy misconfiguration.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum(up{cluster=\"$cluster\"})\n/\ncount (up{cluster=\"$cluster\"})\n", + "instant": false, + "legendFormat": "% of targets successfully scraped", + "range": true, + "refId": "A" + } + ], + "title": "Scrape success rate in $cluster", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Duration of successful scrapes by prometheus.scrape components,\nacross all the namespaces in the selected cluster.\n\nThis metric should be below your configured scrape interval.\nHigh durations can indicate a problem with a scrape target or\na performance issue with Alloy.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "quantile(0.99, scrape_duration_seconds{cluster=\"$cluster\"})\n", + "instant": false, + "legendFormat": "p99", + "range": true, + "refId": "A" + }, + { + "datasource": "${datasource}", + "expr": "quantile(0.95, scrape_duration_seconds{cluster=\"$cluster\"})\n", + "instant": false, + "legendFormat": "p95", + "range": true, + "refId": "B" + }, + { + "datasource": "${datasource}", + "expr": "quantile(0.50, scrape_duration_seconds{cluster=\"$cluster\"})\n", + "instant": false, + "legendFormat": "p50", + "range": true, + "refId": "C" + } + ], + "title": "Scrape duration in $cluster", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": "${datasource}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 4, + "title": "prometheus.remote_write", + "type": "row" + }, + { + "datasource": "${datasource}", + "description": "How far behind prometheus.remote_write from samples recently written\nto the WAL.\n\nEach endpoint prometheus.remote_write is configured to send metrics\nhas its own delay. The time shown here is the sum across all\nendpoints for the given component.\n\nIt is normal for the WAL delay to be within 1-3 scrape intervals. If\nthe WAL delay continues to increase beyond that amount, try\nincreasing the number of maximum shards.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 6, + "x": 0, + "y": 12 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum by (instance, component_path, component_id) (\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_path=~\"$component_path\", component_id=~\"$component\"}\n - ignoring(url, remote_name) group_right(instance)\n prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_path=~\"$component_path\", component_id=~\"$component\", url=~\"$url\"}\n)\n", + "instant": false, + "legendFormat": "{{instance}} / {{component_path}} {{component_id}}", + "range": true, + "refId": "A" + } + ], + "title": "WAL delay", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Rate of data containing samples and metadata sent by\nprometheus.remote_write.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "hue", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 6, + "x": 6, + "y": 12 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum without (remote_name, url) (\n rate(prometheus_remote_storage_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_path=~\"$component_path\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval]) +\n rate(prometheus_remote_storage_metadata_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_path=~\"$component_path\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", + "instant": false, + "legendFormat": "{{instance}} / {{component_path}} {{component_id}}", + "range": true, + "refId": "A" + } + ], + "title": "Data write throughput", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Latency of writes to the remote system made by\nprometheus.remote_write.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 6, + "x": 12, + "y": 12 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "histogram_quantile(0.99, sum by (le) (\n rate(prometheus_remote_storage_sent_batch_duration_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_path=~\"$component_path\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n))\n", + "instant": false, + "legendFormat": "99th percentile", + "range": true, + "refId": "A" + }, + { + "datasource": "${datasource}", + "expr": "histogram_quantile(0.50, sum by (le) (\n rate(prometheus_remote_storage_sent_batch_duration_seconds_bucket{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_path=~\"$component_path\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n))\n", + "instant": false, + "legendFormat": "50th percentile", + "range": true, + "refId": "B" + }, + { + "datasource": "${datasource}", + "expr": "sum(rate(prometheus_remote_storage_sent_batch_duration_seconds_sum{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_path=~\"$component_path\", component_id=~\"$component\"}[$__rate_interval])) /\nsum(rate(prometheus_remote_storage_sent_batch_duration_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\", component_path=~\"$component_path\", component_id=~\"$component\"}[$__rate_interval]))\n", + "instant": false, + "legendFormat": "Average", + "range": true, + "refId": "C" + } + ], + "title": "Write latency", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Total number of shards which are concurrently sending samples read\nfrom the Write-Ahead Log.\n\nShards are bound to a minimum and maximum, displayed on the graph.\nThe lowest minimum and the highest maximum across all clients is\nshown.\n\nEach client has its own set of shards, minimum shards, and maximum\nshards; filter to a specific URL to display more granular\ninformation.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Minimum" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 15 + ], + "fill": "dash" + } + }, + { + "id": "custom.showPoints", + "value": "never" + }, + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": false, + "viz": false + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Maximum" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 15 + ], + "fill": "dash" + } + }, + { + "id": "custom.showPoints", + "value": "never" + }, + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": false, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 6, + "x": 18, + "y": 12 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum without (remote_name, url) (\n prometheus_remote_storage_shards{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_path=~\"$component_path\", component_id=~\"$component\", url=~\"$url\"}\n)\n", + "instant": false, + "legendFormat": "{{instance}} / {{component_path}} {{component_id}}", + "range": true, + "refId": "A" + }, + { + "datasource": "${datasource}", + "expr": "min (\n prometheus_remote_storage_shards_min{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_path=~\"$component_path\", component_id=~\"$component\", url=~\"$url\"}\n)\n", + "instant": false, + "legendFormat": "Minimum", + "range": true, + "refId": "B" + }, + { + "datasource": "${datasource}", + "expr": "max (\n prometheus_remote_storage_shards_max{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_path=~\"$component_path\", component_id=~\"$component\", url=~\"$url\"}\n)\n", + "instant": false, + "legendFormat": "Maximum", + "range": true, + "refId": "C" + } + ], + "title": "Shards", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Total outgoing samples sent by prometheus.remote_write.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "hue", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 22 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum without (url, remote_name) (\n rate(prometheus_remote_storage_samples_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_path=~\"$component_path\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", + "instant": false, + "legendFormat": "{{instance}} / {{component_path}} {{component_id}}", + "range": true, + "refId": "A" + } + ], + "title": "Sent samples / second", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Rate of samples which prometheus.remote_write could not send due to\nnon-recoverable errors.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "hue", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 22 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum without (url,remote_name) (\n rate(prometheus_remote_storage_samples_failed_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_path=~\"$component_path\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", + "instant": false, + "legendFormat": "{{instance}} / {{component_path}} {{component_id}}", + "range": true, + "refId": "A" + } + ], + "title": "Failed samples / second", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Rate of samples which prometheus.remote_write attempted to resend\nafter receiving a recoverable error.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "hue", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 22 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum without (url,remote_name) (\n rate(prometheus_remote_storage_samples_retried_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_path=~\"$component_path\", component_id=~\"$component\", url=~\"$url\"}[$__rate_interval])\n)\n", + "instant": false, + "legendFormat": "{{instance}} / {{component_path}} {{component_id}}", + "range": true, + "refId": "A" + } + ], + "title": "Retried samples / second", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Total number of active series across all components.\n\nAn \"active series\" is a series that prometheus.remote_write recently\nreceived a sample for. Active series are garbage collected whenever a\ntruncation of the WAL occurs.\n", + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 32 + }, + "id": 12, + "options": { + "legend": { + "displayMode": "list", + "showLegend": false + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum(prometheus_remote_write_wal_storage_active_series{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_path=~\"$component_path\", component_id=~\"$component\", url=~\"$url\"})\n", + "instant": false, + "legendFormat": "Series", + "range": true, + "refId": "A" + } + ], + "title": "Active series (total)", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Total number of active series which are currently being tracked by\nprometheus.remote_write components, with separate lines for each Alloy instance.\n\nAn \"active series\" is a series that prometheus.remote_write recently\nreceived a sample for. Active series are garbage collected whenever a\ntruncation of the WAL occurs.\n", + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 32 + }, + "id": 13, + "targets": [ + { + "datasource": "${datasource}", + "expr": "prometheus_remote_write_wal_storage_active_series{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id!=\"\", component_path=~\"$component_path\", component_id=~\"$component\", url=~\"$url\"}\n", + "instant": false, + "legendFormat": "{{instance}} / {{component_path}} {{component_id}}", + "range": true, + "refId": "A" + } + ], + "title": "Active series (by instance/component)", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Total number of active series which are currently being tracked by\nprometheus.remote_write components, aggregated across all instances.\n\nAn \"active series\" is a series that prometheus.remote_write recently\nreceived a sample for. Active series are garbage collected whenever a\ntruncation of the WAL occurs.\n", + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 32 + }, + "id": 14, + "targets": [ + { + "datasource": "${datasource}", + "expr": "sum by (component_path, component_id) (prometheus_remote_write_wal_storage_active_series{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id!=\"\", component_path=~\"$component_path\", component_id=~\"$component\", url=~\"$url\"})\n", + "instant": false, + "legendFormat": "{{component_path}} {{component_id}}", + "range": true, + "refId": "A" + } + ], + "title": "Active series (by component)", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [ + "alloy-mixin" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Mimir", + "value": "mimir_ds" + }, + "hide": 0, + "includeAll": false, + "label": "Data Source", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "Loki", + "value": "loki_ds" + }, + "hide": 0, + "includeAll": false, + "label": "Loki Data Source", + "multi": false, + "name": "loki_datasource", + "options": [], + "query": "loki", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "datasource" + }, + { + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": { + "uid": "${datasource}" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [], + "query": { + "query": "label_values(alloy_component_controller_running_components, cluster)\n", + "refId": "cluster" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "query" + }, + { + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": { + "uid": "${datasource}" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": { + "query": "label_values(alloy_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", + "refId": "namespace" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "query" + }, + { + "allValue": ".*", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "uid": "${datasource}" + }, + "definition": "", + "hide": 0, + "includeAll": true, + "label": "instance", + "multi": true, + "name": "instance", + "options": [], + "query": { + "query": "label_values(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", + "refId": "instance" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "query" + }, + { + "allValue": ".*", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "uid": "${datasource}" + }, + "definition": "", + "hide": 0, + "includeAll": true, + "label": "component_path", + "multi": true, + "name": "component_path", + "options": [], + "query": { + "query": "label_values(prometheus_remote_write_wal_samples_appended_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"prometheus\\\\.remote_write\\\\..*\", component_path=~\".*\"}, component_path)\n", + "refId": "component_path" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "query" + }, + { + "allValue": ".*", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "uid": "${datasource}" + }, + "definition": "", + "hide": 0, + "includeAll": true, + "label": "component", + "multi": true, + "name": "component", + "options": [], + "query": { + "query": "label_values(prometheus_remote_write_wal_samples_appended_total{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"prometheus\\\\.remote_write\\\\..*\"}, component_id)\n", + "refId": "component" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "query" + }, + { + "allValue": ".*", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "uid": "${datasource}" + }, + "definition": "", + "hide": 0, + "includeAll": true, + "label": "url", + "multi": true, + "name": "url", + "options": [], + "query": { + "query": "label_values(prometheus_remote_storage_sent_batch_duration_seconds_sum{cluster=\"$cluster\", namespace=\"$namespace\", instance=~\"$instance\", component_id=~\"$component\"}, url)\n", + "refId": "url" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d", + "90d" + ] + }, + "timezone": "", + "title": "Alloy / Prometheus Components", + "uid": "e324cc55567d7f3a8e32860ff8e6d0d9", + "version": 0, + "weekStart": "" + } \ No newline at end of file diff --git a/charts/meta-monitoring/src/dashboards/alloy-resources.json b/charts/meta-monitoring/src/dashboards/alloy-resources.json new file mode 100644 index 0000000..0071006 --- /dev/null +++ b/charts/meta-monitoring/src/dashboards/alloy-resources.json @@ -0,0 +1,840 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + }, + { + "datasource": "$loki_datasource", + "enable": true, + "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"alloy\" | name_extracted=~\"alloy.*\"", + "iconColor": "rgba(0, 211, 255, 1)", + "instant": false, + "name": "Deployments", + "titleFormat": "{{cluster}}/{{namespace}}" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": 26, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "alloy-mixin" + ], + "targetBlank": false, + "title": "Dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": "${datasource}", + "description": "CPU usage of the Alloy process relative to 1 CPU core.\n\nFor example, 100% means using one entire CPU core.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "rate(alloy_resources_process_cpu_seconds_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])", + "instant": false, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "CPU usage", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Resident memory size of the Alloy process.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "alloy_resources_process_resident_memory_bytes{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}", + "instant": false, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Memory (RSS)", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Rate at which the Alloy process performs garbage collections.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "points", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 3, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 8 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "rate(go_gc_duration_seconds_count{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[5m])\nand on(instance)\nalloy_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n", + "instant": false, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Garbage collections", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Number of goroutines which are running in parallel. An infinitely\ngrowing number of these indicates a goroutine leak.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 8 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "go_goroutines{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\nand on(instance)\nalloy_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n", + "instant": false, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Goroutines", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Heap memory currently in use by the Alloy process.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 8 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\nand on(instance)\nalloy_build_info{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}\n", + "instant": false, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Memory (heap inuse)", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Rate of data received across all network interfaces for the machine\nAlloy is running on.\n\nData shown here is across all running processes and not exclusive to\nthe running Alloy process.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "rate(alloy_resources_machine_rx_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])\n", + "instant": false, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Network receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "${datasource}", + "description": "Rate of data sent across all network interfaces for the machine\nAlloy is running on.\n\nData shown here is across all running processes and not exclusive to\nthe running Alloy process.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${datasource}", + "expr": "rate(alloy_resources_machine_tx_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\",instance=~\"$instance\"}[$__rate_interval])\n", + "instant": false, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Network send bandwidth", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [ + "alloy-mixin" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Mimir", + "value": "mimir_ds" + }, + "hide": 0, + "includeAll": false, + "label": "Data Source", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "Loki", + "value": "loki_ds" + }, + "hide": 0, + "includeAll": false, + "label": "Loki Data Source", + "multi": false, + "name": "loki_datasource", + "options": [], + "query": "loki", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "datasource" + }, + { + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": { + "uid": "${datasource}" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [], + "query": { + "query": "label_values(alloy_component_controller_running_components, cluster)\n", + "refId": "cluster" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "query" + }, + { + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": { + "uid": "${datasource}" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": { + "query": "label_values(alloy_component_controller_running_components{cluster=\"$cluster\"}, namespace)\n", + "refId": "namespace" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "query" + }, + { + "allValue": ".*", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "uid": "${datasource}" + }, + "definition": "", + "hide": 0, + "includeAll": true, + "label": "instance", + "multi": true, + "name": "instance", + "options": [], + "query": { + "query": "label_values(alloy_component_controller_running_components{cluster=\"$cluster\", namespace=\"$namespace\"}, instance)\n", + "refId": "instance" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d", + "90d" + ] + }, + "timezone": "", + "title": "Alloy / Resources", + "uid": "d6a8574c31f3d7cb8f1345ec84d15a67", + "version": 0, + "weekStart": "" + } \ No newline at end of file diff --git a/charts/meta-monitoring/templates/grafana/agent-dashboards-1.yaml b/charts/meta-monitoring/templates/grafana/agent-dashboards-1.yaml deleted file mode 100644 index 6ab2f2b..0000000 --- a/charts/meta-monitoring/templates/grafana/agent-dashboards-1.yaml +++ /dev/null @@ -1,19 +0,0 @@ -{{- if and .Values.local.grafana.enabled .Values.dashboards.logs.enabled }} ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: agent-dashboards-1 - namespace: {{ $.Release.Namespace }} -data: - "agent-logs-pipeline.json": | - {{ $.Files.Get "src/dashboards/agent-logs-pipeline.json" | fromJson | toJson }} - "agent-operational.json": | - {{ $.Files.Get "src/dashboards/agent-operational.json" | fromJson | toJson }} - "agent-remote-write.json": | - {{ $.Files.Get "src/dashboards/agent-remote-write.json" | fromJson | toJson }} - "agent-tracing-pipeline.json": | - {{ $.Files.Get "src/dashboards/agent-tracing-pipeline.json" | fromJson | toJson }} - "agent.json": | - {{ $.Files.Get "src/dashboards/agent.json" | fromJson | toJson }} -{{- end }} \ No newline at end of file diff --git a/charts/meta-monitoring/templates/grafana/alloy-dashboards-1.yaml b/charts/meta-monitoring/templates/grafana/alloy-dashboards-1.yaml new file mode 100644 index 0000000..0795032 --- /dev/null +++ b/charts/meta-monitoring/templates/grafana/alloy-dashboards-1.yaml @@ -0,0 +1,21 @@ +{{- if and .Values.local.grafana.enabled .Values.dashboards.logs.enabled }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: alloy-dashboards-1 + namespace: {{ $.Release.Namespace }} +data: + "alloy-cluster-node.json": | + {{ $.Files.Get "src/dashboards/alloy-cluster-node.json" | fromJson | toJson }} + "alloy-cluster-overview.json": | + {{ $.Files.Get "src/dashboards/alloy-cluster-overview.json" | fromJson | toJson }} + "alloy-controller.json": | + {{ $.Files.Get "src/dashboards/alloy-controller.json" | fromJson | toJson }} + "alloy-opentelemetry.json": | + {{ $.Files.Get "src/dashboards/alloy-opentelemetry.json" | fromJson | toJson }} + "alloy-prometheus.json": | + {{ $.Files.Get "src/dashboards/alloy-prometheus.json" | fromJson | toJson }} + "alloy-resources.json": | + {{ $.Files.Get "src/dashboards/alloy-resources.json" | fromJson | toJson }} +{{- end }} \ No newline at end of file diff --git a/charts/meta-monitoring/templates/grafana/dashboard.yaml b/charts/meta-monitoring/templates/grafana/dashboard.yaml index 36ce80d..6956561 100644 --- a/charts/meta-monitoring/templates/grafana/dashboard.yaml +++ b/charts/meta-monitoring/templates/grafana/dashboard.yaml @@ -30,10 +30,10 @@ data: {{- end }} - disableDeletion: true editable: false - folder: Agent - name: agent-1 + folder: Alloy + name: alloy-1 options: - path: /var/lib/grafana/dashboards/agent-1 + path: /var/lib/grafana/dashboards/alloy-1 orgId: 1 type: file {{- end }} \ No newline at end of file diff --git a/charts/meta-monitoring/templates/grafana/grafana-deployment.yaml b/charts/meta-monitoring/templates/grafana/grafana-deployment.yaml index d7d795b..b8fcd7d 100644 --- a/charts/meta-monitoring/templates/grafana/grafana-deployment.yaml +++ b/charts/meta-monitoring/templates/grafana/grafana-deployment.yaml @@ -63,8 +63,8 @@ spec: - mountPath: /var/lib/grafana/dashboards/loki-2 name: loki-dashboards-2 {{- end }} - - mountPath: /var/lib/grafana/dashboards/agent-1 - name: agent-dashboards-1 + - mountPath: /var/lib/grafana/dashboards/alloy-1 + name: alloy-dashboards-1 volumes: - name: grafana-pv persistentVolumeClaim: @@ -83,7 +83,7 @@ spec: configMap: name: loki-dashboards-2 {{- end }} - - name: agent-dashboards-1 + - name: alloy-dashboards-1 configMap: - name: agent-dashboards-1 + name: alloy-dashboards-1 {{- end }} diff --git a/charts/meta-monitoring/templates/grafana/loki-dashboards-1.yaml b/charts/meta-monitoring/templates/grafana/loki-dashboards-1.yaml index a23e439..2021341 100644 --- a/charts/meta-monitoring/templates/grafana/loki-dashboards-1.yaml +++ b/charts/meta-monitoring/templates/grafana/loki-dashboards-1.yaml @@ -12,8 +12,6 @@ data: {{ $.Files.Get "src/dashboards/loki-deletion.json" | fromJson | toJson }} "loki-logs.json": | {{ $.Files.Get "src/dashboards/loki-logs.json" | fromJson | toJson }} - "loki-mixin-recording-rules.json": | - {{ $.Files.Get "src/dashboards/loki-mixin-recording-rules.json" | fromJson | toJson }} "loki-operational.json": | {{ $.Files.Get "src/dashboards/loki-operational.json" | fromJson | toJson }} {{- end }} \ No newline at end of file diff --git a/charts/meta-monitoring/values.yaml b/charts/meta-monitoring/values.yaml index f100220..ca5f6e5 100644 --- a/charts/meta-monitoring/values.yaml +++ b/charts/meta-monitoring/values.yaml @@ -75,12 +75,46 @@ logs: metrics: # The list of metrics to retain for logging dashboards retain: - - agent_build_info - - agent_config_last_load_success_timestamp_seconds - - agent_config_last_load_successful - - agent_config_load_failures_total - - agent_wal_samples_appended_total - - agent_wal_storage_active_series + - alloy_build_info + - alloy_config_last_load_success_timestamp_seconds + - alloy_config_last_load_successful + - alloy_config_load_failures_total + - alloy_component_controller_evaluating + - alloy_component_dependencies_wait_seconds + - alloy_component_dependencies_wait_seconds_bucket + - alloy_component_evaluation_seconds + - alloy_component_evaluation_seconds_bucket + - alloy_component_evaluation_seconds_count + - alloy_component_evaluation_seconds_sum + - alloy_component_evaluation_slow_seconds + - alloy_component_controller_running_components + - alloy_resources_machine_rx_bytes_total + - alloy_resources_machine_tx_bytes_total + - alloy_resources_process_cpu_seconds_total + - alloy_resources_process_resident_memory_bytes + - prometheus_remote_write_wal_samples_appended_total + - prometheus_remote_write_wal_storage_active_series + - cluster_node_info + - cluster_node_lamport_time + - cluster_node_update_observers + - cluster_node_gossip_health_score + - cluster_node_gossip_proto_version + - cluster_node_gossip_received_events_total + - cluster_node_peers + - cluster_transport_rx_bytes_total + - cluster_transport_rx_packets_total + - cluster_transport_rx_packets_failed_total + - cluster_transport_stream_rx_bytes_total + - cluster_transport_stream_rx_packets_failed_total + - cluster_transport_stream_rx_packets_total + - cluster_transport_stream_tx_bytes_total + - cluster_transport_stream_tx_packets_total + - cluster_transport_stream_tx_packets_failed_total + - cluster_transport_streams + - cluster_transport_tx_packets_total + - cluster_transport_tx_packets_failed_total + - cluster_transport_rx_packet_queue_length + - cluster_transport_tx_packet_queue_length - container_cpu_usage_seconds_total - container_fs_writes_bytes_total - container_memory_working_set_bytes @@ -96,6 +130,8 @@ metrics: - cortex_prometheus_rule_group_last_duration_seconds - cortex_prometheus_rule_group_last_evaluation_timestamp_seconds - cortex_prometheus_rule_group_iterations_missed_total + - exporter_send_failed_spans_ratio_total + - exporter_sent_spans_ratio_total - go_gc_duration_seconds - go_gc_duration_seconds_count - go_goroutines @@ -175,8 +211,13 @@ metrics: - node_disk_read_bytes_total - node_disk_written_bytes_total - process_start_time_seconds + - processor_batch_batch_send_size_ratio_bucket + - processor_batch_metadata_cardinality_ratio + - processor_batch_timeout_trigger_send_ratio_total + - prometheus_remote_storage_bytes_total - prometheus_remote_storage_enqueue_retries_total - prometheus_remote_storage_highest_timestamp_in_seconds + - prometheus_remote_storage_metadata_bytes_total - prometheus_remote_storage_queue_highest_sent_timestamp_seconds - prometheus_remote_storage_samples_dropped_total - prometheus_remote_storage_samples_failed_total @@ -192,6 +233,8 @@ metrics: - prometheus_remote_storage_shards_max - prometheus_remote_storage_shards_min - prometheus_remote_storage_succeeded_samples_total + - prometheus_remote_write_wal_samples_appended_total + - prometheus_remote_write_wal_storage_active_series - prometheus_sd_discovered_targets - prometheus_target_interval_length_seconds_count - prometheus_target_interval_length_seconds_sum @@ -208,12 +251,17 @@ metrics: - promtail_read_lines_total - promtail_request_duration_seconds_bucket - promtail_sent_entries_total + - rpc_server_duration_milliseconds_bucket + - receiver_accepted_spans_ratio_total + - receiver_refused_spans_ratio_total + - scrape_duration_seconds - traces_exporter_sent_spans - traces_exporter_send_failed_spans - traces_loadbalancer_backend_outcome - traces_loadbalancer_num_backends - traces_receiver_accepted_spans - traces_receiver_refused_spans + - up # Additional metrics to retain extraMetrics: [] # Set enabled = true to add the default logs dashboards to the local Grafana