Skip to content

Commit

Permalink
Discovery improvements (#1120)
Browse files Browse the repository at this point in the history
* Use new alloy modules to enable the right change to get openshift working

Signed-off-by: Pete Wall <[email protected]>

* Further improvements to discovery and openshift

Signed-off-by: Pete Wall <[email protected]>

* Update charts/k8s-monitoring/charts/feature-cluster-metrics/README.md

Co-authored-by: Stephen Lang <[email protected]>

* Also fix values.yaml

Signed-off-by: Pete Wall <[email protected]>

* More readme updates

Signed-off-by: Pete Wall <[email protected]>

---------

Signed-off-by: Pete Wall <[email protected]>
Co-authored-by: Stephen Lang <[email protected]>
  • Loading branch information
petewall and skl authored Jan 16, 2025
1 parent a3990d2 commit 242434d
Show file tree
Hide file tree
Showing 66 changed files with 1,730 additions and 538 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,16 @@ declare "scrape" {
optional = true
}

argument "scheme" {
comment = "The scheme to use when scraping metrics (default: http)"
optional = true
}

argument "bearer_token_file" {
comment = "The bearer token file (default: none)"
optional = true
}

argument "scrape_interval" {
comment = "How often to scrape metrics from the targets (default: 60s)"
optional = true
Expand Down Expand Up @@ -125,6 +135,12 @@ declare "scrape" {
scrape_interval = coalesce(argument.scrape_interval.value, "60s")
scrape_timeout = coalesce(argument.scrape_timeout.value, "10s")

scheme = coalesce(argument.scheme.value, "http")
bearer_token_file = coalesce(argument.bearer_token_file.value, "")
tls_config {
insecure_skip_verify = true
}

clustering {
enabled = coalesce(argument.clustering.value, false)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,16 @@ declare "scrape" {
optional = true
}

argument "scheme" {
comment = "The scheme to use when scraping metrics (default: http)"
optional = true
}

argument "bearer_token_file" {
comment = "The bearer token file (default: none)"
optional = true
}

argument "scrape_interval" {
comment = "How often to scrape metrics from the targets (default: 60s)"
optional = true
Expand Down Expand Up @@ -217,6 +227,12 @@ declare "scrape" {
scrape_interval = coalesce(argument.scrape_interval.value, "60s")
scrape_timeout = coalesce(argument.scrape_timeout.value, "10s")

scheme = coalesce(argument.scheme.value, "http")
bearer_token_file = coalesce(argument.bearer_token_file.value, "")
tls_config {
insecure_skip_verify = true
}

clustering {
enabled = coalesce(argument.clustering.value, false)
}
Expand Down
16 changes: 10 additions & 6 deletions charts/k8s-monitoring/charts/feature-cluster-metrics/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ Be sure to perform actual integration testing in a live environment in the main [k8

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| kube-state-metrics.bearerTokenFile | string | `""` | The bearer token file to use when scraping metrics from kube-state-metrics. |
| kube-state-metrics.deploy | bool | `true` | Deploy kube-state-metrics. Set to false if your cluster already has kube-state-metrics deployed. |
| kube-state-metrics.enabled | bool | `true` | Scrape metrics from kube-state-metrics. |
| kube-state-metrics.extraDiscoveryRules | string | `""` | Rule blocks to be added to the discovery.relabel component for kube-state-metrics. These relabeling rules are applied pre-scrape against the targets from service discovery. Before the scrape, any remaining target labels that start with __ (i.e. __meta_kubernetes*) are dropped. ([docs](https://grafana.com/docs/alloy/latest/reference/components/discovery/discovery.relabel/#rule-block)) |
Expand All @@ -197,7 +198,10 @@ Be sure to perform actual integration testing in a live environment in the main [k8
| kube-state-metrics.metricsTuning.excludeMetrics | list | `[]` | Metrics to drop. Can use regular expressions. |
| kube-state-metrics.metricsTuning.includeMetrics | list | `[]` | Metrics to keep. Can use regular expressions. |
| kube-state-metrics.metricsTuning.useDefaultAllowList | bool | `true` | Filter the list of metrics from Kube State Metrics to a useful, minimal set. |
| kube-state-metrics.namespace | string | `""` | Namespace to locate kube-state-metrics pods. If `deploy` is set to `true`, this will automatically be set to the namespace where this Helm chart is deployed. |
| kube-state-metrics.scrapeInterval | string | `60s` | How frequently to scrape kube-state-metrics metrics. |
| kube-state-metrics.service.portName | string | `"http"` | The port name used by kube-state-metrics. |
| kube-state-metrics.service.scheme | string | `"http"` | The scrape scheme used by kube-state-metrics. |

### Kube Controller Manager

Expand Down Expand Up @@ -273,16 +277,12 @@ Be sure to perform actual integration testing in a live environment in the main [k8
| kubeletResource.metricsTuning.useDefaultAllowList | bool | `true` | Filter the list of resource metrics from the Kubelet to the minimal set required for Kubernetes Monitoring. |
| kubeletResource.scrapeInterval | string | `60s` | How frequently to scrape Kubelet Resource metrics. |

### Node Exporter - Deployment settings

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| node-exporter.deploy | bool | `true` | Deploy Node Exporter. Set to false if your cluster already has Node Exporter deployed. |

### Node Exporter

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| node-exporter.bearerTokenFile | string | `""` | The bearer token file to use when scraping metrics from Node Exporter. |
| node-exporter.deploy | bool | `true` | Deploy Node Exporter. Set to false if your cluster already has Node Exporter deployed. |
| node-exporter.enabled | bool | `true` | Scrape metrics from Node Exporter. |
| node-exporter.extraDiscoveryRules | string | `""` | Rule blocks to be added to the discovery.relabel component for Node Exporter. These relabeling rules are applied pre-scrape against the targets from service discovery. Before the scrape, any remaining target labels that start with __ (i.e. __meta_kubernetes*) are dropped. ([docs](https://grafana.com/docs/alloy/latest/reference/components/discovery/discovery.relabel/#rule-block)) |
| node-exporter.extraMetricProcessingRules | string | `""` | Rule blocks to be added to the prometheus.relabel component for Node Exporter metrics. These relabeling rules are applied post-scrape against the metrics returned from the scraped target, no `__meta*` labels are present. ([docs](https://grafana.com/docs/alloy/latest/reference/components/prometheus/prometheus.relabel/#rule-block)) |
Expand All @@ -293,7 +293,10 @@ Be sure to perform actual integration testing in a live environment in the main [k8
| node-exporter.metricsTuning.includeMetrics | list | `[]` | Metrics to keep. Can use regular expressions. |
| node-exporter.metricsTuning.useDefaultAllowList | bool | `true` | Filter the list of metrics from Node Exporter to the minimal set required for Kubernetes Monitoring. |
| node-exporter.metricsTuning.useIntegrationAllowList | bool | `false` | Filter the list of metrics from Node Exporter to the minimal set required for Kubernetes Monitoring as well as the Node Exporter integration. |
| node-exporter.namespace | string | `""` | Namespace to locate Node Exporter pods. If `deploy` is set to `true`, this will automatically be set to the namespace where this Helm chart is deployed. |
| node-exporter.scrapeInterval | string | `60s` | How frequently to scrape Node Exporter metrics. |
| node-exporter.service.portName | string | `"metrics"` | The port name used by Node Exporter. |
| node-exporter.service.scheme | string | `"http"` | The scrape scheme used by Node Exporter. |

### OpenCost

Expand Down Expand Up @@ -332,5 +335,6 @@ Be sure to perform actual integration testing in a live environment in the main [k8
| windows-exporter.metricsTuning.excludeMetrics | list | `[]` | Metrics to drop. Can use regular expressions. |
| windows-exporter.metricsTuning.includeMetrics | list | `[]` | Metrics to keep. Can use regular expressions. |
| windows-exporter.metricsTuning.useDefaultAllowList | bool | `true` | Filter the list of metrics from Windows Exporter to the minimal set required for Kubernetes Monitoring. |
| windows-exporter.namespace | string | `""` | Namespace to locate Windows Exporter pods. If `deploy` is set to `true`, this will automatically be set to the namespace where this Helm chart is deployed. |
| windows-exporter.scrapeInterval | string | `60s` | How frequently to scrape metrics from Windows Exporter. |
<!-- markdownlint-enable no-space-in-emphasis -->
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@
{{- include "alloyModules.load" (deepCopy $ | merge (dict "name" "kube_state_metrics" "path" "modules/kubernetes/kube-state-metrics/metrics.alloy")) | nindent 0 }}

kube_state_metrics.kubernetes "targets" {
{{- if (index .Values "kube-state-metrics").deploy }}
namespaces = [{{ .Release.Namespace | quote }}]
{{- else if (index .Values "kube-state-metrics").namespace }}
namespaces = [{{ (index .Values "kube-state-metrics").namespace | quote }}]
{{- end }}
port_name = {{ (index .Values "kube-state-metrics").service.portName | quote }}
label_selectors = [
{{- range $label, $value := (index .Values "kube-state-metrics").labelMatchers }}
{{ printf "%s=%s" $label $value | quote }},
Expand Down Expand Up @@ -43,6 +49,10 @@ kube_state_metrics.scrape "metrics" {
{{- end }}
{{- if $metricDenyList }}
drop_metrics = {{ $metricDenyList | join "|" | quote }}
{{- end }}
scheme = {{ (index .Values "kube-state-metrics").service.scheme | quote }}
{{- if (index .Values "kube-state-metrics").bearerTokenFile }}
bearer_token_file = {{ (index .Values "kube-state-metrics").bearerTokenFile | quote }}
{{- end }}
scrape_interval = {{ (index .Values "kube-state-metrics").scrapeInterval | default .Values.global.scrapeInterval | quote }}
max_cache_size = {{ (index .Values "kube-state-metrics").maxCacheSize | default .Values.global.maxCacheSize | int }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@
{{- include "alloyModules.load" (deepCopy $ | merge (dict "name" "node_exporter" "path" "modules/system/node-exporter/metrics.alloy")) | nindent 0 }}

node_exporter.kubernetes "targets" {
{{- if (index .Values "node-exporter").deploy }}
namespaces = [{{ .Release.Namespace | quote }}]
{{- else if (index .Values "node-exporter").namespace }}
namespaces = [{{ (index .Values "node-exporter").namespace | quote }}]
{{- end }}
port_name = {{ (index .Values "node-exporter").service.portName | quote }}
label_selectors = [
{{- range $label, $value := (index .Values "node-exporter").labelMatchers }}
{{ printf "%s=%s" $label $value | quote }},
Expand Down Expand Up @@ -50,6 +56,10 @@ node_exporter.scrape "metrics" {
{{- end }}
{{- if $metricDenyList }}
drop_metrics = {{ $metricDenyList | join "|" | quote }}
{{- end }}
scheme = {{ (index .Values "node-exporter").service.scheme | quote }}
{{- if (index .Values "node-exporter").bearerTokenFile }}
bearer_token_file = {{ (index .Values "node-exporter").bearerTokenFile | quote }}
{{- end }}
scrape_interval = {{ (index .Values "node-exporter").scrapeInterval | default .Values.global.scrapeInterval | quote }}
max_cache_size = {{ (index .Values "node-exporter").maxCacheSize | default .Values.global.maxCacheSize | int }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ discovery.kubernetes "windows_exporter_pods" {
namespaces {
names = [{{ .Release.Namespace | quote }}]
}
{{- else if (index .Values "windows-exporter").namespace }}
namespaces {
names = [{{ (index .Values "windows-exporter").namespace | quote }}]
}
{{- end }}
selectors {
role = "pod"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,8 @@ tests:
}
kube_state_metrics.kubernetes "targets" {
namespaces = ["NAMESPACE"]
port_name = "http"
label_selectors = [
"app.kubernetes.io/name=kube-state-metrics",
"release=RELEASE-NAME",
Expand All @@ -257,6 +259,7 @@ tests:
targets = kube_state_metrics.kubernetes.targets.output
clustering = true
keep_metrics = "up|scrape_samples_scraped|kube_configmap_info|kube_configmap_metadata_resource_version|kube_daemonset.*|kube_deployment_metadata_generation|kube_deployment_spec_replicas|kube_deployment_status_condition|kube_deployment_status_observed_generation|kube_deployment_status_replicas_available|kube_deployment_status_replicas_updated|kube_horizontalpodautoscaler_spec_max_replicas|kube_horizontalpodautoscaler_spec_min_replicas|kube_horizontalpodautoscaler_status_current_replicas|kube_horizontalpodautoscaler_status_desired_replicas|kube_job.*|kube_namespace_status_phase|kube_node.*|kube_persistentvolume_status_phase|kube_persistentvolumeclaim_access_mode|kube_persistentvolumeclaim_info|kube_persistentvolumeclaim_labels|kube_persistentvolumeclaim_resource_requests_storage_bytes|kube_persistentvolumeclaim_status_phase|kube_pod_container_info|kube_pod_container_resource_limits|kube_pod_container_resource_requests|kube_pod_container_status_last_terminated_reason|kube_pod_container_status_restarts_total|kube_pod_container_status_waiting_reason|kube_pod_info|kube_pod_owner|kube_pod_spec_volumes_persistentvolumeclaims_info|kube_pod_start_time|kube_pod_status_phase|kube_pod_status_reason|kube_replicaset.*|kube_resourcequota|kube_secret_metadata_resource_version|kube_statefulset.*"
scheme = "http"
scrape_interval = "60s"
max_cache_size = 100000
forward_to = argument.metrics_destinations.value
Expand All @@ -270,6 +273,8 @@ tests:
}
node_exporter.kubernetes "targets" {
namespaces = ["NAMESPACE"]
port_name = "metrics"
label_selectors = [
"app.kubernetes.io/name=node-exporter",
"release=RELEASE-NAME",
Expand All @@ -290,6 +295,7 @@ tests:
job_label = "integrations/node_exporter"
clustering = true
keep_metrics = "up|scrape_samples_scraped|node_cpu.*|node_exporter_build_info|node_filesystem.*|node_memory.*|node_network_receive_bytes_total|node_network_receive_drop_total|node_network_transmit_bytes_total|node_network_transmit_drop_total|process_cpu_seconds_total|process_resident_memory_bytes"
scheme = "http"
scrape_interval = "60s"
max_cache_size = 100000
forward_to = argument.metrics_destinations.value
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,8 @@ tests:
}
kube_state_metrics.kubernetes "targets" {
namespaces = ["NAMESPACE"]
port_name = "http"
label_selectors = [
"app.kubernetes.io/name=kube-state-metrics",
"release=RELEASE-NAME",
Expand All @@ -167,6 +169,7 @@ tests:
targets = discovery.relabel.kube_state_metrics.output
clustering = true
keep_metrics = "up|scrape_samples_scraped|kube_configmap_info|kube_configmap_metadata_resource_version|kube_daemonset.*|kube_deployment_metadata_generation|kube_deployment_spec_replicas|kube_deployment_status_condition|kube_deployment_status_observed_generation|kube_deployment_status_replicas_available|kube_deployment_status_replicas_updated|kube_horizontalpodautoscaler_spec_max_replicas|kube_horizontalpodautoscaler_spec_min_replicas|kube_horizontalpodautoscaler_status_current_replicas|kube_horizontalpodautoscaler_status_desired_replicas|kube_job.*|kube_namespace_status_phase|kube_node.*|kube_persistentvolume_status_phase|kube_persistentvolumeclaim_access_mode|kube_persistentvolumeclaim_info|kube_persistentvolumeclaim_labels|kube_persistentvolumeclaim_resource_requests_storage_bytes|kube_persistentvolumeclaim_status_phase|kube_pod_container_info|kube_pod_container_resource_limits|kube_pod_container_resource_requests|kube_pod_container_status_last_terminated_reason|kube_pod_container_status_restarts_total|kube_pod_container_status_waiting_reason|kube_pod_info|kube_pod_owner|kube_pod_spec_volumes_persistentvolumeclaims_info|kube_pod_start_time|kube_pod_status_phase|kube_pod_status_reason|kube_replicaset.*|kube_resourcequota|kube_secret_metadata_resource_version|kube_statefulset.*"
scheme = "http"
scrape_interval = "60s"
max_cache_size = 100000
forward_to = [prometheus.relabel.kube_state_metrics.receiver]
Expand All @@ -191,6 +194,8 @@ tests:
}
node_exporter.kubernetes "targets" {
namespaces = ["NAMESPACE"]
port_name = "metrics"
label_selectors = [
"app.kubernetes.io/name=node-exporter",
"release=RELEASE-NAME",
Expand All @@ -216,6 +221,7 @@ tests:
job_label = "integrations/node_exporter"
clustering = true
keep_metrics = "up|scrape_samples_scraped|node_cpu.*|node_exporter_build_info|node_filesystem.*|node_memory.*|node_network_receive_bytes_total|node_network_receive_drop_total|node_network_transmit_bytes_total|node_network_transmit_drop_total|process_cpu_seconds_total|process_resident_memory_bytes"
scheme = "http"
scrape_interval = "60s"
max_cache_size = 100000
forward_to = argument.metrics_destinations.value
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,8 @@ tests:
}
kube_state_metrics.kubernetes "targets" {
namespaces = ["NAMESPACE"]
port_name = "http"
label_selectors = [
"app.kubernetes.io/name=kube-state-metrics",
"release=RELEASE-NAME",
Expand All @@ -140,6 +142,7 @@ tests:
targets = kube_state_metrics.kubernetes.targets.output
clustering = true
keep_metrics = "up|scrape_samples_scraped|kube_configmap_info|kube_configmap_metadata_resource_version|kube_daemonset.*|kube_deployment_metadata_generation|kube_deployment_spec_replicas|kube_deployment_status_condition|kube_deployment_status_observed_generation|kube_deployment_status_replicas_available|kube_deployment_status_replicas_updated|kube_horizontalpodautoscaler_spec_max_replicas|kube_horizontalpodautoscaler_spec_min_replicas|kube_horizontalpodautoscaler_status_current_replicas|kube_horizontalpodautoscaler_status_desired_replicas|kube_job.*|kube_namespace_status_phase|kube_node.*|kube_persistentvolume_status_phase|kube_persistentvolumeclaim_access_mode|kube_persistentvolumeclaim_info|kube_persistentvolumeclaim_labels|kube_persistentvolumeclaim_resource_requests_storage_bytes|kube_persistentvolumeclaim_status_phase|kube_pod_container_info|kube_pod_container_resource_limits|kube_pod_container_resource_requests|kube_pod_container_status_last_terminated_reason|kube_pod_container_status_restarts_total|kube_pod_container_status_waiting_reason|kube_pod_info|kube_pod_owner|kube_pod_spec_volumes_persistentvolumeclaims_info|kube_pod_start_time|kube_pod_status_phase|kube_pod_status_reason|kube_replicaset.*|kube_resourcequota|kube_secret_metadata_resource_version|kube_statefulset.*"
scheme = "http"
scrape_interval = "60s"
max_cache_size = 100000
forward_to = argument.metrics_destinations.value
Expand All @@ -153,6 +156,8 @@ tests:
}
node_exporter.kubernetes "targets" {
namespaces = ["NAMESPACE"]
port_name = "metrics"
label_selectors = [
"app.kubernetes.io/name=node-exporter",
"release=RELEASE-NAME",
Expand All @@ -173,6 +178,7 @@ tests:
job_label = "integrations/node_exporter"
clustering = true
keep_metrics = "up|scrape_samples_scraped|node_cpu.*|node_exporter_build_info|node_filesystem.*|node_memory.*|node_network_receive_bytes_total|node_network_receive_drop_total|node_network_transmit_bytes_total|node_network_transmit_drop_total|process_cpu_seconds_total|process_resident_memory_bytes"
scheme = "http"
scrape_interval = "60s"
max_cache_size = 100000
forward_to = argument.metrics_destinations.value
Expand Down
Loading

0 comments on commit 242434d

Please sign in to comment.