Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: stop scraping unused metrics #3184

Open
wants to merge 3 commits into
base: drosiek-cleanup-prometheus-rules
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 15 additions & 60 deletions deploy/helm/sumologic/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1668,11 +1668,9 @@ kube-prometheus-stack:
## see docs/scraped_metrics.md
## apiserver_request_count
## apiserver_request_total
## apiserver_request_duration_seconds_count
## apiserver_request_duration_seconds_sum
metricRelabelings:
- action: keep
regex: (?:apiserver_request_(?:count|total)|apiserver_request_(?:duration_seconds)_(?:count|sum))
regex: (?:apiserver_request_(?:count|total))
sourceLabels: [__name__]
kubelet:
serviceMonitor:
Expand All @@ -1682,30 +1680,11 @@ kube-prometheus-stack:
probes: false
## Enable scraping /metrics/resource/v1alpha1 from kubelet's service
resource: false
## see docs/scraped_metrics.md
## kubelet metrics:
## kubelet_docker_operations_errors
## kubelet_docker_operations_errors_total
## kubelet_docker_operations_duration_seconds_count
## kubelet_docker_operations_duration_seconds_sum
## kubelet_runtime_operations_duration_seconds_count
## kubelet_runtime_operations_duration_seconds_sum
## kubelet_running_container_count
## kubelet_running_containers
## kubelet_running_pod_count
## kubelet_running_pods
## kubelet_docker_operations_latency_microseconds
## kubelet_docker_operations_latency_microseconds_count
## kubelet_docker_operations_latency_microseconds_sum
## kubelet_runtime_operations_latency_microseconds
## kubelet_runtime_operations_latency_microseconds_count
## kubelet_runtime_operations_latency_microseconds_sum
## Drop all metrics
metricRelabelings:
- action: keep
regex: (?:kubelet_docker_operations_errors(?:|_total)|kubelet_(?:docker|runtime)_operations_duration_seconds_(?:count|sum)|kubelet_running_(?:container|pod)(?:_count|s)|kubelet_(:?docker|runtime)_operations_latency_microseconds(?:|_count|_sum))
regex: "^$"
sourceLabels: [__name__]
- action: labeldrop
regex: id
## see docs/scraped_metrics.md
## cadvisor container metrics
## container_cpu_usage_seconds_total
Expand All @@ -1719,15 +1698,15 @@ kube-prometheus-stack:
## container_network_transmit_bytes_total
cAdvisorMetricRelabelings:
- action: keep
regex: (?:container_cpu_usage_seconds_total|container_memory_working_set_bytes|container_fs_usage_bytes|container_fs_limit_bytes|container_cpu_cfs_throttled_seconds_total|container_network_receive_bytes_total|container_network_transmit_bytes_total)
regex: (?:container_cpu_usage_seconds_total|container_memory_working_set_bytes|container_cpu_cfs_throttled_seconds_total|container_network_receive_bytes_total|container_network_transmit_bytes_total)
sourceLabels: [__name__]
## Drop container metrics with container tag set to an empty string:
## these are the pod aggregated container metrics which can be aggregated
## in Sumo anyway. There's also some cgroup-specific time series we also
## do not need.
- action: drop
sourceLabels: [__name__, container]
regex: (?:container_cpu_usage_seconds_total|container_memory_working_set_bytes|container_fs_usage_bytes|container_fs_limit_bytes);$
regex: (?:container_cpu_usage_seconds_total|container_memory_working_set_bytes);$
- action: labelmap
regex: container_name
replacement: container
Expand All @@ -1738,19 +1717,7 @@ kube-prometheus-stack:
regex: (id|name)
kubeControllerManager:
serviceMonitor:
## Scrape interval. If not set, the Prometheus default scrape interval is used.
interval:
## see docs/scraped_metrics.md
## controller manager metrics
## https://kubernetes.io/docs/concepts/cluster-administration/monitoring/#kube-controller-manager-metrics
## e.g.
## cloudprovider_aws_api_request_duration_seconds_bucket
## cloudprovider_aws_api_request_duration_seconds_count
## cloudprovider_aws_api_request_duration_seconds_sum
metricRelabelings:
- action: keep
regex: (?:cloudprovider_.*_api_request_duration_seconds.*)
sourceLabels: [__name__]
enabled: false
coreDns:
serviceMonitor:
## Scrape interval. If not set, the Prometheus default scrape interval is used.
Expand Down Expand Up @@ -1813,33 +1780,23 @@ kube-prometheus-stack:
## process_resident_memory_bytes
metricRelabelings:
- action: keep
regex: (?:etcd_request_cache_(?:add|get)_(?:duration_seconds|latencies_summary)_(?:count|sum)|etcd_helper_cache_(?:hit|miss)_(?:count|total)|etcd_mvcc_db_total_size_in_bytes|etcd_debugging_(store_(expires_total|watchers))|etcd_disk_(backend_commit|wal_fsync)_duration_seconds_.*|etcd_grpc_proxy_cache_(hits|misses)_total|etcd_network_client_grpc_(received|sent)_bytes_total|etcd_server_(has_leader|leader_changes_seen_total)|etcd_server_proposals_(pending|(applied|committed|failed)_total)|process_(cpu_seconds_total|open_fds|resident_memory_bytes))
regex: (?:etcd_debugging_(store_(expires_total|watchers))|etcd_disk_(backend_commit|wal_fsync)_duration_seconds_.*|etcd_grpc_proxy_cache_(hits|misses)_total|etcd_network_client_grpc_(received|sent)_bytes_total|etcd_server_(has_leader|leader_changes_seen_total)|etcd_server_proposals_(pending|(applied|committed|failed)_total)|process_(cpu_seconds_total|open_fds|resident_memory_bytes))
sourceLabels: [__name__]
kubeScheduler:
serviceMonitor:
## Scrape interval. If not set, the Prometheus default scrape interval is used.
interval:
## see docs/scraped_metrics.md
##
## scheduler_e2e_* is present for K8s <1.23
## scheduler_e2e_scheduling_duration_seconds_bucket
## scheduler_e2e_scheduling_duration_seconds_count
## scheduler_e2e_scheduling_duration_seconds_sum
##
## scheduler_scheduling_attempt_duration_seconds is present for K8s >=1.23
## scheduler_scheduling_attempt_duration_seconds_bucket
## scheduler_scheduling_attempt_duration_seconds_count
## scheduler_scheduling_attempt_duration_seconds_sum
##
## scheduler_framework_extension_point_duration_seconds_bucket
## scheduler_framework_extension_point_duration_seconds_count
## scheduler_framework_extension_point_duration_seconds_sum
## scheduler_scheduling_algorithm_duration_seconds_bucket
## scheduler_scheduling_algorithm_duration_seconds_count
## scheduler_scheduling_algorithm_duration_seconds_sum
metricRelabelings:
- action: keep
regex: (?:scheduler_(?:e2e_scheduling|scheduling_attempt|framework_extension_point|scheduling_algorithm)_duration_seconds.*)
regex: (?:scheduler_(?:e2e_scheduling|scheduling_attempt|scheduling_algorithm)_duration_seconds_sum)
sourceLabels: [__name__]

alertmanager:
Expand All @@ -1865,6 +1822,11 @@ kube-prometheus-stack:
enabled: false
tls:
enabled: false
serviceMonitor:
metricRelabelings:
- action: keep
regex: "^$"
sourceLabels: [__name__]
## Resource limits for kube-state-metrics
kube-state-metrics:
## Put here the new name if you want to override the full name used for Kube State Metrics components.
Expand Down Expand Up @@ -1912,7 +1874,6 @@ kube-prometheus-stack:
## kube_statefulset_status_replicas
## kube_hpa_spec_max_replicas
## kube_hpa_spec_min_replicas
## kube_hpa_status_condition
## kube_hpa_status_current_replicas
## kube_hpa_status_desired_replicas
## kube pod state metrics
Expand All @@ -1925,13 +1886,9 @@ kube-prometheus-stack:
## kube_pod_container_status_waiting_reason
## kube_pod_status_phase
## kube_pod_info
## kube_service_info
## kube_service_spec_external_ip
## kube_service_spec_type
## kube_service_status_load_balancer_ingress
metricRelabelings:
- action: keep
regex: (?:kube_statefulset_status_observed_generation|kube_statefulset_status_replicas|kube_statefulset_replicas|kube_statefulset_metadata_generation|kube_daemonset_status_current_number_scheduled|kube_daemonset_status_desired_number_scheduled|kube_daemonset_status_number_misscheduled|kube_daemonset_status_number_unavailable|kube_deployment_spec_replicas|kube_deployment_status_replicas_available|kube_deployment_status_replicas_unavailable|kube_node_info|kube_node_status_allocatable|kube_node_status_capacity|kube_node_status_condition|kube_hpa_spec_max_replicas|kube_hpa_spec_min_replicas|kube_hpa_status_(condition|(current|desired)_replicas)|kube_pod_container_info|kube_pod_container_resource_requests|kube_pod_container_resource_limits|kube_pod_container_status_ready|kube_pod_container_status_terminated_reason|kube_pod_container_status_waiting_reason|kube_pod_container_status_restarts_total|kube_pod_status_phase|kube_pod_info|kube_service_info|kube_service_spec_external_ip|kube_service_spec_type|kube_service_status_load_balancer_ingress)
regex: (?:kube_statefulset_status_observed_generation|kube_statefulset_status_replicas|kube_statefulset_replicas|kube_statefulset_metadata_generation|kube_daemonset_status_current_number_scheduled|kube_daemonset_status_desired_number_scheduled|kube_daemonset_status_number_misscheduled|kube_daemonset_status_number_unavailable|kube_deployment_spec_replicas|kube_deployment_status_replicas_available|kube_deployment_status_replicas_unavailable|kube_node_info|kube_node_status_allocatable|kube_node_status_capacity|kube_node_status_condition|kube_hpa_spec_max_replicas|kube_hpa_spec_min_replicas|kube_hpa_status_(current|desired)_replicas|kube_pod_container_info|kube_pod_container_resource_requests|kube_pod_container_resource_limits|kube_pod_container_status_ready|kube_pod_container_status_terminated_reason|kube_pod_container_status_waiting_reason|kube_pod_container_status_restarts_total|kube_pod_status_phase|kube_pod_info)
sourceLabels: [__name__]
## Drop unnecessary labels Prometheus adds to these metrics
## We don't want container=kube-state-metrics on everything
Expand Down Expand Up @@ -1993,8 +1950,6 @@ kube-prometheus-stack:
## node_network_transmit_bytes_total
## node_filesystem_avail_bytes
## node_filesystem_size_bytes
## node_filesystem_files_free
## node_filesystem_files
metricRelabelings:
- action: keep
regex: (?:node_load1|node_load5|node_load15|node_cpu_seconds_total|node_disk_io_time_weighted_seconds_total|node_disk_io_time_seconds_total|node_vmstat_pgpgin|node_vmstat_pgpgout|node_memory_MemFree_bytes|node_memory_Cached_bytes|node_memory_Buffers_bytes|node_memory_MemTotal_bytes|node_network_receive_drop_total|node_network_transmit_drop_total|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_filesystem_avail_bytes|node_filesystem_size_bytes)
Expand Down