From 840418cef93d293bf50bff8155b438049c73ff42 Mon Sep 17 00:00:00 2001
From: Dominik Rosiek
Date: Thu, 3 Aug 2023 11:13:42 +0200
Subject: [PATCH 1/3] feat!: stop scraping unused metrics

Signed-off-by: Dominik Rosiek
---
 deploy/helm/sumologic/values.yaml | 33 +++++++------------------------
 1 file changed, 7 insertions(+), 26 deletions(-)

diff --git a/deploy/helm/sumologic/values.yaml b/deploy/helm/sumologic/values.yaml
index cbbb74b25a..16ebe5153e 100644
--- a/deploy/helm/sumologic/values.yaml
+++ b/deploy/helm/sumologic/values.yaml
@@ -1668,11 +1668,9 @@ kube-prometheus-stack:
       ## see docs/scraped_metrics.md
       ## apiserver_request_count
       ## apiserver_request_total
-      ## apiserver_request_duration_seconds_count
-      ## apiserver_request_duration_seconds_sum
       metricRelabelings:
         - action: keep
-          regex: (?:apiserver_request_(?:count|total)|apiserver_request_(?:duration_seconds)_(?:count|sum))
+          regex: (?:apiserver_request_(?:count|total))
           sourceLabels: [__name__]
   kubelet:
     serviceMonitor:
@@ -1719,7 +1717,7 @@ kube-prometheus-stack:
       ## container_network_transmit_bytes_total
       cAdvisorMetricRelabelings:
         - action: keep
-          regex: (?:container_cpu_usage_seconds_total|container_memory_working_set_bytes|container_fs_usage_bytes|container_fs_limit_bytes|container_cpu_cfs_throttled_seconds_total|container_network_receive_bytes_total|container_network_transmit_bytes_total)
+          regex: (?:container_cpu_usage_seconds_total|container_memory_working_set_bytes|container_cpu_cfs_throttled_seconds_total|container_network_receive_bytes_total|container_network_transmit_bytes_total)
           sourceLabels: [__name__]
         ## Drop container metrics with container tag set to an empty string:
         ## these are the pod aggregated container metrics which can be aggregated
@@ -1727,7 +1725,7 @@ kube-prometheus-stack:
         ## do not need.
        - action: drop
          sourceLabels: [__name__, container]
-          regex: (?:container_cpu_usage_seconds_total|container_memory_working_set_bytes|container_fs_usage_bytes|container_fs_limit_bytes);$
+          regex: (?:container_cpu_usage_seconds_total|container_memory_working_set_bytes);$
        - action: labelmap
          regex: container_name
          replacement: container
@@ -1813,7 +1811,7 @@ kube-prometheus-stack:
       ## process_resident_memory_bytes
       metricRelabelings:
         - action: keep
-          regex: (?:etcd_request_cache_(?:add|get)_(?:duration_seconds|latencies_summary)_(?:count|sum)|etcd_helper_cache_(?:hit|miss)_(?:count|total)|etcd_mvcc_db_total_size_in_bytes|etcd_debugging_(store_(expires_total|watchers))|etcd_disk_(backend_commit|wal_fsync)_duration_seconds_.*|etcd_grpc_proxy_cache_(hits|misses)_total|etcd_network_client_grpc_(received|sent)_bytes_total|etcd_server_(has_leader|leader_changes_seen_total)|etcd_server_proposals_(pending|(applied|committed|failed)_total)|process_(cpu_seconds_total|open_fds|resident_memory_bytes))
+          regex: (?:etcd_debugging_(store_(expires_total|watchers))|etcd_disk_(backend_commit|wal_fsync)_duration_seconds_.*|etcd_grpc_proxy_cache_(hits|misses)_total|etcd_network_client_grpc_(received|sent)_bytes_total|etcd_server_(has_leader|leader_changes_seen_total)|etcd_server_proposals_(pending|(applied|committed|failed)_total)|process_(cpu_seconds_total|open_fds|resident_memory_bytes))
           sourceLabels: [__name__]
   kubeScheduler:
     serviceMonitor:
@@ -1821,25 +1819,15 @@ kube-prometheus-stack:
       interval:
       ## see docs/scraped_metrics.md
       ##
-      ## scheduler_e2e_* is present for K8s <1.23
-      ## scheduler_e2e_scheduling_duration_seconds_bucket
-      ## scheduler_e2e_scheduling_duration_seconds_count
       ## scheduler_e2e_scheduling_duration_seconds_sum
       ##
       ## scheduler_scheduling_attempt_duration_seconds is present for K8s >=1.23
-      ## scheduler_scheduling_attempt_duration_seconds_bucket
-      ## scheduler_scheduling_attempt_duration_seconds_count
-      ## scheduler_scheduling_attempt_duration_seconds_sum
       ##
-      ## scheduler_framework_extension_point_duration_seconds_bucket
-      ## scheduler_framework_extension_point_duration_seconds_count
-      ## scheduler_framework_extension_point_duration_seconds_sum
-      ## scheduler_scheduling_algorithm_duration_seconds_bucket
-      ## scheduler_scheduling_algorithm_duration_seconds_count
+      ## scheduler_scheduling_attempt_duration_seconds_sum
       ## scheduler_scheduling_algorithm_duration_seconds_sum
       metricRelabelings:
         - action: keep
-          regex: (?:scheduler_(?:e2e_scheduling|scheduling_attempt|framework_extension_point|scheduling_algorithm)_duration_seconds.*)
+          regex: (?:scheduler_(?:e2e_scheduling|scheduling_attempt|scheduling_algorithm)_duration_seconds_sum)
           sourceLabels: [__name__]

   alertmanager:
@@ -1912,7 +1900,6 @@ kube-prometheus-stack:
       ## kube_statefulset_status_replicas
       ## kube_hpa_spec_max_replicas
       ## kube_hpa_spec_min_replicas
-      ## kube_hpa_status_condition
       ## kube_hpa_status_current_replicas
       ## kube_hpa_status_desired_replicas
       ## kube pod state metrics
@@ -1925,13 +1912,9 @@ kube-prometheus-stack:
       ## kube_pod_container_status_waiting_reason
       ## kube_pod_status_phase
       ## kube_pod_info
-      ## kube_service_info
-      ## kube_service_spec_external_ip
-      ## kube_service_spec_type
-      ## kube_service_status_load_balancer_ingress
       metricRelabelings:
         - action: keep
-          regex: (?:kube_statefulset_status_observed_generation|kube_statefulset_status_replicas|kube_statefulset_replicas|kube_statefulset_metadata_generation|kube_daemonset_status_current_number_scheduled|kube_daemonset_status_desired_number_scheduled|kube_daemonset_status_number_misscheduled|kube_daemonset_status_number_unavailable|kube_deployment_spec_replicas|kube_deployment_status_replicas_available|kube_deployment_status_replicas_unavailable|kube_node_info|kube_node_status_allocatable|kube_node_status_capacity|kube_node_status_condition|kube_hpa_spec_max_replicas|kube_hpa_spec_min_replicas|kube_hpa_status_(condition|(current|desired)_replicas)|kube_pod_container_info|kube_pod_container_resource_requests|kube_pod_container_resource_limits|kube_pod_container_status_ready|kube_pod_container_status_terminated_reason|kube_pod_container_status_waiting_reason|kube_pod_container_status_restarts_total|kube_pod_status_phase|kube_pod_info|kube_service_info|kube_service_spec_external_ip|kube_service_spec_type|kube_service_status_load_balancer_ingress)
+          regex: (?:kube_statefulset_status_observed_generation|kube_statefulset_status_replicas|kube_statefulset_replicas|kube_statefulset_metadata_generation|kube_daemonset_status_current_number_scheduled|kube_daemonset_status_desired_number_scheduled|kube_daemonset_status_number_misscheduled|kube_daemonset_status_number_unavailable|kube_deployment_spec_replicas|kube_deployment_status_replicas_available|kube_deployment_status_replicas_unavailable|kube_node_info|kube_node_status_allocatable|kube_node_status_capacity|kube_node_status_condition|kube_hpa_spec_max_replicas|kube_hpa_spec_min_replicas|kube_hpa_status_(current|desired)_replicas|kube_pod_container_info|kube_pod_container_resource_requests|kube_pod_container_resource_limits|kube_pod_container_status_ready|kube_pod_container_status_terminated_reason|kube_pod_container_status_waiting_reason|kube_pod_container_status_restarts_total|kube_pod_status_phase|kube_pod_info)
           sourceLabels: [__name__]
       ## Drop unnecessary labels Prometheus adds to these metrics
       ## We don't want container=kube-state-metrics on everything
@@ -1993,8 +1976,6 @@ kube-prometheus-stack:
       ## node_network_transmit_bytes_total
       ## node_filesystem_avail_bytes
       ## node_filesystem_size_bytes
-      ## node_filesystem_files_free
-      ## node_filesystem_files
       metricRelabelings:
         - action: keep
           regex: (?:node_load1|node_load5|node_load15|node_cpu_seconds_total|node_disk_io_time_weighted_seconds_total|node_disk_io_time_seconds_total|node_vmstat_pgpgin|node_vmstat_pgpgout|node_memory_MemFree_bytes|node_memory_Cached_bytes|node_memory_Buffers_bytes|node_memory_MemTotal_bytes|node_network_receive_drop_total|node_network_transmit_drop_total|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_filesystem_avail_bytes|node_filesystem_size_bytes)

From d2df8dccab433045fa470a60edecb446fb0b953f Mon Sep 17 00:00:00 2001
From: Dominik Rosiek
Date: Thu, 3 Aug 2023 11:28:38 +0200
Subject: [PATCH 2/3] feat!: stop scraping unused metrics

Signed-off-by: Dominik Rosiek
---
 deploy/helm/sumologic/values.yaml | 37 +++----------------------------
 1 file changed, 3 insertions(+), 34 deletions(-)

diff --git a/deploy/helm/sumologic/values.yaml b/deploy/helm/sumologic/values.yaml
index 16ebe5153e..01b87b93fd 100644
--- a/deploy/helm/sumologic/values.yaml
+++ b/deploy/helm/sumologic/values.yaml
@@ -1680,30 +1680,11 @@ kube-prometheus-stack:
       probes: false
       ## Enable scraping /metrics/resource/v1alpha1 from kubelet's service
       resource: false
-      ## see docs/scraped_metrics.md
-      ## kubelet metrics:
-      ## kubelet_docker_operations_errors
-      ## kubelet_docker_operations_errors_total
-      ## kubelet_docker_operations_duration_seconds_count
-      ## kubelet_docker_operations_duration_seconds_sum
-      ## kubelet_runtime_operations_duration_seconds_count
-      ## kubelet_runtime_operations_duration_seconds_sum
-      ## kubelet_running_container_count
-      ## kubelet_running_containers
-      ## kubelet_running_pod_count
-      ## kubelet_running_pods
-      ## kubelet_docker_operations_latency_microseconds
-      ## kubelet_docker_operations_latency_microseconds_count
-      ## kubelet_docker_operations_latency_microseconds_sum
-      ## kubelet_runtime_operations_latency_microseconds
-      ## kubelet_runtime_operations_latency_microseconds_count
-      ## kubelet_runtime_operations_latency_microseconds_sum
+      ## Drop all metrics
       metricRelabelings:
         - action: keep
-          regex: (?:kubelet_docker_operations_errors(?:|_total)|kubelet_(?:docker|runtime)_operations_duration_seconds_(?:count|sum)|kubelet_running_(?:container|pod)(?:_count|s)|kubelet_(:?docker|runtime)_operations_latency_microseconds(?:|_count|_sum))
+          regex: "^$"
           sourceLabels: [__name__]
-        - action: labeldrop
-          regex: id
       ## see docs/scraped_metrics.md
       ## cadvisor container metrics
       ## container_cpu_usage_seconds_total
@@ -1736,19 +1717,7 @@ kube-prometheus-stack:
           regex: (id|name)
   kubeControllerManager:
     serviceMonitor:
-      ## Scrape interval. If not set, the Prometheus default scrape interval is used.
-      interval:
-      ## see docs/scraped_metrics.md
-      ## controller manager metrics
-      ## https://kubernetes.io/docs/concepts/cluster-administration/monitoring/#kube-controller-manager-metrics
-      ## e.g.
-      ## cloudprovider_aws_api_request_duration_seconds_bucket
-      ## cloudprovider_aws_api_request_duration_seconds_count
-      ## cloudprovider_aws_api_request_duration_seconds_sum
-      metricRelabelings:
-        - action: keep
-          regex: (?:cloudprovider_.*_api_request_duration_seconds.*)
-          sourceLabels: [__name__]
+      enabled: false
   coreDns:
     serviceMonitor:
       ## Scrape interval. If not set, the Prometheus default scrape interval is used.

From 48f4933543856c93959d09e73e2e900fb96e3bb2 Mon Sep 17 00:00:00 2001
From: Dominik Rosiek
Date: Thu, 3 Aug 2023 16:32:43 +0200
Subject: [PATCH 3/3] feat!: stop scraping unused metrics

Signed-off-by: Dominik Rosiek
---
 deploy/helm/sumologic/values.yaml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/deploy/helm/sumologic/values.yaml b/deploy/helm/sumologic/values.yaml
index 01b87b93fd..41c7d642ff 100644
--- a/deploy/helm/sumologic/values.yaml
+++ b/deploy/helm/sumologic/values.yaml
@@ -1822,6 +1822,11 @@ kube-prometheus-stack:
       enabled: false
     tls:
       enabled: false
+    serviceMonitor:
+      metricRelabelings:
+        - action: keep
+          regex: "^$"
+          sourceLabels: [__name__]
   ## Resource limits for kube-state-metrics
   kube-state-metrics:
     ## Put here the new name if you want to override the full name used for Kube State Metrics components.
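
Note on the relabeling semantics above (illustration only, not part of the patch series): with action: keep, any sample whose __name__ does not match the regex is dropped, so a keep rule with regex "^$" matches no metric name at all and effectively silences the target. Because Helm replaces list-typed values rather than merging them, an installation that still needs one of the removed series can supply its own metricRelabelings list in its values file. A minimal sketch, assuming the kube-prometheus-stack values layout used in the hunks above (the widened regex is only an example):

kube-prometheus-stack:
  kubeApiServer:
    serviceMonitor:
      metricRelabelings:
        # Keep the request totals retained by this patch, plus the request
        # duration sums/counts that it stops collecting.
        - action: keep
          regex: (?:apiserver_request_total|apiserver_request_duration_seconds_(?:count|sum))
          sourceLabels: [__name__]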