Skip to content

Commit

Permalink
[CONTP-60] New unscheduled config telemetry (#19306)
Browse files: browse the repository at this point in the history
* Telemetry for unscheduled cluster checks

* Update alphabetical order of metric

* Remove unused auto-generated sample_tags column

* Update datadog_cluster_agent/changelog.d/19306.added

Co-authored-by: Kyle Neale <[email protected]>

---------

Co-authored-by: Kyle Neale <[email protected]>
  • Loading branch information
gabedos and Kyle-Neale authored Jan 6, 2025
1 parent d51bcbe commit 511fef8
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 11 deletions.
1 change: 1 addition & 0 deletions datadog_cluster_agent/changelog.d/19306.added
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add telemetry for checks that are not scheduled.
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
'cluster_checks_busyness': 'cluster_checks.busyness',
'cluster_checks_configs_dangling': 'cluster_checks.configs_dangling',
'cluster_checks_configs_dispatched': 'cluster_checks.configs_dispatched',
'cluster_checks_unscheduled_check': 'cluster_checks.unscheduled_check',
'cluster_checks_configs_info': 'cluster_checks.configs_info',
'cluster_checks_failed_stats_collection': 'cluster_checks.failed_stats_collection',
'cluster_checks_nodes_reporting': 'cluster_checks.nodes_reporting',
Expand Down
23 changes: 12 additions & 11 deletions datadog_cluster_agent/metadata.csv
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@ datadog.cluster_agent.admission_webhooks.cws_exec_instrumentation_attempts.count
datadog.cluster_agent.admission_webhooks.cws_exec_instrumentation_attempts.sum,count,,,,CWS exec Instrumentation attempts sum,0,datadog_cluster_agent,cws exec instrumentation attempts,
datadog.cluster_agent.admission_webhooks.cws_pod_instrumentation_attempts.count,count,,,,CWS pod Instrumentation attempts count,0,datadog_cluster_agent,cws pod instrumentation attempts,
datadog.cluster_agent.admission_webhooks.cws_pod_instrumentation_attempts.sum,count,,,,CWS pod Instrumentation attempts sum,0,datadog_cluster_agent,cws pod instrumentation attempts,
datadog.cluster_agent.admission_webhooks.library_injection_attempts,count,,,,"Number of library injection attempts by language",0,datadog_cluster_agent,library injection attempts,
datadog.cluster_agent.admission_webhooks.library_injection_errors,count,,,,"Number of library injection failures by language",0,datadog_cluster_agent,library injection errors,
datadog.cluster_agent.admission_webhooks.library_injection_attempts,count,,,,Number of library injection attempts by language,0,datadog_cluster_agent,library injection attempts,
datadog.cluster_agent.admission_webhooks.library_injection_errors,count,,,,Number of library injection failures by language,0,datadog_cluster_agent,library injection errors,
datadog.cluster_agent.admission_webhooks.mutation_attempts,gauge,,,,Number of pod mutation attempts by mutation type,0,datadog_cluster_agent,admission webhooks mutation attempts,
datadog.cluster_agent.admission_webhooks.mutation_errors,gauge,,,,Number of mutation failures by mutation type,-1,datadog_cluster_agent,admission webhooks mutation errors,
datadog.cluster_agent.admission_webhooks.patcher.attempts,count,,,,"Number of patch attempts",0,datadog_cluster_agent,patch attempts,
datadog.cluster_agent.admission_webhooks.patcher.completed,count,,,,"Number of completed patch attempts",0,datadog_cluster_agent,completed patch attempts,
datadog.cluster_agent.admission_webhooks.patcher.errors,count,,,,"Number of patch errors",0,datadog_cluster_agent,patch errors,
datadog.cluster_agent.admission_webhooks.rc_provider.configs,gauge,,,,"Number of valid remote configuration",0,datadog_cluster_agent,valid remote configurations,
datadog.cluster_agent.admission_webhooks.rc_provider.invalid_configs,gauge,,,,"Number of invalid remote configurations",0,datadog_cluster_agent,invalid remote configurations,
datadog.cluster_agent.admission_webhooks.patcher.attempts,count,,,,Number of patch attempts,0,datadog_cluster_agent,patch attempts,
datadog.cluster_agent.admission_webhooks.patcher.completed,count,,,,Number of completed patch attempts,0,datadog_cluster_agent,completed patch attempts,
datadog.cluster_agent.admission_webhooks.patcher.errors,count,,,,Number of patch errors,0,datadog_cluster_agent,patch errors,
datadog.cluster_agent.admission_webhooks.rc_provider.configs,gauge,,,,Number of valid remote configuration,0,datadog_cluster_agent,valid remote configurations,
datadog.cluster_agent.admission_webhooks.rc_provider.invalid_configs,gauge,,,,Number of invalid remote configurations,0,datadog_cluster_agent,invalid remote configurations,
datadog.cluster_agent.admission_webhooks.reconcile_errors,gauge,,,,Number of reconcile errors per controller,-1,datadog_cluster_agent,admission webhooks reconcile errors,
datadog.cluster_agent.admission_webhooks.reconcile_success,gauge,,success,,Number of reconcile successes per controller,0,datadog_cluster_agent,admission webhooks reconcile success,
datadog.cluster_agent.admission_webhooks.response_duration.count,count,,,,Webhook response duration count,0,datadog_cluster_agent,webhook response duration count,
Expand All @@ -35,6 +35,7 @@ datadog.cluster_agent.cluster_checks.nodes_reporting,gauge,,,,Number of node age
datadog.cluster_agent.cluster_checks.rebalancing_decisions,count,,,,Total number of check rebalancing decisions,0,datadog_cluster_agent,cluster check rebalancing decisions,
datadog.cluster_agent.cluster_checks.rebalancing_duration_seconds,gauge,,second,,Duration of the check rebalancing algorithm last execution,0,datadog_cluster_agent,cluster check rebalancing duration,
datadog.cluster_agent.cluster_checks.successful_rebalancing_moves,count,,check,,Total number of successful check rebalancing decisions,1,datadog_cluster_agent,cluster check rebalancing moves,
datadog.cluster_agent.cluster_checks.unscheduled_check,gauge,,,,Number of check configurations not scheduled,-1,datadog_cluster_agent,cluster check unscheduled check,
datadog.cluster_agent.cluster_checks.updating_stats_duration_seconds,gauge,,second,,Duration of collecting stats from check runners and updating cache,0,datadog_cluster_agent,cluster check updating stats duration,
datadog.cluster_agent.datadog.rate_limit_queries.limit,gauge,,query,,Maximum number of queries to the Datadog API allowed in the period by endpoint,0,datadog_cluster_agent,rate limit queries limit,
datadog.cluster_agent.datadog.rate_limit_queries.period,gauge,,second,,Period of rate limiting for the Datadog API by endpoint,0,datadog_cluster_agent,rate limit queries period,
Expand All @@ -53,10 +54,10 @@ datadog.cluster_agent.external_metrics.processed_value,gauge,,,,Value processed
datadog.cluster_agent.go.goroutines,gauge,,,,Number of goroutines that currently exist,0,datadog_cluster_agent,go goroutines,
datadog.cluster_agent.go.memstats.alloc_bytes,gauge,,byte,,Number of bytes allocated and still in use,0,datadog_cluster_agent,go memstats alloc bytes,
datadog.cluster_agent.go.threads,gauge,,thread,,Number of OS threads created,0,datadog_cluster_agent,go threads,
datadog.cluster_agent.kubernetes_apiserver.emitted_events,count,,,,"Datadog events emitted by the kubernetes_apiserver check",0,datadog_cluster_agent,datadog events events,
datadog.cluster_agent.kubernetes_apiserver.kube_events,count,,,,"Kubernetes events processed by the kubernetes_apiserver check",0,datadog_cluster_agent,apiserver events,
datadog.cluster_agent.language_detection_dca_handler.processed_requests,count,,,,"The number of process language detection requests processed by the handler",0,datadog_cluster_agent,language detection processed requests,
datadog.cluster_agent.language_detection_patcher.patches,count,,,,"The number of patch requests sent by the patcher to the kube api server",0,datadog_cluster_agent,language detection patches,
datadog.cluster_agent.kubernetes_apiserver.emitted_events,count,,,,Datadog events emitted by the kubernetes_apiserver check,0,datadog_cluster_agent,datadog events events,
datadog.cluster_agent.kubernetes_apiserver.kube_events,count,,,,Kubernetes events processed by the kubernetes_apiserver check,0,datadog_cluster_agent,apiserver events,
datadog.cluster_agent.language_detection_dca_handler.processed_requests,count,,,,The number of process language detection requests processed by the handler,0,datadog_cluster_agent,language detection processed requests,
datadog.cluster_agent.language_detection_patcher.patches,count,,,,The number of patch requests sent by the patcher to the kube api server,0,datadog_cluster_agent,language detection patches,
datadog.cluster_agent.secret_backend.elapsed,gauge,,millisecond,,The elapsed time of secret backend invocation,0,datadog_cluster_agent,secret backend elapsed time duration,
datadog.cluster_agent.tagger.stored_entities,gauge,,,,Number of entities stored in the tagger,0,datadog_cluster_agent,tagger stored entities,
datadog.cluster_agent.tagger.updated_entities,count,,,,Number of updates made to entities in the tagger,0,datadog_cluster_agent,tagger updated entities,
Expand Down
3 changes: 3 additions & 0 deletions datadog_cluster_agent/tests/fixtures/metrics.txt
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,9 @@ cluster_checks_configs_info{check_id="http_check:kubernetes:c964669d244288ea",jo
cluster_checks_configs_info{check_id="http_check:nginx 1:c89937d23c8b01a0",join_leader="true",node="datadog-clusterchecks-779dfcd7bf-jm4mz"} 1
cluster_checks_configs_info{check_id="http_check:nginx 2:89d07d5d897f5241",join_leader="true",node="datadog-clusterchecks-779dfcd7bf-hxjwg"} 1
cluster_checks_configs_info{check_id="http_check:nginx 3:8f889113ed52b58e",join_leader="true",node="datadog-clusterchecks-779dfcd7bf-hxjwg"} 1
# HELP cluster_checks_unscheduled_check Number of check configurations not scheduled.
# TYPE cluster_checks_unscheduled_check gauge
cluster_checks_unscheduled_check{config_name="kubernetes_state_core",config_source="file:/etc/datadog-agent/conf.d/kubernetes_state_core.yaml.default",join_leader="true"} 1
# HELP autodiscovery_errors Number of Autodiscovery errors by provider.
# TYPE autodiscovery_errors gauge
autodiscovery_errors{provider="kubernetes-services"} 1
Expand Down
1 change: 1 addition & 0 deletions datadog_cluster_agent/tests/test_datadog_cluster_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
'autodiscovery.watched_resources',
'cluster_checks.busyness',
'cluster_checks.configs_dangling',
'cluster_checks.unscheduled_check',
'cluster_checks.configs_dispatched',
'cluster_checks.configs_info',
'cluster_checks.failed_stats_collection',
Expand Down

0 comments on commit 511fef8

Please sign in to comment.