@@ -482,6 +482,28 @@ spec:
482
482
(1 - central:sli:availability:extended_avg_over_time1h) / (1 - scalar(central:slo:availability))
483
483
record: central:slo:availability:burnrate1h
484
484
485
+ - name : rhacs-central-grpc-api-latency.sli
486
+ rules :
487
+ # The 90th percentile of central's handled gRPC API request latencies over the last 10 minutes.
488
+ # Long-running and synchronous gRPC methods are excluded via the grpc_method filter below.
489
+ - expr : |
490
+ histogram_quantile(0.9, sum by(le, namespace) (rate(grpc_server_handling_seconds_bucket{container="central", grpc_method!~"ScanImageInternal|DeleteImages|EnrichLocalImageInternal|RunReport|ScanImage|TriggerExternalBackup"}[10m])))
491
+ record: central:grpc_server_handling_seconds:rate10m:quantile90
492
+
493
+ - name : rhacs-central-http-api-latency.sli
494
+ rules :
495
+ # The 90th percentile value of central's handled HTTP API request latencies for the last 10 minutes.
496
+ - expr : |
497
+ histogram_quantile(0.9, sum by(le, namespace) (rate(http_incoming_request_duration_histogram_seconds_bucket{container="central"}[10m])))
498
+ record: central:http_incoming_request_duration_seconds:rate10m:quantile90
499
+
500
+ - name : rhacs-central-graphql-api-latency.sli
501
+ rules :
502
+ # The 90th percentile value of central's handled GraphQL API request latencies for the last 10 minutes.
503
+ - expr : |
504
+ histogram_quantile(0.9, sum by(namespace, le) (rate(rox_central_graphql_query_duration_bucket{container="central"}[10m])))
505
+ record: central:rox_central_graphql_query_duration:rate10m:quantile90
506
+
485
507
- name : rhacs-central.alerts
486
508
rules :
487
509
- alert : Central availability error budget exhaustion - 90%
@@ -533,6 +555,32 @@ spec:
533
555
severity : critical
534
556
namespace : " {{ $labels.namespace }}"
535
557
rhacs_instance_id : " {{ $labels.rhacs_instance_id }}"
560
+ rhacs_org_name : " {{ $labels.rhacs_org_name }}"
561
+ rhacs_org_id : " {{ $labels.rhacs_org_id }}"
562
+ rhacs_cluster_name : " {{ $labels.rhacs_cluster_name }}"
563
+ rhacs_environment : " {{ $labels.rhacs_environment }}"
564
+
565
+ - alert : The 90th percentile of GRPC request latencies is greater than 100ms
566
+ expr : |
567
+ count(central:grpc_server_handling_seconds:rate10m:quantile90 > 0.1) > 0
568
+ annotations :
569
+ message : " The 90th percentile of grpc request latencies is greater than 100ms."
570
+
571
+ - alert : The 90th percentile of HTTP request latencies is greater than 100ms.
572
+ expr : |
573
+ count(central:http_incoming_request_duration_seconds:rate10m:quantile90 > 0.1) > 0
574
+ annotations :
575
+ message : " The 90th percentile of HTTP request latencies is greater than 100ms."
576
+
577
+ - alert : The 90th percentile of GraphQL request latencies is greater than 100ms.
578
+ expr : |
579
+ count(central:rox_central_graphql_query_duration:rate10m:quantile90 > 0.1) > 0
580
+ annotations :
581
+ message : " The 90th percentile of GraphQL request latencies is greater than 100ms."
582
+
536
584
- name : az-resources
537
585
rules :
538
586
- record : strictly_worker_nodes
0 commit comments