From d71e0945db0d44fb86b3d3970d435fd1ae67d642 Mon Sep 17 00:00:00 2001 From: Michal Hajas Date: Tue, 27 Jun 2023 16:38:35 +0200 Subject: [PATCH] Make otel work with Openshift and add Grafana dashboard for Client credentials scenario (#384) Relates to #372 --- .../keycloak-create-deployment/action.yml | 4 + provision/common/Taskfile.yaml | 1 + .../minikube/keycloak/templates/keycloak.yaml | 2 + .../minikube/keycloak/templates/otel-pvc.yaml | 4 + .../dashboards/client-credentials-otel.json | 421 ++++++++++++++++++ .../dashboards/keycloak-infinispan.json | 52 ++- .../dashboards/client-credentials-otel.json | 1 + 7 files changed, 463 insertions(+), 22 deletions(-) create mode 100644 provision/minikube/monitoring/dashboards/client-credentials-otel.json create mode 120000 provision/openshift/monitoring/dashboards/client-credentials-otel.json diff --git a/.github/actions/keycloak-create-deployment/action.yml b/.github/actions/keycloak-create-deployment/action.yml index 645960091..2c2374175 100644 --- a/.github/actions/keycloak-create-deployment/action.yml +++ b/.github/actions/keycloak-create-deployment/action.yml @@ -24,6 +24,9 @@ inputs: description: 'Keycloak pod CPU limit' heapMaxSizeMB: description: 'Keycloak server maximum Java heap size (in MB)' + otel: + description: 'Enable OpenTelemetry' + default: 'true' runs: using: "composite" @@ -45,3 +48,4 @@ runs: KC_CPU_REQUESTS: ${{ inputs.podCpuRequests }} KC_CPU_LIMITS: ${{ inputs.podCpuLimit }} KC_HEAP_MAX_MB: ${{ inputs.heapMaxSizeMB }} + KC_OTEL: ${{ inputs.otel }} diff --git a/provision/common/Taskfile.yaml b/provision/common/Taskfile.yaml index 9303bc8ca..f1c8da81a 100644 --- a/provision/common/Taskfile.yaml +++ b/provision/common/Taskfile.yaml @@ -88,6 +88,7 @@ tasks: - test "{{.KC_METASPACE_INIT_MB}}" == "$(cat .task/var-KC_METASPACE_INIT_MB)" - test "{{.KC_METASPACE_MAX_MB}}" == "$(cat .task/var-KC_METASPACE_MAX_MB)" - test "{{.KC_CUSTOM_INFINISPAN_CONFIG}}" == "$(cat .task/var-KC_CUSTOM_INFINISPAN_CONFIG)" + - test "{{.KC_DISABLE_STICKY_SESSION}}" == "$(cat .task/var-KC_DISABLE_STICKY_SESSION)" mvnw: dir: ../.. diff --git a/provision/minikube/keycloak/templates/keycloak.yaml b/provision/minikube/keycloak/templates/keycloak.yaml index b1c86e68d..7413451b1 100644 --- a/provision/minikube/keycloak/templates/keycloak.yaml +++ b/provision/minikube/keycloak/templates/keycloak.yaml @@ -136,6 +136,7 @@ spec: # https://github.com/open-telemetry/opentelemetry-java/blob/main/sdk-extensions/autoconfigure/README.md - name: OTEL_RESOURCE_ATTRIBUTES value: service.name=keycloak + {{ if ne .Values.environment "openshift" }} - name: OTEL_TRACES_EXPORTER # with otel+tempo 1.4.1 forwarding of traces works, but searching is not returning all values for now, for example delete users was missing value: jaeger @@ -145,6 +146,7 @@ spec: value: parentbased_traceidratio # always_on, parentbased_traceidratio, ... - name: OTEL_TRACES_SAMPLER_ARG value: {{ .Values.otelSamplingPercentage | quote }} + {{ end }} - name: OTEL_METRICS_EXPORTER value: prometheus {{ end }} diff --git a/provision/minikube/keycloak/templates/otel-pvc.yaml b/provision/minikube/keycloak/templates/otel-pvc.yaml index 501075505..55185f1a3 100644 --- a/provision/minikube/keycloak/templates/otel-pvc.yaml +++ b/provision/minikube/keycloak/templates/otel-pvc.yaml @@ -12,6 +12,10 @@ spec: resources: requests: storage: 100Mi + {{ if eq .Values.environment "openshift" }} + storageClassName: efs-sc + {{ else }} storageClassName: standard + {{ end }} volumeMode: Filesystem {{end}} diff --git a/provision/minikube/monitoring/dashboards/client-credentials-otel.json b/provision/minikube/monitoring/dashboards/client-credentials-otel.json new file mode 100644 index 000000000..c85f69b4f --- /dev/null +++ b/provision/minikube/monitoring/dashboards/client-credentials-otel.json @@ -0,0 +1,421 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 13, + "panels": [], + "repeat": "pod_name", + "repeatDirection": "h", + "title": "$pod_name stats", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 0, + "y": 1 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(rate(http_server_duration_bucket{http_route=\"/realms/{realm}/protocol/{protocol}/token\", le=\"250.0\", namespace=\"$namespace\", pod=\"$pod_name\"}[2m])) without (le) / rate(http_server_duration_count{http_route=\"/realms/{realm}/protocol/{protocol}/token\", namespace=\"$namespace\", pod=\"$pod_name\"}[2m])", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "Percentage below 250ms", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "rate(vendor_cache_manager_keycloak_cache_realms_statistics_hits{namespace=\"${namespace}\", pod=\"$pod_name\", job=\"${namespace}/keycloak-otel\"}[2m]) / (rate(vendor_cache_manager_keycloak_cache_realms_statistics_hits{namespace=\"${namespace}\", pod=\"$pod_name\", job=\"${namespace}/keycloak-otel\"}[2m]) + rate(vendor_cache_manager_keycloak_cache_realms_statistics_misses{namespace=\"${namespace}\", pod=\"$pod_name\", job=\"${namespace}/keycloak-otel\"}[2m]))", + "hide": false, + "legendFormat": "Realms cache hit ratio", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "rate(vendor_cache_manager_keycloak_cache_users_statistics_hits{namespace=\"${namespace}\", pod=\"$pod_name\", job=\"${namespace}/keycloak-otel\"}[2m]) / (rate(vendor_cache_manager_keycloak_cache_users_statistics_hits{namespace=\"${namespace}\", pod=\"$pod_name\", job=\"${namespace}/keycloak-otel\"}[2m]) + rate(vendor_cache_manager_keycloak_cache_users_statistics_misses{namespace=\"${namespace}\", pod=\"$pod_name\", job=\"${namespace}/keycloak-otel\"}[2m]))", + "hide": false, + "legendFormat": "Users cache hit ratio", + "range": true, + "refId": "C" + } + ], + "title": "$pod_name - Below 250ms response time percentage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 6, + "y": 1 + }, + "id": 2, + "options": { + "calculate": false, + "cellGap": 1, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Oranges", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false + } + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum without (instance) (idelta(http_server_duration_bucket{http_route=\"/realms/{realm}/protocol/{protocol}/token\", namespace=\"$namespace\", pod=\"$pod_name\"} [2m]))", + "format": "heatmap", + "interval": "", + "legendFormat": "{{le}}", + "range": true, + "refId": "A" + } + ], + "title": "$pod_name - Response time histogram", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 12, + "y": 1 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(http_server_duration_bucket{http_route=\"/realms/{realm}/protocol/{protocol}/token\", namespace=\"$namespace\", pod=\"$pod_name\"}[2m])) by (le))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "$pod_name - 99 quantile millis", + "type": "timeseries" + } + ], + "refresh": "5s", + "revision": 1, + "schemaVersion": 38, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "keycloak", + "value": "keycloak" + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": "label_values(namespace)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "namespace", + "options": [], + "query": { + "query": "label_values(namespace)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": "query_result(http_server_duration_count)\n", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "pod_name", + "options": [], + "query": { + "query": "query_result(http_server_duration_count)\n", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "/.*pod=\"([^\"]+)\".*/", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Client credentials SLO", + "uid": "A0kQZ7u4k", + "version": 1, + "weekStart": "" +} diff --git a/provision/minikube/monitoring/dashboards/keycloak-infinispan.json b/provision/minikube/monitoring/dashboards/keycloak-infinispan.json index e9deb5b40..8d3d99f0c 100644 --- a/provision/minikube/monitoring/dashboards/keycloak-infinispan.json +++ b/provision/minikube/monitoring/dashboards/keycloak-infinispan.json @@ -126,7 +126,7 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "sum without(instance,node) (vendor_cache_manager_keycloak_cache_container_health_number_of_nodes{namespace=\"$namespace\"})", + "expr": "avg without(instance,node,job,endpoint,otel_scope_name) (vendor_cache_manager_keycloak_cache_container_health_number_of_nodes{namespace=\"$namespace\"})", "legendFormat": "{{pod}}", "range": true, "refId": "A" @@ -199,7 +199,7 @@ "x": 5, "y": 1 }, - "id": 2, + "id": 227, "options": { "legend": { "calcs": [ @@ -222,7 +222,7 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "sum without(instance,node) (vendor_cache_manager_keycloak_cache_container_stats_hit_ratio{namespace=\"${namespace}\"})", + "expr": "rate(vendor_cache_manager_keycloak_cache_container_stats_hits{namespace=\"${namespace}\", job=\"${namespace}/keycloak-metrics\"}[2m]) / (rate(vendor_cache_manager_keycloak_cache_container_stats_hits{namespace=\"${namespace}\", job=\"${namespace}/keycloak-metrics\"}[2m]) + rate(vendor_cache_manager_keycloak_cache_container_stats_misses{namespace=\"${namespace}\", job=\"${namespace}/keycloak-metrics\"}[2m]))", "legendFormat": "{{pod}}", "range": true, "refId": "A" @@ -331,7 +331,7 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "sum without (instance,node) (vendor_cache_manager_keycloak_cache_${distributed_cache}_statistics_approximate_entries_unique{namespace=\"${namespace}\"})", + "expr": "avg without(instance,node,job,endpoint,otel_scope_name) (vendor_cache_manager_keycloak_cache_${distributed_cache}_statistics_approximate_entries_unique{namespace=\"${namespace}\"})", "legendFormat": "{{pod}}", "range": true, "refId": "A" @@ -426,7 +426,7 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "sum without (instance,node) (vendor_cache_manager_keycloak_cache_${distributed_cache}_statistics_approximate_entries_in_memory{namespace=\"${namespace}\"})", + "expr": "avg without(instance,node,job,endpoint,otel_scope_name) (vendor_cache_manager_keycloak_cache_${distributed_cache}_statistics_approximate_entries_in_memory{namespace=\"${namespace}\"})", "legendFormat": "{{pod}}", "range": true, "refId": "A" @@ -486,7 +486,7 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "sum without(instance,node) (vendor_cache_manager_keycloak_cache_${distributed_cache}_statistics_average_read_time_nanos{namespace=\"${namespace}\"})", + "expr": "avg without(instance,node,job,endpoint,otel_scope_name) (vendor_cache_manager_keycloak_cache_${distributed_cache}_statistics_average_read_time_nanos{namespace=\"${namespace}\"})", "legendFormat": "{{pod}}", "range": true, "refId": "A" @@ -546,7 +546,7 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "sum without(instance,node) (vendor_cache_manager_keycloak_cache_${distributed_cache}_statistics_average_write_time_nanos{namespace=\"${namespace}\"})", + "expr": "avg without(instance,node,job,endpoint,otel_scope_name) (vendor_cache_manager_keycloak_cache_${distributed_cache}_statistics_average_write_time_nanos{namespace=\"${namespace}\"})", "legendFormat": "{{pod}}", "range": true, "refId": "A" @@ -561,7 +561,7 @@ "h": 1, "w": 24, "x": 0, - "y": 88 + "y": 99 }, "id": 8, "panels": [], @@ -581,6 +581,8 @@ "mode": "palette-classic" }, "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -613,7 +615,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -628,7 +631,7 @@ "h": 10, "w": 5, "x": 0, - "y": 89 + "y": 100 }, "id": 10, "options": { @@ -653,7 +656,7 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "sum without (instance,node) (vendor_cache_manager_keycloak_cache_${local_cache}_statistics_approximate_entries_in_memory{namespace=\"${namespace}\"})", + "expr": "avg without(instance,node,job,endpoint,otel_scope_name) (vendor_cache_manager_keycloak_cache_${local_cache}_statistics_approximate_entries_in_memory{namespace=\"${namespace}\"})", "legendFormat": "{{pod}}", "range": true, "refId": "A" @@ -674,6 +677,8 @@ "mode": "palette-classic" }, "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -709,7 +714,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null } ] }, @@ -721,7 +727,7 @@ "h": 10, "w": 5, "x": 5, - "y": 89 + "y": 100 }, "id": 25, "options": { @@ -746,7 +752,7 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "sum without(instance,node) (vendor_cache_manager_keycloak_cache_${local_cache}_statistics_hit_ratio{namespace=\"${namespace}\"})", + "expr": "rate(vendor_cache_manager_keycloak_cache_${local_cache}_statistics_hits{namespace=\"${namespace}\", job=\"${namespace}/keycloak-metrics\"}[2m]) / (rate(vendor_cache_manager_keycloak_cache_${local_cache}_statistics_hits{namespace=\"${namespace}\", job=\"${namespace}/keycloak-metrics\"}[2m]) + rate(vendor_cache_manager_keycloak_cache_${local_cache}_statistics_misses{namespace=\"${namespace}\", job=\"${namespace}/keycloak-metrics\"}[2m]))", "legendFormat": "{{pod}}", "range": true, "refId": "A" @@ -770,7 +776,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null } ] }, @@ -782,7 +789,7 @@ "h": 10, "w": 3, "x": 10, - "y": 89 + "y": 100 }, "id": 51, "options": { @@ -797,7 +804,7 @@ "showThresholdLabels": false, "showThresholdMarkers": true }, - "pluginVersion": "9.0.5", + "pluginVersion": "9.4.7", "targets": [ { "datasource": { @@ -805,7 +812,7 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "sum without(instance,node) (vendor_cache_manager_keycloak_cache_${local_cache}_statistics_average_read_time_nanos{namespace=\"${namespace}\"})", + "expr": "avg without(instance,node,job,endpoint,otel_scope_name) (vendor_cache_manager_keycloak_cache_${local_cache}_statistics_average_read_time_nanos{namespace=\"${namespace}\"})", "legendFormat": "{{pod}}", "range": true, "refId": "A" @@ -829,7 +836,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null } ] }, @@ -841,7 +849,7 @@ "h": 10, "w": 3, "x": 13, - "y": 89 + "y": 100 }, "id": 36, "options": { @@ -856,7 +864,7 @@ "showThresholdLabels": false, "showThresholdMarkers": true }, - "pluginVersion": "9.0.5", + "pluginVersion": "9.4.7", "targets": [ { "datasource": { @@ -864,7 +872,7 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "sum without(instance,node) (vendor_cache_manager_keycloak_cache_${local_cache}_statistics_average_write_time_nanos{namespace=\"${namespace}\"})", + "expr": "avg without(instance,node,job,endpoint,otel_scope_name) (vendor_cache_manager_keycloak_cache_${local_cache}_statistics_average_write_time_nanos{namespace=\"${namespace}\"})", "legendFormat": "{{pod}}", "range": true, "refId": "A" diff --git a/provision/openshift/monitoring/dashboards/client-credentials-otel.json b/provision/openshift/monitoring/dashboards/client-credentials-otel.json new file mode 120000 index 000000000..90a5c44e3 --- /dev/null +++ b/provision/openshift/monitoring/dashboards/client-credentials-otel.json @@ -0,0 +1 @@ +../../../minikube/monitoring/dashboards/client-credentials-otel.json \ No newline at end of file