diff --git a/helm-charts/common/dashboard/json/README.txt b/helm-charts/common/dashboard/json/README.txt
new file mode 100644
index 000000000..bc27caa7c
--- /dev/null
+++ b/helm-charts/common/dashboard/json/README.txt
@@ -0,0 +1,17 @@
+# Generating Grafana dashboard Helm chart configMap templates
+
+User would need to manually load Grafana dashboard JSON file, but
+when Helm Dashboard chart installs it to Grafana namespace inside a
+suitably labeled configMap, Grafana will load it automatically.
+
+Here are the dashboard JSON spec files used as sources for those
+configMaps. After dashboard is updated in Grafana, it can be saved
+again to a JSON file here, and the corresponding configMap updated
+with the provided conversion script.
+
+Usage:
+
+```
+cd ../templates/
+../json/convert-dashboard.sh ../json/*.json
+```
diff --git a/helm-charts/common/dashboard/json/convert-dashboard.sh b/helm-charts/common/dashboard/json/convert-dashboard.sh
new file mode 100755
index 000000000..136f63c4b
--- /dev/null
+++ b/helm-charts/common/dashboard/json/convert-dashboard.sh
@@ -0,0 +1,160 @@
+#!/bin/sh
+#
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+# Converts Grafana dashboard JSON specs to configMap templates for the
+# OPEA Dashboard Helm chart. Templates are written to the current directory
+# as "configmap-<name>.yaml". Needs 'jq', 'kubectl' and 'sed' supporting '-i'.
+
+set -e
+
+# OPEA CI requires copyright/license for the converted file
+COPYRIGHT="Copyright (C) 2025 Intel Corporation"
+LICENSE="SPDX-License-Identifier: Apache-2.0"
+
+# Label needed in configMap to get (Helm installed) Grafana to load it as dashboard
+LABEL="grafana_dashboard=1"
+
+# needs to be specified so there's something to override
+NS="default"
+
+# Print usage along with given error message to stderr, and terminate script.
+# $1: error message
+error_exit ()
+{
+	script=${0##*/}
+	cat >&2 << EOF
+
+ERROR: $1!
+
+Convert given Grafana *.json dashboard specs to Grafana dashboard configMap
+templates compatible with OPEA Dashboard Helm chart.
+
+Dashboard 'title' and 'uid' are overridden with values composed of
+the OPEA Dashboard Helm chart values and file name, similarly to
+the produced configMap name and namespace.
+
+Usage:
+  $script <dashboard.json>...
+
+ERROR: $1!
+EOF
+	exit 1
+}
+
+# Print given top-level field of dashboard JSON file, with quotes removed.
+# $1: field name, $2: JSON file
+json_field ()
+{
+	jq ".$1" "$2" | tail -1 | tr -d '"'
+}
+
+# Escape $1 so that sed matches it literally in a '/' delimited pattern
+sed_pattern ()
+{
+	printf '%s\n' "$1" | sed -e 's|[][\\/.*^$]|\\&|g'
+}
+
+# Escape $1 so that sed inserts it as-is from a '/' delimited replacement
+sed_replacement ()
+{
+	printf '%s\n' "$1" | sed -e 's|[\\/&]|\\&|g'
+}
+
+if [ $# -eq 0 ]; then
+	error_exit "no dashboard JSON files given"
+fi
+
+if ! command -v jq > /dev/null 2>&1; then
+	error_exit "'jq' required for dashboard checks, please install 'jq' first"
+fi
+
+if ! command -v kubectl > /dev/null 2>&1; then
+	error_exit "'kubectl' required for dashboard conversion, please install 'kubernetes-client' first"
+fi
+
+if ! kubectl version; then
+	error_exit "Broken/missing 'kubectl' cluster config (script does not need it, but kubectl still fails)"
+fi
+
+echo
+echo "Got following Grafana dashboards:"
+for file in "$@"; do
+	if [ ! -f "$file" ]; then
+		error_exit "JSON file '$file' does not exist"
+	fi
+	if [ "${file%.json}" = "$file" ]; then
+		error_exit "file '$file' does not have '.json' suffix"
+	fi
+
+	# 'uid' is needed so that there is something to override
+	uid=$(json_field uid "$file")
+	if [ -z "$uid" ]; then
+		error_exit "'$file' dashboard has invalid JSON"
+	elif [ "$uid" = "null" ]; then
+		error_exit "'$file' dashboard has no 'uid' field (will be replaced with Helm variable)"
+	fi
+
+	# ...and same goes for 'title' (empty one would also break sed matching)
+	title=$(json_field title "$file")
+	if [ -z "$title" ] || [ "$title" = "null" ]; then
+		error_exit "'$file' dashboard has no 'title' field (will be replaced with Helm variable)"
+	fi
+
+	echo "- file: $file, uid: '$uid', title: '$title'"
+done
+
+echo
+echo "Converting:"
+for file in "$@"; do
+	base=${file##*/}
+	name=${base%.json}
+	dst="configmap-${name}.yaml"
+
+	uid=$(json_field uid "$file")
+	title=$(json_field title "$file")
+
+	# convert to k8s object name ("[a-z0-9][-a-z0-9]*[a-z0-9]"):
+	# - upper-case -> lowercase, '_' -> '-'
+	# - drop anything outside [-a-z]
+	# - drop '-' prefix & suffix and successive '-' chars
+	k8name=$(printf '%s\n' "$name" | tr 'A-Z_' 'a-z-' | tr -d -c 'a-z-' | sed -e 's/^-*//' -e 's/-*$//' -e 's/--*/-/g')
+	if [ -z "$k8name" ]; then
+		error_exit "'$base' name has no characters valid for configMap name"
+	fi
+
+	echo "- $base -> $dst"
+
+	# Helm condition, license header and labeled configMap. Timestamp is
+	# matched regardless of indentation (quoted JSON keys cannot match it)
+	{
+		echo "{{- if .Values.$name }}"
+		echo "# $COPYRIGHT"
+		echo "# $LICENSE"
+		echo "#"
+		echo "# ${0##*/}: $base -> $dst"
+		kubectl create cm -n "$NS" --from-file "$file" --dry-run=client -o yaml "$k8name" |
+			kubectl label -f- --local --dry-run=client -o yaml "$LABEL" |
+			grep -v -e "^ *creationTimestamp:"
+		echo "{{- end }}"
+	} > "$dst"
+
+	# title/uid/name are file specific, so they need escaping for sed
+	title_re=$(sed_pattern "$title")
+	uid_re=$(sed_pattern "$uid")
+	name_sub=$(sed_replacement "$name")
+
+	# convert JSON content conflicting with Helm to Helm compatible format
+	# and add suitable Dashboard chart Helm variables to the configMap
+	sed -i \
+		-e 's/\({{[A-Za-z_][A-Za-z0-9_]*}}\)/{{ printf "\1" }}/g' \
+		-e 's/name:.*$/name: {{ include "dashboard.fullname" . }}'"-${k8name}/" \
+		-e 's/space:.*$/space: {{ .Values.global.prometheusNamespace }}/' \
+		-e "s/${title_re}/{{ .Values.prefix }} ${name_sub}/" \
+		-e "s/${uid_re}/opea-"'{{ include "dashboard.fullname" . }}'"-${k8name}/" \
+		"$dst"
+done
+
+echo
+echo "DONE!"
diff --git a/helm-charts/common/dashboard/json/metrics.json b/helm-charts/common/dashboard/json/metrics.json new file mode 100644 index 000000000..671efc3d1 --- /dev/null +++ b/helm-charts/common/dashboard/json/metrics.json @@ -0,0 +1,2048 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 46, + "panels": [], + "title": "Requests", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 0, + "y": 1 + }, + "id": 44, + "options": { + "legend": { + "calcs": ["min", 
"max"], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum by (service)(rate(megaservice_request_latency_count{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "E2E", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum by (service)(rate(te_request_duration_count{service=\"$release-tei\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Embed", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum by (service)(rate(te_request_duration_count{service=\"$release-teirerank\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Rerank", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum by (service)(rate(tgi_request_duration_count{service=\"$release-tgi\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "TGI", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum by (service)(rate(vllm:e2e_request_latency_seconds_count{service=\"$release-vllm\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "vLLM", + "range": true, + "refId": "C" + } + ], + "title": "Rates", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + 
"color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 5, + "y": 1 + }, + "id": 33, + "options": { + "legend": { + "calcs": ["min", "max"], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "avg(rate(megaservice_request_latency_sum{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]) / rate(megaservice_request_latency_count{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "E2E", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "avg(rate(te_request_duration_sum{service=\"$release-tei\",namespace=\"$namespace\"}[$__rate_interval]) / rate(te_request_duration_count{service=\"$release-tei\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Embed", + "range": true, + "refId": "D" + }, + { + "datasource": { + 
"type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "avg(rate(te_request_duration_sum{service=\"$release-teirerank\",namespace=\"$namespace\"}[$__rate_interval]) / rate(te_request_duration_count{service=\"$release-teirerank\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Rerank", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "avg(rate(tgi_request_duration_sum{service=\"$release-tgi\",namespace=\"$namespace\"}[$__rate_interval]) / rate(tgi_request_duration_count{service=\"$release-tgi\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "TGI", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "avg(rate(vllm:e2e_request_latency_seconds_sum{service=\"$release-vllm\",namespace=\"$namespace\"}[$__rate_interval]) / rate(vllm:e2e_request_latency_seconds_count{service=\"$release-vllm\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "vLLM", + "range": true, + "refId": "C" + } + ], + "title": "Latency averages", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 12, + "y": 1 + }, + "id": 40, + "options": { + "calculate": false, + "cellGap": 1, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Spectral", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + 
"le": 1e-9 + }, + "legend": { + "show": false + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum by (le)(increase(megaservice_request_latency_bucket{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]))", + "format": "heatmap", + "hide": false, + "legendFormat": "{{le}}", + "range": true, + "refId": "A" + } + ], + "title": "E2E Latency histogram", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 19, + "y": 1 + }, + "id": 52, + "options": { + "legend": { + "calcs": ["min", "max"], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": 
{ + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by (le) (rate(megaservice_request_latency_bucket{namespace=\"$namespace\",service=\"$release\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "p50", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.9, sum by (le) (rate(megaservice_request_latency_bucket{namespace=\"$namespace\",service=\"$release\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "p90", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) (rate(megaservice_request_latency_bucket{namespace=\"$namespace\",service=\"$release\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "p99", + "range": true, + "refId": "C" + } + ], + "title": "E2E Latency quantiles", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 47, + "panels": [], + "title": "First tokens", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + 
"mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "t/s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 0, + "y": 9 + }, + "id": 42, + "options": { + "legend": { + "calcs": ["min", "max"], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum by (service)(rate(megaservice_first_token_latency_count{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "E2E", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum by (service)(rate(vllm:time_to_first_token_seconds_count{service=\"$release-vllm\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "vLLM", + "range": true, + "refId": "A" + } + ], + "title": "Rates", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": 
"none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 5, + "y": 9 + }, + "id": 36, + "options": { + "legend": { + "calcs": ["min", "max"], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "avg(rate(megaservice_first_token_latency_sum{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]) / rate(megaservice_first_token_latency_count{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "E2E", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "avg(rate(vllm:time_to_first_token_seconds_sum{service=\"$release-vllm\",namespace=\"$namespace\"}[$__rate_interval]) / rate(vllm:time_to_first_token_seconds_count{service=\"$release-vllm\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "vLLM", + "range": true, + "refId": "B" + } + ], + "title": "Latency averages", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 12, + "y": 9 + }, + "id": 41, + "options": { + "calculate": false, + "cellGap": 1, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": 
"exponential", + "scheme": "Spectral", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": false + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum by (le)(increase(megaservice_first_token_latency_bucket{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]))", + "format": "heatmap", + "hide": false, + "legendFormat": "{{le}}", + "range": true, + "refId": "A" + } + ], + "title": "E2E Latency histogram", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 19, + "y": 9 + }, + "id": 50, + "options": { + "legend": { + "calcs": ["min", "max"], + "displayMode": "list", + 
"placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by (le) (rate(megaservice_first_token_latency_bucket{namespace=\"$namespace\",service=\"$release\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "p50", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.9, sum by (le) (rate(megaservice_first_token_latency_bucket{namespace=\"$namespace\",service=\"$release\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "p90", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) (rate(megaservice_first_token_latency_bucket{namespace=\"$namespace\",service=\"$release\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "p99", + "range": true, + "refId": "C" + } + ], + "title": "E2E Latency quantiles", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 48, + "panels": [], + "title": "Next tokens", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + 
"lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "t/s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 0, + "y": 17 + }, + "id": 43, + "options": { + "legend": { + "calcs": ["min", "max"], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum by (service)(rate(megaservice_inter_token_latency_count{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "E2E", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum by (service)(rate(tgi_request_generated_tokens_sum{service=\"$release-tgi\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "TGI", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum by (service)(rate(vllm:time_per_output_token_seconds_count{service=\"$release-vllm\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "vLLM", + "range": true, + "refId": "B" + } + ], + "title": "Rates", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": 
false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 5, + "y": 17 + }, + "id": 35, + "options": { + "legend": { + "calcs": ["min", "max"], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "avg(rate(megaservice_inter_token_latency_sum{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]) / rate(megaservice_inter_token_latency_count{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "E2E", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "avg(rate(tgi_request_mean_time_per_token_duration_sum{service=\"$release-tgi\",namespace=\"$namespace\"}[$__rate_interval]) / rate(tgi_request_mean_time_per_token_duration_count{service=\"$release-tgi\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "TGI", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": 
"${Metrics}" + }, + "editorMode": "code", + "expr": "avg(rate(vllm:time_per_output_token_seconds_sum{service=\"$release-vllm\",namespace=\"$namespace\"}[$__rate_interval]) / rate(vllm:time_per_output_token_seconds_count{service=\"$release-vllm\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "vLLM", + "range": true, + "refId": "B" + } + ], + "title": "Latency averages", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 12, + "y": 17 + }, + "id": 45, + "options": { + "calculate": false, + "cellGap": 1, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Spectral", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": false + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum by (le)(increase(megaservice_inter_token_latency_bucket{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]))", + "format": "heatmap", + "hide": false, + "legendFormat": "{{le}}", + "range": true, + "refId": "A" + } + ], + "title": "E2E Latency histogram", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + 
}, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 19, + "y": 17 + }, + "id": 51, + "options": { + "legend": { + "calcs": ["min", "max"], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by (le) (rate(megaservice_inter_token_latency_bucket{namespace=\"$namespace\",service=\"$release\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "p50", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.9, sum by (le) (rate(megaservice_inter_token_latency_bucket{namespace=\"$namespace\",service=\"$release\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "p90", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) 
(rate(megaservice_inter_token_latency_bucket{namespace=\"$namespace\",service=\"$release\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "p99", + "range": true, + "refId": "C" + } + ], + "title": "E2E Latency quantiles", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 49, + "panels": [], + "title": "Inference services", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 0, + "y": 25 + }, + "id": 22, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(up{service=\"$release-tgi\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "TGI: instances", + "range": true, + "refId": "A" + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(tgi_queue_size{service=\"$release-tgi\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "TGI: used", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(up{service=\"$release-teirerank\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "Rerank: instances", + "range": true, + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(te_queue_size{service=\"$release-teirerank\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "Rerank: used", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(up{service=\"$release-tei\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "Embed: instances", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(te_queue_size{service=\"$release-tei\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "Embed: used", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(up{service=\"$release-vllm\",namespace=\"$namespace\"})", + "hide": false, + "instant": false, + "legendFormat": "vLLM: instances", + "range": true, + "refId": "G" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(vllm:cache_config_info{service=\"$release-vllm\",namespace=\"$namespace\"})", + "hide": false, + "instant": false, + "legendFormat": "vLLM: used", + "range": true, + "refId": "H" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + 
"editorMode": "code", + "expr": "count(up{service=\"$release\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "MegaService: instances", + "range": true, + "refId": "I" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(megaservice_first_token_latency_count{service=\"$release\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "MegaService: used", + "range": true, + "refId": "J" + } + ], + "title": "Replicas", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 5, + "y": 25 + }, + "id": 28, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(tgi_queue_size{service=\"$release-tgi\",namespace=\"$namespace\"})", + "hide": false, + 
"legendFormat": "TGI", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(te_queue_size{service=\"$release-tei\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "Embed", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(te_queue_size{service=\"$release-teirerank\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "Rerank", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(vllm:num_requests_running{service=\"$release-vllm\",namespace=\"$namespace\"})", + "hide": false, + "instant": false, + "legendFormat": "vLLM (running)", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(vllm:num_requests_waiting{service=\"$release-vllm\",namespace=\"$namespace\"})", + "hide": false, + "instant": false, + "legendFormat": "vLLM (waiting)", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(vllm:num_requests_swapped{service=\"$release-vllm\",namespace=\"$namespace\"})", + "hide": false, + "instant": false, + "legendFormat": "vLLM (swapped to CPU)", + "range": true, + "refId": "F" + } + ], + "title": "Queue totals", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + 
"hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 12, + "y": 25 + }, + "id": 34, + "options": { + "legend": { + "calcs": ["min", "max"], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "avg(rate(megaservice_inter_token_latency_count{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]) / rate(megaservice_request_latency_count{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "E2E", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "(sum by (service)(rate(vllm:generation_tokens_total{service=\"$release-vllm\",namespace=\"$namespace\"}[$__rate_interval]))) / (sum by (service)(rate(vllm:request_success_total{service=\"$release-vllm\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "vLLM", + "range": true, + "refId": "B" + } + ], + "title": "Tokens / reply (average)", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "prometheus" + }, + "description": 
"Prometheus instance", + "hide": 0, + "includeAll": false, + "label": "", + "multi": false, + "name": "Metrics", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "default", + "value": "default" + }, + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "definition": "label_values(megaservice_request_pending,namespace)", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(megaservice_request_pending,namespace)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "chatqna", + "value": "chatqna" + }, + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "definition": "label_values(megaservice_request_pending{namespace=\"$namespace\", service!~\".*-uservice\", service!~\".*-usvc\"},service)", + "description": "Helm release name used as prefix for the services", + "hide": 0, + "includeAll": false, + "label": "Helm release", + "multi": false, + "name": "release", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(megaservice_request_pending{namespace=\"$namespace\", service!~\".*-uservice\", service!~\".*-usvc\"},service)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "OPEA ChatQnA metrics", + "uid": "opea-chatqna-dashboard-metrics", + "version": 1, + "weekStart": "" +} diff --git a/helm-charts/common/dashboard/json/scaling.json b/helm-charts/common/dashboard/json/scaling.json new file mode 
100644 index 000000000..425df1a71 --- /dev/null +++ b/helm-charts/common/dashboard/json/scaling.json @@ -0,0 +1,2257 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 7, + "panels": [], + "title": "Scaling", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + 
} + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(up{service=\"$release\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "Instances", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(sum by (pod)(http_requests_total{method=\"POST\",service=\"$release\",namespace=\"$namespace\"}))", + "hide": false, + "legendFormat": "used", + "range": true, + "refId": "C" + } + ], + "title": "Megaservice", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 4, + "y": 1 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + 
"expr": "count(up{service=\"$release-teirerank\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "TEI instances", + "range": true, + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(te_queue_size{service=\"$release-teirerank\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "TEI used", + "range": true, + "refId": "B" + } + ], + "title": "Rerank", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 8, + "y": 1 + }, + "id": 18, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(up{service=\"$release-retriever-usvc\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "Instances", + 
"range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(sum by (pod)(http_requests_total{method=\"POST\",service=\"$release-retriever-usvc\",namespace=\"$namespace\"}))", + "hide": false, + "legendFormat": "Used", + "range": true, + "refId": "C" + } + ], + "title": "Retrieve", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 12, + "y": 1 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(up{service=\"$release-tei\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "TEI instances", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + 
}, + "editorMode": "code", + "expr": "count(te_queue_size{service=\"$release-tei\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "TEI used", + "range": true, + "refId": "D" + } + ], + "title": "Embed", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 16, + "y": 1 + }, + "id": 21, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(up{service=\"$release-llm-uservice\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "Instances", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(sum by 
(pod)(http_requests_total{method=\"POST\",service=\"$release-llm-uservice\",namespace=\"$namespace\"}))", + "hide": false, + "legendFormat": "Used", + "range": true, + "refId": "C" + } + ], + "title": "LLM-uservice", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(up{service=\"$release-tgi\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "TGI instances", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(tgi_queue_size{service=\"$release-tgi\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "TGI 
used", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(up{service=\"$release-vllm\",namespace=\"$namespace\"})", + "hide": false, + "instant": false, + "legendFormat": "vLLM instances", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(vllm:num_requests_waiting{service=\"$release-vllm\",namespace=\"$namespace\"})", + "hide": false, + "instant": false, + "legendFormat": "vLLM used", + "range": true, + "refId": "D" + } + ], + "title": "LLM", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 6, + "panels": [], + "title": "Completed requests", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 0, + "y": 8 + }, + "id": 5, + "options": { + "legend": { + "calcs": ["min", "max"], + "displayMode": "list", + 
"placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "max(sum by(pod)(rate(http_requests_total{method=\"POST\",status=\"2xx\",service=\"$release\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Most", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "min(sum by(pod)(rate(http_requests_total{method=\"POST\",status=\"2xx\",service=\"$release\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Least", + "range": true, + "refId": "C" + } + ], + "title": "Megaservice", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 4, + "y": 8 + }, + "id": 1, + "options": { + "legend": { + "calcs": ["min", 
"max"], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "max(sum by(pod)(rate(te_request_count{service=\"$release-teirerank\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Most", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "min(sum by(pod)(rate(te_request_count{service=\"$release-teirerank\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Least", + "range": true, + "refId": "C" + } + ], + "title": "Rerank", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 8, + "y": 8 + }, + "id": 19, + "options": { + "legend": { + "calcs": ["min", "max"], + 
"displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "max(sum by(pod)(rate(http_requests_total{method=\"POST\",status=\"2xx\",service=\"$release-retriever-usvc\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Most", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "min(sum by(pod)(rate(http_requests_total{method=\"POST\",status=\"2xx\",service=\"$release-retriever-usvc\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Least", + "range": true, + "refId": "C" + } + ], + "title": "Retrieve", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 12, + "y": 8 + }, + "id": 
3, + "options": { + "legend": { + "calcs": ["min", "max"], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "max(sum by(pod)(rate(te_request_count{service=\"$release-tei\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Most", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "min(sum by(pod)(rate(te_request_count{service=\"$release-tei\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Least", + "range": true, + "refId": "C" + } + ], + "title": "Embed", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 16, + "y": 8 + }, + "id": 22, + "options": { + "legend": 
{ + "calcs": ["min", "max"], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "max(sum by(pod)(rate(http_requests_total{method=\"POST\",status=\"2xx\",service=\"$release-llm-uservice\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Most", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "min(sum by(pod)(rate(http_requests_total{method=\"POST\",status=\"2xx\",service=\"$release-llm-uservice\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Least", + "range": true, + "refId": "C" + } + ], + "title": "LLM-uservice", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + 
"x": 20, + "y": 8 + }, + "id": 2, + "options": { + "legend": { + "calcs": ["min", "max"], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "max(sum by(pod)(rate(tgi_request_count{service=\"$release-tgi\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Most", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "min(sum by(pod)(rate(tgi_request_count{service=\"$release-tgi\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Least", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "max(sum by(pod)(rate(vllm:request_success_total{service=\"$release-vllm\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Most", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "min(sum by(pod)(rate(vllm:request_success_total{service=\"$release-vllm\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Least", + "range": true, + "refId": "D" + } + ], + "title": "LLM", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 12, + "panels": [], + "title": "Incomplete requests", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + 
"axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "A" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "none" + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 0, + "y": 16 + }, + "id": 15, + "options": { + "legend": { + "calcs": ["min", "max"], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum by (service)(megaservice_request_pending{service=\"$release\",namespace=\"$namespace\"})", + "hide": false, + "instant": false, + "legendFormat": "Pending total", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(rate(http_requests_total{method=\"POST\",status!=\"2xx\",service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Failed total", + "range": true, + "refId": "B" + } + ], + "title": "Megaservice: fail rate + pending count", + "type": "timeseries" + }, + 
{ + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 4, + "y": 16 + }, + "id": 14, + "options": { + "legend": { + "calcs": ["min", "max"], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(rate(te_request_count{service=\"$release-teirerank\",namespace=\"$namespace\"}[$__rate_interval])-rate(te_request_success{service=\"$release-teirerank\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Total", + "range": true, + "refId": "B" + } + ], + "title": "Rerank", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + 
"axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "A" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "none" + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 8, + "y": 16 + }, + "id": 20, + "options": { + "legend": { + "calcs": ["min", "max"], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(rate(http_requests_total{method=\"POST\",status!=\"2xx\",service=\"$release-retriever-usvc\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Failed total", + "range": true, + "refId": "B" + } + ], + "title": "Retrieve", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": 
"line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 12, + "y": 16 + }, + "id": 13, + "options": { + "legend": { + "calcs": ["min", "max"], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(rate(te_request_count{service=\"$release-tei\",namespace=\"$namespace\"}[$__rate_interval])-rate(te_request_success{service=\"$release-tei\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Total", + "range": true, + "refId": "B" + } + ], + "title": "Embed", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + 
}, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "A" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "none" + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 16, + "y": 16 + }, + "id": 23, + "options": { + "legend": { + "calcs": ["min", "max"], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(rate(http_requests_total{method=\"POST\",status!=\"2xx\",service=\"$release-llm-uservice\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Failed total", + "range": true, + "refId": "B" + } + ], + "title": "LLM-uservice", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 20, + "y": 16 + }, + "id": 11, + "options": { + "legend": { + "calcs": ["min", "max"], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(rate(tgi_request_count{service=\"$release-tgi\",namespace=\"$namespace\"}[$__rate_interval])-rate(tgi_request_success{service=\"$release-tgi\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Total", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum by (err)(rate(tgi_request_failure{service=\"$release-tgi\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "{{err}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(vllm:num_requests_running{service=\"$release-vllm\",namespace=\"$namespace\"})", + "hide": false, + "instant": false, + "legendFormat": "Running", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(vllm:num_requests_waiting{service=\"$release-vllm\",namespace=\"$namespace\"})", + "hide": false, + "instant": false, + "legendFormat": "Waiting", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": 
"sum(vllm:num_requests_swapped{service=\"$release-vllm\",namespace=\"$namespace\"})", + "hide": false, + "instant": false, + "legendFormat": "Swapped to CPU", + "range": true, + "refId": "E" + } + ], + "title": "LLM", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "prometheus" + }, + "description": "Prometheus instance", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "Metrics", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "default", + "value": "default" + }, + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "definition": "label_values(megaservice_request_pending,namespace)", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(megaservice_request_pending,namespace)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "chatqna", + "value": "chatqna" + }, + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "definition": "label_values(megaservice_request_pending{namespace=\"$namespace\", service!~\".*-uservice\", service!~\".*-usvc\"},service)", + "description": "Helm release name used as prefix for the services", + "hide": 0, + "includeAll": false, + "label": "Helm release", + "multi": false, + "name": "release", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(megaservice_request_pending{namespace=\"$namespace\", service!~\".*-uservice\", service!~\".*-usvc\"},service)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, 
+ "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "OPEA ChatQnA scaling", + "uid": "opea-chatqna-dashboard-scaling", + "version": 1, + "weekStart": "" +} diff --git a/kubeai/grafana/README.txt b/kubeai/grafana/README.txt new file mode 100644 index 000000000..082061211 --- /dev/null +++ b/kubeai/grafana/README.txt @@ -0,0 +1,34 @@ +# Files + +## Dashboards + +- `vllm-scaling.*`: Cluster overview of how much and how well vLLM is scaling +- `vllm-details.*`: More detailed per-model and/or per-instance vLLM metrics + +## File types + +- `*.yaml`: Grafana dashboard configMaps that Grafana will load automatically +- `*.json`: Grafana dashboard specs from which configMaps are generated +- `*.png`: Screenshots of those dashboards (updated manually) + +## Other files + +- `convert-dashboard.sh`: convert dashboard `*.json` file to configMap `*.yaml` file +- `README.txt`: this file + +## Dashboard formats + +Dashboard JSON files need to be loaded from the Grafana GUI manually. +Their changes can be saved and Grafana maintains update history for +them, but those are lost if Grafana is uninstalled. + +In contrast, Grafana automatically loads suitably labeled dashboard +configMaps. They persist even if Grafana gets re-installed, and one +can save their updates as JSON files. 
#!/bin/sh
#
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
# Convert Grafana dashboard *.json spec files to labeled configMap *.yaml
# files, written to the current directory.
#
# Dashboard update workflow:
# - Apply configMap to K8s so it's visible in Grafana dashboards list
# - Update dashboard in Grafana
# - Save it as JSON
# - Convert JSON file to configMap YAML: ./convert-dashboard.sh *.json
#
# Requires 'jq' (dashboard sanity checks) and 'kubectl' (configMap generation).
# Written for POSIX sh, no bashisms.

set -e

# OPEA CI requires copyright/license for the converted file
COPYRIGHT="Copyright (C) 2025 Intel Corporation"
LICENSE="SPDX-License-Identifier: Apache-2.0"

# Label needed in configMap to get (Helm installed) Grafana to load it as dashboard
LABEL="grafana_dashboard=1"

# Prefix added to configMap names that do not already start with it
PREFIX="opea-"

# Namespace given to 'kubectl create'. Left empty here; the usage example
# supplies the namespace when the generated YAML is applied.
NS=""

# Print the given error together with the usage text to stderr, then exit.
#
# Arguments: $1 - error message (without trailing punctuation)
# Returns:   never, exits the script with status 1
error_exit ()
{
    script=${0##*/}
    cat >&2 << EOF

ERROR: $1!

Convert given Grafana *.json dashboard specs to Grafana (Helm install) *.yaml configMaps.

Usage:
  $script <dashboard.json>...

Example:
  $script vllm.json
  kubectl apply -n monitoring -f vllm.yaml

=> Creates 'vllm.yaml' configMap named as '${PREFIX}vllm' for Grafana in 'monitoring' namespace.

ERROR: $1!
EOF
    exit 1
}

# Without arguments both loops below would be no-ops ending in "DONE!"
if [ $# -eq 0 ]; then
    error_exit "No dashboard JSON files given"
fi

# 'command -v' is the POSIX way to check tool presence ('which' is not)
if ! command -v jq >/dev/null 2>&1; then
    error_exit "'jq' required for dashboard checks, please install 'jq' first"
fi

if ! command -v kubectl >/dev/null 2>&1; then
    error_exit "'kubectl' required for dashboard conversion, please install 'kubernetes-client' first"
fi

if ! kubectl version; then
    error_exit "Broken/missing 'kubectl' cluster config (script does not need it, but kubectl still fails)"
fi

# Pass 1: validate all inputs before anything is written
echo
echo "Got following Grafana dashboards:"
for file in "$@"; do
    if [ ! -f "$file" ]; then
        error_exit "JSON file '$file' does not exist"
    fi
    # Suffix removal leaving the name unchanged => no '.json' extension
    if [ "${file%.json}" = "$file" ]; then
        error_exit "File '$file' does not have '.json' extension"
    fi

    # Dashboard 'uid' is optional as Grafana can generate one...
    # (empty output means jq could not parse the file)
    uid=$(jq .uid "$file" | tail -1 | tr -d '"')
    if [ -z "$uid" ]; then
        error_exit "'$file' dashboard has invalid JSON"
    elif [ "$uid" = "null" ]; then
        echo "WARNING: no dashboard 'uid', Grafana will assign new one on every load: $file"
    elif printf '%s\n' "$uid" | grep -q -v '^[-0-9a-f]*$'; then
        echo "DEBUG: dashboard 'uid' not in hex format: '$uid'?"
    fi

    # ...but it should have a title.
    title=$(jq .title "$file" | tail -1 | tr -d '"')
    if [ "$title" = "null" ]; then
        error_exit "'$file' dashboard has no 'title' field"
    fi

    echo "- file: $file, uid: '$uid', title: '$title'"
done

# Pass 2: generate one configMap YAML per dashboard
echo
echo "Converting:"
for file in "$@"; do
    base=${file##*/}
    name=${base%.json}
    dst="${name}.yaml"

    # if no prefix, add one
    if [ "${name#"$PREFIX"}" = "$name" ]; then
        name="${PREFIX}${name}"
    fi

    # convert to k8s object name ("[a-z0-9][-a-z0-9]*[a-z0-9]"):
    # - upper-case -> lowercase, '_' -> '-'
    # - drop anything outside [-a-z] (note: this drops digits too)
    # - drop '-' prefix & suffix and successive '-' chars
    k8name=$(printf '%s\n' "$name" | tr 'A-Z_' 'a-z-' | tr -d -c 'a-z-' |
        sed -e 's/^-*//' -e 's/-*$//' -e 's/--*/-/g')

    echo "- file: $dst, configMap: $k8name, title: $(jq .title "$file" | tail -1)"

    # Run kubectl steps separately: POSIX sh has no 'pipefail', so in a
    # single pipeline their failures would be hidden by the final 'grep'.
    manifest=$(kubectl create cm -n "$NS" --from-file "$file" --dry-run=client -o yaml "$k8name") ||
        error_exit "configMap generation failed for '$file'"
    manifest=$(printf '%s\n' "$manifest" |
        kubectl label -f- --local --dry-run=client -o yaml "$LABEL") ||
        error_exit "configMap labeling failed for '$file'"

    # Write output only after conversion succeeded, so that a failure
    # does not leave a header-only YAML file behind.
    {
        echo "# $COPYRIGHT"
        echo "# $LICENSE"
        echo "#"
        echo "# ${0##*/}: $base -> $dst"
        # drop the generated creationTimestamp line, regardless of its indentation
        printf '%s\n' "$manifest" | grep -v -e '^ *creationTimestamp:'
    } > "$dst"
done

echo
echo "DONE!"
diff --git a/kubeai/grafana/vllm-details.json b/kubeai/grafana/vllm-details.json new file mode 100644 index 000000000..819dee8b9 --- /dev/null +++ b/kubeai/grafana/vllm-details.json @@ -0,0 +1,1606 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "vLLM inference engine details", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "End to end request latency measured in seconds.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + 
"pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "P99", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "P95", + "range": true, + "refId": "B", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "P90", + "range": true, + "refId": "C", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "P50", 
+ "range": true, + "refId": "D", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "rate(vllm:e2e_request_latency_seconds_sum{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}[$__rate_interval])\n/\nrate(vllm:e2e_request_latency_seconds_count{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "instant": false, + "legendFormat": "Average", + "range": true, + "refId": "E" + } + ], + "title": "E2E Request Latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Number of tokens processed per second", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + 
"disableTextWrap": false, + "editorMode": "code", + "expr": "rate(vllm:prompt_tokens_total{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}[$__rate_interval])", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "Prompt Tokens/Sec", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "rate(vllm:generation_tokens_total{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}[$__rate_interval])", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "Generation Tokens/Sec", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Token Throughput", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Inter token latency in seconds.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 10, + "options": { + 
"legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "P99", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "P95", + "range": true, + "refId": "B", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "P90", + "range": true, + "refId": "C", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by(le) 
(rate(vllm:time_per_output_token_seconds_bucket{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "P50", + "range": true, + "refId": "D", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "rate(vllm:time_per_output_token_seconds_sum{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}[$__rate_interval])\n/\nrate(vllm:time_per_output_token_seconds_count{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "instant": false, + "legendFormat": "Mean", + "range": true, + "refId": "E" + } + ], + "title": "Time Per Output Token Latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Number of requests in RUNNING, WAITING, and SWAPPED state", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, 
+ "y": 8 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "vllm:num_requests_running{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Running", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "vllm:num_requests_waiting{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Waiting", + "range": true, + "refId": "C", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "vllm:num_requests_swapped{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}", + "fullMetaSearch": false, + "hide": true, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Swapped", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Scheduler State", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "P50, P90, P95, and P99 TTFT latency in seconds.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + 
"gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "P99", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "P95", + "range": true, + "refId": "B", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": 
"histogram_quantile(0.9, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "P90", + "range": true, + "refId": "C", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "P50", + "range": true, + "refId": "D", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "rate(vllm:time_to_first_token_seconds_sum{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}[$__rate_interval])\n/\nrate(vllm:time_to_first_token_seconds_count{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "instant": false, + "legendFormat": "Average", + "range": true, + "refId": "E" + } + ], + "title": "Time To First Token Latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Percentage of used cache blocks by vLLM.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + 
"lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "vllm:gpu_cache_usage_perc{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}", + "instant": false, + "legendFormat": "GPU", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "vllm:cpu_cache_usage_perc{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}", + "hide": false, + "instant": false, + "legendFormat": "CPU", + "range": true, + "refId": "B" + } + ], + "title": "Cache Utilization", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Heatmap of request prompt length", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 12, + "options": { + "calculate": false, + "cellGap": 1, + "cellValues": { + "unit": "none" + }, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + 
"min": 0, + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Spectral", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto", + "value": "Request count" + }, + "tooltip": { + "mode": "single", + "show": true, + "showColorScale": false, + "yHistogram": true + }, + "yAxis": { + "axisLabel": "Prompt Length", + "axisPlacement": "left", + "reverse": false, + "unit": "none" + } + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(le) (increase(vllm:request_prompt_tokens_bucket{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}[$__rate_interval]))", + "format": "heatmap", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{le}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Request Prompt Length", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Heatmap of request generation length", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 13, + "options": { + "calculate": false, + "cellGap": 1, + "cellValues": { + "unit": "none" + }, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "min": 0, + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Spectral", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto", + "value": "Request 
count" + }, + "tooltip": { + "mode": "single", + "show": true, + "showColorScale": false, + "yHistogram": true + }, + "yAxis": { + "axisLabel": "Generation Length", + "axisPlacement": "left", + "reverse": false, + "unit": "none" + } + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(le) (increase(vllm:request_generation_tokens_bucket{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}[$__rate_interval]))", + "format": "heatmap", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{le}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Request Generation Length", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Number of finished requests by their finish reason: either an EOS token was generated or the max sequence length was reached.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 32 + }, + 
"id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(finished_reason) (increase(vllm:request_success_total{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "interval": "", + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Finish Reason", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "seconds", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 32 + }, + "id": 14, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": 
{ + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "rate(vllm:request_queue_time_seconds_sum{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}[$__rate_interval])", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Time", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Queue Time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 40 + }, + "id": 15, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "rate(vllm:request_prefill_time_seconds_sum{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}[$__rate_interval])", + "fullMetaSearch": false, + 
"includeNullMetadata": true, + "instant": false, + "legendFormat": "Prefill", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "rate(vllm:request_decode_time_seconds_sum{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}[$__rate_interval])", + "hide": false, + "instant": false, + "legendFormat": "Decode", + "range": true, + "refId": "B" + } + ], + "title": "Requests Prefill and Decode Time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 40 + }, + "id": 16, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": 
"rate(vllm:request_max_num_generation_tokens_sum{namespace=\"$namespace\",model_name=\"$model_name\",instance=~\"$instance\"}[$__rate_interval])", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Tokens", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Max Generation Token in Sequence Group", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "prometheus" + }, + "hide": 0, + "includeAll": false, + "label": "Metrics", + "multi": false, + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "kubeai", + "value": "kubeai" + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(vllm:num_requests_running,namespace)", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(vllm:num_requests_running,namespace)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "llama-3.1-8b-instruct-gaudi", + "value": "llama-3.1-8b-instruct-gaudi" + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(vllm:num_requests_running{namespace=\"$namespace\"},model_name)", + "hide": 0, + "includeAll": false, + "label": "Model", + "multi": false, + "name": "model_name", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(vllm:num_requests_running{namespace=\"$namespace\"},model_name)", + "refId": 
"PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(vllm:num_requests_running{namespace=\"$namespace\", model_name=\"$model_name\"},instance)", + "hide": 0, + "includeAll": true, + "label": "Instance", + "multi": false, + "name": "instance", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(vllm:num_requests_running{namespace=\"$namespace\", model_name=\"$model_name\"},instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 3, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "vLLM details", + "uid": "a74126aa-112d-506c-4137-969737e7f598", + "version": 1, + "weekStart": "" +} diff --git a/kubeai/grafana/vllm-scaling.json b/kubeai/grafana/vllm-scaling.json new file mode 100644 index 000000000..ca57988e1 --- /dev/null +++ b/kubeai/grafana/vllm-scaling.json @@ -0,0 +1,1010 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "vLLM inference engine scaling", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + 
"axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 17, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "count(vllm:num_requests_waiting{namespace=\"$namespace\",model_name=\"$model_name\"})", + "hide": false, + "instant": false, + "legendFormat": "Count", + "range": true, + "refId": "D" + } + ], + "title": "Engine pods", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 20, + "options": { + "legend": { + "calcs": ["min", "max"], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "max(sum by(pod)(rate(vllm:request_success_total{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Most", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "min(sum by(pod)(rate(vllm:request_success_total{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Least", + "range": true, + "refId": "D" + } + ], + "title": "Pod request successes", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + 
"lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 18, + "options": { + "legend": { + "calcs": ["min", "max"], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(vllm:num_requests_running{namespace=\"$namespace\",model_name=\"$model_name\"})", + "hide": false, + "instant": false, + "legendFormat": "Running", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(vllm:num_requests_waiting{namespace=\"$namespace\",model_name=\"$model_name\"})", + "hide": false, + "instant": false, + "legendFormat": "Waiting", + "range": true, + "refId": "D" + } + ], + "title": "Scheduling totals", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + 
"pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 22, + "options": { + "legend": { + "calcs": ["min", "max"], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "max(rate(vllm:e2e_request_latency_seconds_sum{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]) / rate(vllm:e2e_request_latency_seconds_count{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Worst", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "min(rate(vllm:e2e_request_latency_seconds_sum{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]) / rate(vllm:e2e_request_latency_seconds_count{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Best", + "range": true, + "refId": "D" + } + ], + "title": "Pod request latencies", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", 
+ "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "A" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "t/r" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 21, + "options": { + "legend": { + "calcs": ["min", "max"], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "(sum by (service)(rate(vllm:generation_tokens_total{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]))) / (sum by (service)(rate(vllm:request_success_total{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval])))", + "hide": false, + "legendFormat": "Tokens / request", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (service)(rate(vllm:e2e_request_latency_seconds_count{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Requests", + "range": true, + "refId": "B" + } + ], + "title": 
"Requests", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 23, + "options": { + "legend": { + "calcs": ["min", "max"], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "max(rate(vllm:time_to_first_token_seconds_sum{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]) / rate(vllm:time_to_first_token_seconds_count{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Worst", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": 
"min(rate(vllm:time_to_first_token_seconds_sum{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]) / rate(vllm:time_to_first_token_seconds_count{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Best", + "range": true, + "refId": "D" + } + ], + "title": "Pod first token latencies", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "t/s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 19, + "options": { + "legend": { + "calcs": ["min", "max"], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (model_name)(rate(vllm:time_per_output_token_seconds_count{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]))", + "hide": false, + "instant": false, + 
"legendFormat": "Rate", + "range": true, + "refId": "B" + } + ], + "title": "Total output tokens", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 24, + "options": { + "legend": { + "calcs": ["min", "max"], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "max(rate(vllm:time_per_output_token_seconds_sum{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]) / rate(vllm:time_per_output_token_seconds_count{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Worst", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": 
"min(rate(vllm:time_per_output_token_seconds_sum{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]) / rate(vllm:time_per_output_token_seconds_count{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Best", + "range": true, + "refId": "C" + } + ], + "title": "Pod per-token latencies", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "prometheus" + }, + "hide": 0, + "includeAll": false, + "label": "Metrics", + "multi": false, + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "kubeai", + "value": "kubeai" + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(vllm:num_requests_running,namespace)", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(vllm:num_requests_running,namespace)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "meta-llama/Meta-Llama-3-8B-Instruct", + "value": "meta-llama/Meta-Llama-3-8B-Instruct" + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(vllm:num_requests_running{namespace=\"$namespace\"},model_name)", + "hide": 0, + "includeAll": false, + "label": "Model", + "multi": false, + "name": "model_name", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(vllm:num_requests_running{namespace=\"$namespace\"},model_name)", + "refId": 
"PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "vLLM scaling", + "uid": "a7882a6a-121d-760c-8387-69973e77f002", + "version": 1, + "weekStart": "" +}