diff --git a/kubeai/README.md b/kubeai/README.md index 73e604e20..13ead7d69 100644 --- a/kubeai/README.md +++ b/kubeai/README.md @@ -4,6 +4,18 @@ For now, OPEA enables a subset of the KubeAI features. In the future more KubeAI service will be added. +- [KubeAI for OPEA](#kubeai-for-opea) + - [Features](#features) +- [Installation](#installation) + - [Prerequisites](#prerequisites) + - [Install KubeAI](#install-kubeai) +- [Deploying the Models](#deploying-the-models) + - [Text Generation with Llama-3 on CPU](#text-generation-with-llama-3-on-cpu) + - [Text Generation with Llama-3 on Gaudi](#text-generation-with-llama-3-on-gaudi) + - [Text Embeddings with BGE on CPU](#text-embeddings-with-bge-on-cpu) +- [Using the Models](#using-the-models) +- [Observability](#observability) + ## Features The following features are available at the moment. @@ -173,26 +185,38 @@ Enjoy the answer! With [Prometheus](../helm-charts/monitoring.md) running, install script can enable monitoring of the vLLM inference engine instances. -Script requires Prometheus Helm chart release name for that, e.g.: +Script requires Prometheus Helm chart release name for that, e.g. ``` release=prometheus-stack ./install.sh $release ``` -Install dashboard for vLLM metrics to same namespace as Grafana. +Port-forward Grafana. ``` -ns=monitoring -kubectl apply -n $ns -f grafana/vllm-metrics.yaml +ns=monitoring +kubectl port-forward -n $ns svc/$release-grafana 3000:80 ``` -Port-forward Grafana +Install "vLLM scaling" and "vLLM details" dashboards, to the same namespace as Grafana. ``` -kubectl port-forward -n $ns svc/$release-grafana 3000:80 +ns=monitoring +kubectl apply -n $ns -f grafana/vllm-scaling.yaml -f grafana/vllm-details.yaml ``` -And open web-browser to `http://localhost:3000` with `admin` / `prom-operator` given as the username / password for login. +Open a web browser at `http://localhost:3000` and log in with `admin` / `prom-operator` as the username / password to view the dashboards.
+ +Both dashboards filter the viewed vLLM instances by the selected namespace (e.g. `kubeai`) and the model they use. + +The scaling dashboard shows trends for both the sum of metrics across all these instances and the best and worst per-instance metric values at a given moment. +![Scaling dashboard](grafana/vllm-scaling.png) + +The details dashboard shows more detailed engine metrics for the selected vLLM instance (or all of them). +![Details dashboard](grafana/vllm-details.png) + +Note: -Note: metrics will be available only after first request has been processed. +- Dashboards should be visible in Grafana within a minute of being applied, but +- vLLM metrics will be available only after the first inference request has been processed. diff --git a/kubeai/grafana/vllm-details.png b/kubeai/grafana/vllm-details.png new file mode 100644 index 000000000..bc2a6b62d Binary files /dev/null and b/kubeai/grafana/vllm-details.png differ diff --git a/kubeai/grafana/vllm-metrics.yaml b/kubeai/grafana/vllm-details.yaml similarity index 99% rename from kubeai/grafana/vllm-metrics.yaml rename to kubeai/grafana/vllm-details.yaml index ba7683901..36c0df227 100644 --- a/kubeai/grafana/vllm-metrics.yaml +++ b/kubeai/grafana/vllm-details.yaml @@ -6,9 +6,9 @@ kind: ConfigMap metadata: labels: grafana_dashboard: "1" - name: opea-kubeai-vllm-metrics + name: opea-vllm-details data: - opea-kubeai-vllm-metrics.json: | + opea-vllm-details.json: | { "annotations": { "list": [ @@ -32,7 +32,7 @@ data: } ] }, - "description": "vLLM inference engine", + "description": "vLLM inference engine details", "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, @@ -1612,7 +1612,7 @@ data: }, "timepicker": {}, "timezone": "", - "title": "KubeAI-vLLM", + "title": "vLLM details", "uid": "a74126aa-112d-506c-4137-969737e7f598", "version": 1, "weekStart": "" diff --git a/kubeai/grafana/vllm-scaling.png b/kubeai/grafana/vllm-scaling.png new file mode 100644 index 000000000..5c93e3244
Binary files /dev/null and b/kubeai/grafana/vllm-scaling.png differ diff --git a/kubeai/grafana/vllm-scaling.yaml b/kubeai/grafana/vllm-scaling.yaml new file mode 100644 index 000000000..f530cd52e --- /dev/null +++ b/kubeai/grafana/vllm-scaling.yaml @@ -0,0 +1,1042 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + grafana_dashboard: "1" + name: opea-vllm-scaling +data: + opea-vllm-scaling.json: | + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "vLLM inference engine scaling", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + 
}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 17, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "count(vllm:num_requests_waiting{namespace=\"$namespace\",model_name=\"$model_name\"})", + "hide": false, + "instant": false, + "legendFormat": "Count", + "range": true, + "refId": "D" + } + ], + "title": "Engine pods", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 20, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + 
"uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "max(sum by(pod)(rate(vllm:request_success_total{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Most", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "min(sum by(pod)(rate(vllm:request_success_total{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Least", + "range": true, + "refId": "D" + } + ], + "title": "Pod request successes", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 18, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + 
"datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(vllm:num_requests_running{namespace=\"$namespace\",model_name=\"$model_name\"})", + "hide": false, + "instant": false, + "legendFormat": "Running", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(vllm:num_requests_waiting{namespace=\"$namespace\",model_name=\"$model_name\"})", + "hide": false, + "instant": false, + "legendFormat": "Waiting", + "range": true, + "refId": "D" + } + ], + "title": "Scheduling totals", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 22, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": 
"prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "max(rate(vllm:e2e_request_latency_seconds_sum{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]) / rate(vllm:e2e_request_latency_seconds_count{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Worst", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "min(rate(vllm:e2e_request_latency_seconds_sum{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]) / rate(vllm:e2e_request_latency_seconds_count{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Best", + "range": true, + "refId": "D" + } + ], + "title": "Pod request latencies", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + 
"options": "A" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "t/r" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 21, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "(sum by (service)(rate(vllm:generation_tokens_total{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]))) / (sum by (service)(rate(vllm:request_success_total{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval])))", + "hide": false, + "legendFormat": "Tokens / request", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (service)(rate(vllm:e2e_request_latency_seconds_count{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Requests", + "range": true, + "refId": "B" + } + ], + "title": "Requests", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + 
"spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 23, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "max(rate(vllm:time_to_first_token_seconds_sum{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]) / rate(vllm:time_to_first_token_seconds_count{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Worst", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "min(rate(vllm:time_to_first_token_seconds_sum{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]) / rate(vllm:time_to_first_token_seconds_count{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Best", + "range": true, + "refId": "D" + } + ], + "title": "Pod first token latencies", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + 
"fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "t/s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 19, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (model_name)(rate(vllm:time_per_output_token_seconds_count{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Rate", + "range": true, + "refId": "B" + } + ], + "title": "Total output tokens", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + 
"spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 24, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "max(rate(vllm:time_per_output_token_seconds_sum{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]) / rate(vllm:time_per_output_token_seconds_count{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Worst", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "min(rate(vllm:time_per_output_token_seconds_sum{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]) / rate(vllm:time_per_output_token_seconds_count{namespace=\"$namespace\",model_name=\"$model_name\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Best", + "range": true, + "refId": "C" + } + ], + "title": "Pod per-token latencies", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "prometheus" + }, + "hide": 0, + "includeAll": false, + "label": "Metrics", + "multi": false, + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + 
"type": "datasource" + }, + { + "current": { + "selected": false, + "text": "kubeai", + "value": "kubeai" + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(vllm:num_requests_running,namespace)", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(vllm:num_requests_running,namespace)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "meta-llama/Meta-Llama-3-8B-Instruct", + "value": "meta-llama/Meta-Llama-3-8B-Instruct" + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(vllm:num_requests_running{namespace=\"$namespace\"},model_name)", + "hide": 0, + "includeAll": false, + "label": "Model", + "multi": false, + "name": "model_name", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(vllm:num_requests_running{namespace=\"$namespace\"},model_name)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "vLLM scaling", + "uid": "a7882a6a-121d-760c-8387-69973e77f002", + "version": 1, + "weekStart": "" + }