From 9a88ffd08c64a6e83a4a2852f7efe5d23f26494f Mon Sep 17 00:00:00 2001
From: Eero Tamminen
Date: Thu, 3 Jul 2025 18:59:57 +0300
Subject: [PATCH] Automated Grafana dashboard + Prometheus support for KubeAI

- Install vLLM dashboards if a Grafana pod is found
- Install vLLM podMonitor if a deployed Prometheus Helm release is found
- More descriptive KubeAI Helm release name: "opea" -> "opea-kubeai"

Signed-off-by: Eero Tamminen
---
 kubeai/README.md  | 16 +++++++++-------
 kubeai/install.sh | 44 ++++++++++++++++++++++++++++++++++++--------
 2 files changed, 45 insertions(+), 15 deletions(-)

diff --git a/kubeai/README.md b/kubeai/README.md
index 49a7a7b22..3be889f09 100644
--- a/kubeai/README.md
+++ b/kubeai/README.md
@@ -300,26 +300,28 @@ on the configuration options.
 
 # Observability
 
-With [Prometheus](../helm-charts/monitoring.md) running, install script can enable monitoring of the vLLM inference engine instances.
+With the [kube-prometheus-stack](../helm-charts/monitoring.md) Helm chart already deployed, the install script will automatically enable monitoring for the vLLM inference engine pods.
 
-Script requires Prometheus Helm chart release name for that, e.g.
+If the script does not detect it, the Prometheus Helm chart release can be specified manually:
 
 ```
 release=prometheus-stack
 ./install.sh $release
 ```
 
-Port-forward Grafana.
+If the script also finds a running Grafana instance, it will install the "vLLM scaling" and "vLLM details" dashboards for it.
+
+They can also be installed manually afterwards:
 
 ```
-kubectl port-forward -n $ns svc/$release-grafana 3000:80
+ns=monitoring # Grafana namespace
+kubectl apply -n $ns -f grafana/vllm-scaling.yaml -f grafana/vllm-details.yaml
 ```
 
-Install "vLLM scaling" and "vLLM details" dashboards, to the same namespace as Grafana.
+Then port-forward Grafana.
 
 ```
-ns=monitoring
-kubectl apply -n $ns -f grafana/vllm-scaling.yaml -f grafana/vllm-details.yaml
+kubectl port-forward -n $ns svc/$release-grafana 3000:80
 ```
 
 Open web-browser to `http://localhost:3000` with `admin` / `prom-operator` given as the username / password for login, to view the dashboards.
diff --git a/kubeai/install.sh b/kubeai/install.sh
index 8bfde03ef..55804c37f 100755
--- a/kubeai/install.sh
+++ b/kubeai/install.sh
@@ -8,30 +8,32 @@ DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 error_exit () {
     name=${0##*/}
     cat <<EOF
 Usage: [HF_TOKEN=<token>] $name [HF token file] [Prometheus release name]
 
 HuggingFace token for accessing models can be given either as
 an environment variable, or as a name of a file containing
 the token value.
 
-If Prometheus Helm release name is given, Prometheus monitoring is
-enabled for the inference engine instances, and vLLM dashboard
-(configMap) is installed for Grafana.
+If the script finds a deployed "kube-prometheus-stack" Helm chart release,
+it enables Prometheus monitoring for the vLLM inference engine pods.
+vLLM dashboards (configMaps) are installed if a running Grafana
+instance is also detected.
+
+Prometheus release name can also be given as an argument,
+in case the automatic detection does not find it.
 
 ERROR: $1!
 EOF
     exit 1
 }
 
-metrics=""
+release=""
 for arg in "$@"; do
     if [ -f "$arg" ]; then
         echo "Using HF token from '$arg' file."
         HF_TOKEN=$(cat "$arg")
     else
-        echo "Enabling vLLM inference pod monitoring for '$arg' Prometheus Helm install."
-        metrics="--set metrics.prometheusOperator.vLLMPodMonitor.labels.release=$arg"
-        metrics="$metrics --set metrics.prometheusOperator.vLLMPodMonitor.enabled=true"
+        release="$arg"
     fi
 done
 
@@ -39,7 +41,33 @@ if [ -z "$HF_TOKEN" ]; then
     error_exit "HF token missing"
 fi
 
-helm upgrade --install opea -n kubeai kubeai/kubeai \
+if [ -z "$release" ]; then
+    if [ -z "$(which jq)" ]; then
+        error_exit "please install 'jq' to parse Helm releases info"
+    fi
+    # check whether cluster has deployed Prometheus Helm chart release, if none specified
+    release=$(helm list -A -o json | jq '.[] | select(.chart|match("^kube-prometheus-stack")) | select(.status=="deployed") | .name' | tr -d '"')
+fi
+
+metrics=""
+if [ -n "$release" ]; then
+    running="status.phase=Running"
+    grafana="app.kubernetes.io/name=grafana"
+    jsonpath="{.items[0].metadata.namespace}"
+
+    # check for Grafana namespace
+    ns=$(kubectl get -A pod --field-selector="$running" --selector="$grafana" -o jsonpath="$jsonpath")
+    if [ -n "$ns" ]; then
+        echo "Grafana available, installing vLLM dashboards to '$ns' namespace."
+        kubectl apply -n $ns -f $DIR/grafana/vllm-scaling.yaml -f $DIR/grafana/vllm-details.yaml
+    fi
+
+    echo "Enabling vLLM pod monitoring for '$release' Prometheus Helm install."
+    metrics="--set metrics.prometheusOperator.vLLMPodMonitor.labels.release=$release"
+    metrics="$metrics --set metrics.prometheusOperator.vLLMPodMonitor.enabled=true"
+fi
+
+helm upgrade --install opea-kubeai -n kubeai kubeai/kubeai \
     --create-namespace \
     --set secrets.huggingface.token="$HF_TOKEN" \
     -f $DIR/opea-values.yaml $metrics
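Usage note for reviewers: with this patch applied, the script can be run as below and the created resources checked afterwards. This is a minimal sketch, not part of the patch; the `hf-token.txt` file name, the `monitoring` Grafana namespace, and the assumption that the PodMonitor lands in the `kubeai` release namespace all depend on the local setup and chart defaults.

```
# HF token read from a file (example name), Prometheus release auto-detected
./install.sh hf-token.txt

# or pass the token via environment and name the Prometheus Helm release explicitly
HF_TOKEN=<token> ./install.sh prometheus-stack

# check the vLLM PodMonitor created by the KubeAI chart (assumed: release namespace "kubeai")
kubectl get podmonitors.monitoring.coreos.com -n kubeai

# check the dashboard configMaps in the Grafana namespace (assumed: "monitoring")
kubectl get configmap -n monitoring | grep -i vllm
```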