diff --git a/helm-charts/agentqna/values.yaml b/helm-charts/agentqna/values.yaml
index 7c7c26fc5..17cb2a7db 100644
--- a/helm-charts/agentqna/values.yaml
+++ b/helm-charts/agentqna/values.yaml
@@ -44,11 +44,6 @@ tolerations: []
 
 affinity: {}
 
-# This is just to avoid Helm errors when HPA is NOT used
-# (use hpa-values.yaml files to actually enable HPA).
-horizontalPodAutoscaler:
-  enabled: false
-
 docretriever:
   image:
     repository: opea/doc-index-retriever
diff --git a/helm-charts/chatqna/hpa-values.yaml b/helm-charts/chatqna/hpa-values.yaml
index ccf17454b..6afd16d41 100644
--- a/helm-charts/chatqna/hpa-values.yaml
+++ b/helm-charts/chatqna/hpa-values.yaml
@@ -14,6 +14,15 @@ dashboard:
 
 autoscaling:
   enabled: true
+  minReplicas: 1
+  maxReplicas: 4
+  # ChatQnA becomes scaling bottleneck when gets close to 100% CPU usage
+  targetCPUUtilizationPercentage: 80
+resources:
+  # CPU side HPA won't work without resource requests
+  requests:
+    # ChatQnA does not thread currently
+    cpu: 1
 
 global:
   # K8s custom metrics (used for scaling thresholds) are based on metrics from service monitoring
diff --git a/helm-charts/chatqna/templates/horizontal-pod-autoscaler.yaml b/helm-charts/chatqna/templates/horizontal-pod-autoscaler.yaml
new file mode 100644
index 000000000..67198aeb0
--- /dev/null
+++ b/helm-charts/chatqna/templates/horizontal-pod-autoscaler.yaml
@@ -0,0 +1,25 @@
+{{- if .Values.autoscaling.enabled }}
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: {{ include "chatqna.fullname" . }}
+  labels:
+    {{- include "chatqna.labels" . | nindent 4 }}
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ include "chatqna.fullname" . }}
+  minReplicas: {{ .Values.autoscaling.minReplicas }}
+  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
+  metrics:
+    - type: Resource
+      resource:
+        name: cpu
+        target:
+          type: Utilization
+          averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
+{{- end }}
diff --git a/helm-charts/common/dashboard/templates/configmap-metrics.yaml b/helm-charts/common/dashboard/templates/configmap-metrics.yaml
index 081da18bd..fa8e5e012 100644
--- a/helm-charts/common/dashboard/templates/configmap-metrics.yaml
+++ b/helm-charts/common/dashboard/templates/configmap-metrics.yaml
@@ -1690,6 +1690,30 @@ data:
               "legendFormat": "vLLM: used",
               "range": true,
               "refId": "H"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${Metrics}"
+              },
+              "editorMode": "code",
+              "expr": "count(up{service=\"$release\",namespace=\"$namespace\"})",
+              "hide": false,
+              "legendFormat": "MegaService: instances",
+              "range": true,
+              "refId": "I"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${Metrics}"
+              },
+              "editorMode": "code",
+              "expr": "count(megaservice_first_token_latency_count{service=\"$release\",namespace=\"$namespace\"})",
+              "hide": false,
+              "legendFormat": "MegaService: used",
+              "range": true,
+              "refId": "J"
             }
           ],
           "title": "Replicas",