37 changes: 28 additions & 9 deletions docs/tutorials/gpu-telemetry.md
@@ -28,15 +28,25 @@ AIPerf provides GPU telemetry collection with the `--gpu-telemetry` flag. Here's

### How the `--gpu-telemetry` Flag Works

| Usage | Command | What Gets Collected (If Available) | Console Display | CSV/JSON Export |
|-------|---------|---------------------|-----------------|-----------------|
| **No flag** | `aiperf profile --model MODEL ...` | `http://localhost:9400/metrics` + `http://localhost:9401/metrics` | ❌ No | ✅ Yes |
| **Flag only** | `aiperf profile --model MODEL ... --gpu-telemetry` | `http://localhost:9400/metrics` + `http://localhost:9401/metrics` | ✅ Yes | ✅ Yes |
| **Custom URLs** | `aiperf profile --model MODEL ... --gpu-telemetry http://node1:9400/metrics http://node2:9400/metrics` | `http://localhost:9400/metrics` + `http://localhost:9401/metrics` + custom URLs | ✅ Yes | ✅ Yes |
| Usage | Command | What Gets Collected (If Available) | Console Display | Dashboard View | CSV/JSON Export |
|-------|---------|---------------------|-----------------|----------------|-----------------|
| **No flag** | `aiperf profile --model MODEL ...` | `http://localhost:9400/metrics` + `http://localhost:9401/metrics` | ❌ No | ❌ No | ✅ Yes |
| **Flag only** | `aiperf profile --model MODEL ... --gpu-telemetry` | `http://localhost:9400/metrics` + `http://localhost:9401/metrics` | ✅ Yes | ❌ No | ✅ Yes |
| **Dashboard mode** | `aiperf profile --model MODEL ... --gpu-telemetry dashboard` | `http://localhost:9400/metrics` + `http://localhost:9401/metrics` | ✅ Yes | ✅ Yes | ✅ Yes |
| **Custom URLs** | `aiperf profile --model MODEL ... --gpu-telemetry http://node1:9400/metrics http://node2:9400/metrics` | `http://localhost:9400/metrics` + `http://localhost:9401/metrics` + custom URLs | ✅ Yes | ❌ No | ✅ Yes |
| **Dashboard + URLs** | `aiperf profile --model MODEL ... --gpu-telemetry dashboard http://node1:9400/metrics` | `http://localhost:9400/metrics` + `http://localhost:9401/metrics` + custom URLs | ✅ Yes | ✅ Yes | ✅ Yes |

> [!IMPORTANT]
> The default endpoints `http://localhost:9400/metrics` and `http://localhost:9401/metrics` are ALWAYS attempted for telemetry collection, regardless of whether the `--gpu-telemetry` flag is used. The flag primarily controls whether metrics are displayed on the console and allows you to specify additional custom DCGM exporter endpoints.
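
If you're unsure whether a DCGM exporter is reachable before a run, a quick `curl` against the default endpoint is an easy sanity check (port 9400 shown here; substitute your exporter's address):

```bash
# Prints the first few Prometheus-format metric lines if the exporter is up
curl -s http://localhost:9400/metrics | head -n 5
```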

### Real-Time Dashboard View

Adding `dashboard` to the `--gpu-telemetry` flag enables a live terminal UI (TUI) that displays GPU metrics in real time during your benchmark runs:

```bash
aiperf profile --model MODEL ... --gpu-telemetry dashboard
```

---

# 1: Using Dynamo
@@ -48,7 +58,7 @@ Dynamo includes DCGM out of the box on port 9401 - no extra setup needed!
```bash
# Set environment variables
export AIPERF_REPO_TAG="main"
export DYNAMO_PREBUILT_IMAGE_TAG="nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.5.0"
export DYNAMO_PREBUILT_IMAGE_TAG="nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.5.1"
export MODEL="Qwen/Qwen3-0.6B"

# Download the Dynamo container
@@ -99,7 +109,7 @@ uv pip install ./aiperf

```bash
# Wait for Dynamo API to be ready (up to 15 minutes)
timeout 900 bash -c 'while [ "$(curl -s -o /dev/null -w "%{http_code}" localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d "{\"model\":\"Qwen/Qwen3-0.6B\",\"messages\":[{\"role\":\"user\",\"content\":\"a\"}],\"max_completion_tokens\":1}")" != "200" ]; do sleep 2; done' || { echo "Dynamo not ready after 15min"; exit 1; }
timeout 900 bash -c 'while [ "$(curl -s -o /dev/null -w "%{http_code}" localhost:8000/v1/chat/completions -H "Content-Type: application/json" -d "{\"model\":\"Qwen/Qwen3-0.6B\",\"messages\":[{\"role\":\"user\",\"content\":\"a\"}],\"max_completion_tokens\":1}")" != "200" ]; do sleep 2; done' || { echo "Dynamo not ready after 15min"; exit 1; }
```
```bash
# Wait for DCGM Exporter to be ready (up to 2 minutes after Dynamo is ready)
@@ -116,7 +126,7 @@ aiperf profile \
--endpoint-type chat \
--endpoint /v1/chat/completions \
--streaming \
--url localhost:8080 \
--url localhost:8000 \
--synthetic-input-tokens-mean 100 \
--synthetic-input-tokens-stddev 0 \
--output-tokens-mean 200 \
@@ -131,6 +141,9 @@ aiperf profile \
--gpu-telemetry
```

> [!TIP]
> The `dashboard` keyword enables a live terminal UI for real-time GPU telemetry visualization. Press `5` to maximize the GPU Telemetry panel during the benchmark run.
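
For example, a minimal variant of the command above with the dashboard enabled (remaining flags omitted for brevity; they carry over unchanged):

```bash
aiperf profile \
  --model "Qwen/Qwen3-0.6B" \
  --url localhost:8000 \
  --endpoint-type chat \
  --endpoint /v1/chat/completions \
  --streaming \
  --gpu-telemetry dashboard
```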

---

# 2: Using Other Inference Servers
@@ -279,6 +292,12 @@ aiperf profile \
--gpu-telemetry
```

> [!TIP]
> The `dashboard` keyword enables a live terminal UI for real-time GPU telemetry visualization. Press `5` to maximize the GPU Telemetry panel during the benchmark run.

## Multi-Node GPU Telemetry Example

For distributed setups with multiple nodes, you can collect GPU telemetry from all nodes simultaneously:
@@ -292,7 +311,7 @@ aiperf profile \
--endpoint-type chat \
--endpoint /v1/chat/completions \
--streaming \
--url localhost:8080 \
--url localhost:8000 \
--synthetic-input-tokens-mean 100 \
--synthetic-input-tokens-stddev 0 \
--output-tokens-mean 200 \
39 changes: 38 additions & 1 deletion src/aiperf/common/config/user_config.py
@@ -19,7 +19,7 @@
from aiperf.common.config.loadgen_config import LoadGeneratorConfig
from aiperf.common.config.output_config import OutputConfig
from aiperf.common.config.tokenizer_config import TokenizerConfig
from aiperf.common.enums import CustomDatasetType
from aiperf.common.enums import CustomDatasetType, GPUTelemetryMode
from aiperf.common.enums.timing_enums import RequestRateMode, TimingMode
from aiperf.common.utils import load_json_str

@@ -224,6 +224,43 @@ def _count_dataset_entries(self) -> int:
            ),
        ]

    _gpu_telemetry_mode: GPUTelemetryMode = GPUTelemetryMode.SUMMARY
    _gpu_telemetry_urls: list[str] = []

    @model_validator(mode="after")
    def _parse_gpu_telemetry_config(self) -> Self:
        """Parse gpu_telemetry list into mode and URLs."""
        if not self.gpu_telemetry:
            return self

        mode = GPUTelemetryMode.SUMMARY
        urls = []

        for item in self.gpu_telemetry:
            if item == "dashboard":
                mode = GPUTelemetryMode.REALTIME_DASHBOARD
            elif item.startswith("http"):
                urls.append(item)

        self._gpu_telemetry_mode = mode
        self._gpu_telemetry_urls = urls
        return self

    @property
    def gpu_telemetry_mode(self) -> GPUTelemetryMode:
        """Get the GPU telemetry display mode (parsed from gpu_telemetry list)."""
        return self._gpu_telemetry_mode

    @gpu_telemetry_mode.setter
    def gpu_telemetry_mode(self, value: GPUTelemetryMode) -> None:
        """Set the GPU telemetry display mode."""
        self._gpu_telemetry_mode = value

    @property
    def gpu_telemetry_urls(self) -> list[str]:
        """Get the parsed GPU telemetry DCGM endpoint URLs."""
        return self._gpu_telemetry_urls

    @model_validator(mode="after")
    def _compute_config(self) -> Self:
        """Compute additional configuration.
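
The parsing rule above is order-independent: the literal `dashboard` switches the display mode, anything beginning with `http` is collected as a DCGM endpoint URL, and other tokens are silently ignored. A standalone sketch of that behavior (plain Python, mirroring the validator rather than importing AIPerf):

```python
def parse_gpu_telemetry(items: list[str]) -> tuple[str, list[str]]:
    """Mirror of _parse_gpu_telemetry_config: 'dashboard' flips the mode, http* entries are URLs."""
    mode = "summary"
    urls: list[str] = []
    for item in items:
        if item == "dashboard":
            mode = "realtime_dashboard"
        elif item.startswith("http"):
            urls.append(item)
    return mode, urls


# Equivalent of: --gpu-telemetry dashboard http://node1:9400/metrics
mode, urls = parse_gpu_telemetry(["dashboard", "http://node1:9400/metrics"])
assert mode == "realtime_dashboard"
assert urls == ["http://node1:9400/metrics"]
```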
4 changes: 4 additions & 0 deletions src/aiperf/common/enums/__init__.py
@@ -96,6 +96,9 @@
from aiperf.common.enums.system_enums import (
    SystemState,
)
from aiperf.common.enums.telemetry_enums import (
    GPUTelemetryMode,
)
from aiperf.common.enums.timing_enums import (
    CreditPhase,
    RequestRateMode,
@@ -131,6 +134,7 @@
    "ExportLevel",
    "FrequencyMetricUnit",
    "FrequencyMetricUnitInfo",
    "GPUTelemetryMode",
    "GenericMetricUnit",
    "ImageFormat",
    "LifecycleState",
1 change: 1 addition & 0 deletions src/aiperf/common/enums/command_enums.py
@@ -14,6 +14,7 @@ class CommandType(CaseInsensitiveStrEnum):
    SHUTDOWN = "shutdown"
    SHUTDOWN_WORKERS = "shutdown_workers"
    SPAWN_WORKERS = "spawn_workers"
    START_REALTIME_TELEMETRY = "start_realtime_telemetry"


class CommandResponseStatus(CaseInsensitiveStrEnum):
1 change: 1 addition & 0 deletions src/aiperf/common/enums/message_enums.py
@@ -41,6 +41,7 @@ class MessageType(CaseInsensitiveStrEnum):
    PROFILE_PROGRESS = "profile_progress"
    PROFILE_RESULTS = "profile_results"
    REALTIME_METRICS = "realtime_metrics"
    REALTIME_TELEMETRY_METRICS = "realtime_telemetry_metrics"
    REGISTRATION = "registration"
    SERVICE_ERROR = "service_error"
    STATUS = "status"
11 changes: 11 additions & 0 deletions src/aiperf/common/enums/telemetry_enums.py
@@ -0,0 +1,11 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from aiperf.common.enums.base_enums import CaseInsensitiveStrEnum


class GPUTelemetryMode(CaseInsensitiveStrEnum):
    """GPU telemetry display mode."""

    SUMMARY = "summary"
    REALTIME_DASHBOARD = "realtime_dashboard"
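
Because the enum derives from `CaseInsensitiveStrEnum`, lookups such as `GPUTelemetryMode("SUMMARY")` should resolve to the same member as `GPUTelemetryMode("summary")`. A rough standalone approximation of that behavior (the real semantics live in AIPerf's base class, so treat this sketch as an assumption):

```python
from enum import Enum


class CaseInsensitiveDemo(str, Enum):
    """Toy stand-in for CaseInsensitiveStrEnum: falls back to a lowercase match."""

    SUMMARY = "summary"
    REALTIME_DASHBOARD = "realtime_dashboard"

    @classmethod
    def _missing_(cls, value):
        # Called by Enum machinery when the exact value isn't found
        if isinstance(value, str):
            lowered = value.lower()
            for member in cls:
                if member.value == lowered:
                    return member
        return None


assert CaseInsensitiveDemo("SUMMARY") is CaseInsensitiveDemo.SUMMARY
assert CaseInsensitiveDemo("Realtime_Dashboard") is CaseInsensitiveDemo.REALTIME_DASHBOARD
```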
16 changes: 16 additions & 0 deletions src/aiperf/common/hooks.py
@@ -44,6 +44,7 @@ class AIPerfHook(CaseInsensitiveStrEnum):
    ON_INIT = "@on_init"
    ON_MESSAGE = "@on_message"
    ON_REALTIME_METRICS = "@on_realtime_metrics"
    ON_REALTIME_TELEMETRY_METRICS = "@on_realtime_telemetry_metrics"
    ON_PROFILING_PROGRESS = "@on_profiling_progress"
    ON_PULL_MESSAGE = "@on_pull_message"
    ON_RECORDS_PROGRESS = "@on_records_progress"
@@ -348,6 +349,21 @@ def _on_realtime_metrics(self, metrics: list[MetricResult]) -> None:
    return _hook_decorator(AIPerfHook.ON_REALTIME_METRICS, func)


def on_realtime_telemetry_metrics(func: Callable) -> Callable:
    """Decorator to specify that the function is a hook that should be called when real-time GPU telemetry metrics are received.

    See :func:`aiperf.common.hooks._hook_decorator`.

    Example:
        ```python
        class MyPlugin(RealtimeTelemetryMetricsMixin):
            @on_realtime_telemetry_metrics
            def _on_realtime_telemetry_metrics(self, metrics: list[MetricResult]) -> None:
                pass
        ```
    """
    return _hook_decorator(AIPerfHook.ON_REALTIME_TELEMETRY_METRICS, func)


def on_pull_message(
    *message_types: MessageTypeT | Callable[[SelfT], Iterable[MessageTypeT]],
) -> Callable:
4 changes: 4 additions & 0 deletions src/aiperf/common/messages/__init__.py
@@ -31,6 +31,7 @@
    ShutdownCommand,
    ShutdownWorkersCommand,
    SpawnWorkersCommand,
    StartRealtimeTelemetryCommand,
    TargetedServiceMessage,
)
from aiperf.common.messages.credit_messages import (
@@ -75,6 +76,7 @@
)
from aiperf.common.messages.telemetry_messages import (
    ProcessTelemetryResultMessage,
    RealtimeTelemetryMetricsMessage,
    TelemetryRecordsMessage,
    TelemetryStatusMessage,
)
@@ -127,13 +129,15 @@
    "ProfileStartCommand",
    "RealtimeMetricsCommand",
    "RealtimeMetricsMessage",
    "RealtimeTelemetryMetricsMessage",
    "RecordsProcessingStatsMessage",
    "RegisterServiceCommand",
    "RegistrationMessage",
    "RequiresRequestNSMixin",
    "ShutdownCommand",
    "ShutdownWorkersCommand",
    "SpawnWorkersCommand",
    "StartRealtimeTelemetryCommand",
    "StatusMessage",
    "TargetedServiceMessage",
    "TelemetryRecordsMessage",
11 changes: 11 additions & 0 deletions src/aiperf/common/messages/command_messages.py
@@ -242,6 +242,17 @@ class RealtimeMetricsCommand(CommandMessage):
    command: CommandTypeT = CommandType.REALTIME_METRICS


class StartRealtimeTelemetryCommand(CommandMessage):
    """Command to start the realtime telemetry background task in RecordsManager.

    This command is sent when the user dynamically enables the telemetry dashboard
    by pressing the telemetry option in the UI. This always sets the GPU telemetry
    mode to REALTIME_DASHBOARD.
    """

    command: CommandTypeT = CommandType.START_REALTIME_TELEMETRY


class SpawnWorkersCommand(CommandMessage):
    command: CommandTypeT = CommandType.SPAWN_WORKERS

21 changes: 20 additions & 1 deletion src/aiperf/common/messages/telemetry_messages.py
@@ -5,7 +5,12 @@

from aiperf.common.enums import MessageType
from aiperf.common.messages.service_messages import BaseServiceMessage
from aiperf.common.models import ErrorDetails, ProcessTelemetryResult, TelemetryRecord
from aiperf.common.models import (
    ErrorDetails,
    MetricResult,
    ProcessTelemetryResult,
    TelemetryRecord,
)
from aiperf.common.types import MessageTypeT


@@ -19,6 +24,10 @@ class TelemetryRecordsMessage(BaseServiceMessage):
        ...,
        description="The ID of the telemetry data collector that collected the records.",
    )
    dcgm_url: str = Field(
        ...,
        description="The DCGM endpoint URL that was contacted (e.g., 'http://localhost:9400/metrics')",
    )
    records: list[TelemetryRecord] = Field(
        ..., description="The telemetry records collected from GPU monitoring"
    )
@@ -62,3 +71,13 @@ class TelemetryStatusMessage(BaseServiceMessage):
        default_factory=list,
        description="List of DCGM endpoint URLs that were reachable and will provide data",
    )


class RealtimeTelemetryMetricsMessage(BaseServiceMessage):
    """Message from the records manager to show real-time GPU telemetry metrics."""

    message_type: MessageTypeT = MessageType.REALTIME_TELEMETRY_METRICS

    metrics: list[MetricResult] = Field(
        ..., description="The current real-time GPU telemetry metrics."
    )
4 changes: 4 additions & 0 deletions src/aiperf/common/mixins/__init__.py
@@ -44,6 +44,9 @@
from aiperf.common.mixins.realtime_metrics_mixin import (
    RealtimeMetricsMixin,
)
from aiperf.common.mixins.realtime_telemetry_metrics_mixin import (
    RealtimeTelemetryMetricsMixin,
)
from aiperf.common.mixins.reply_client_mixin import (
    ReplyClientMixin,
)
@@ -67,6 +70,7 @@
    "ProgressTrackerMixin",
    "PullClientMixin",
    "RealtimeMetricsMixin",
    "RealtimeTelemetryMetricsMixin",
    "ReplyClientMixin",
    "TaskManagerMixin",
    "WorkerTrackerMixin",
40 changes: 40 additions & 0 deletions src/aiperf/common/mixins/realtime_telemetry_metrics_mixin.py
@@ -0,0 +1,40 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
import asyncio

from aiperf.common.config import ServiceConfig
from aiperf.common.enums import MessageType
from aiperf.common.hooks import AIPerfHook, on_message, provides_hooks
from aiperf.common.messages import RealtimeTelemetryMetricsMessage
from aiperf.common.mixins.message_bus_mixin import MessageBusClientMixin
from aiperf.common.models import MetricResult
from aiperf.controller.system_controller import SystemController


@provides_hooks(AIPerfHook.ON_REALTIME_TELEMETRY_METRICS)
class RealtimeTelemetryMetricsMixin(MessageBusClientMixin):
    """A mixin that provides a hook for real-time GPU telemetry metrics."""

    def __init__(
        self, service_config: ServiceConfig, controller: SystemController, **kwargs
    ):
        super().__init__(service_config=service_config, controller=controller, **kwargs)
        self._controller = controller
        self._telemetry_metrics: list[MetricResult] = []
        self._telemetry_metrics_lock = asyncio.Lock()

    @on_message(MessageType.REALTIME_TELEMETRY_METRICS)
    async def _on_realtime_telemetry_metrics(
        self, message: RealtimeTelemetryMetricsMessage
    ):
        """Update the telemetry metrics from a real-time telemetry metrics message."""
        self.debug(
            f"Mixin received telemetry message with {len(message.metrics)} metrics, triggering hook"
        )

        async with self._telemetry_metrics_lock:
            self._telemetry_metrics = message.metrics
        await self.run_hooks(
            AIPerfHook.ON_REALTIME_TELEMETRY_METRICS,
            metrics=message.metrics,
        )
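
Consumers attach to this through the hook decorator rather than overriding the message handler directly. A sketch of the intended usage, following the docstring in `hooks.py` (the class name here is hypothetical, and constructor wiring is omitted):

```python
from aiperf.common.hooks import on_realtime_telemetry_metrics
from aiperf.common.mixins import RealtimeTelemetryMetricsMixin
from aiperf.common.models import MetricResult


class TelemetryPanel(RealtimeTelemetryMetricsMixin):
    """Hypothetical UI component that refreshes whenever fresh GPU metrics arrive."""

    @on_realtime_telemetry_metrics
    async def _refresh(self, metrics: list[MetricResult]) -> None:
        # Invoked via run_hooks() each time a REALTIME_TELEMETRY_METRICS message lands
        for metric in metrics:
            print(metric.current)
```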
4 changes: 4 additions & 0 deletions src/aiperf/common/models/record_models.py
@@ -43,6 +43,10 @@ class MetricResult(JsonMetricResult):
        default=None,
        description="The total number of records used to calculate the metric",
    )
    current: float | None = Field(
        default=None,
        description="The most recent value of the metric (used for realtime dashboard display only)",
    )

    def to_display_unit(self) -> "MetricResult":
        """Convert the metric result to its display unit."""
1 change: 1 addition & 0 deletions src/aiperf/common/models/telemetry_models.py
@@ -212,6 +212,7 @@ def to_metric_result(
            avg=float(np.mean(values)),
            std=float(np.std(values)),
            count=len(values),
            current=float(data_points[-1][0]),
            p1=p1,
            p5=p5,
            p10=p10,
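
The new `current` field is populated from the last entry in `data_points`; the indexing suggests `data_points` holds ordered `(value, timestamp)` pairs, though that shape is not shown in this diff. A tiny illustration under that assumption:

```python
# Assumed shape (not confirmed by the diff): ordered (value, timestamp) pairs
data_points = [(41.0, 1_700_000_000.0), (43.5, 1_700_000_001.0)]
current = float(data_points[-1][0])  # most recent value -> 43.5
```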
1 change: 0 additions & 1 deletion src/aiperf/exporters/csv_exporter.py
@@ -38,7 +38,6 @@ class CsvExporter(AIPerfLoggerMixin):

    def __init__(self, exporter_config: ExporterConfig, **kwargs) -> None:
        super().__init__(**kwargs)
        self.debug(lambda: f"Initializing CsvExporter with config: {exporter_config}")
        self._results = exporter_config.results
        self._telemetry_results = exporter_config.telemetry_results
        self._output_directory = exporter_config.user_config.output.artifact_directory