ai-dynamo
diff --git a/.coderabbit.yaml b/.coderabbit.yaml
Lines changed: 31 additions & 0 deletions
diff --git a/README.md b/README.md
Lines changed: 2 additions & 0 deletions
diff --git a/aiperf/__main__.py b/aiperf/__main__.py
Lines changed: 4 additions & 3 deletions
diff --git a/aiperf/clients/http/aiohttp_client.py b/aiperf/clients/http/aiohttp_client.py
Lines changed: 5 additions & 4 deletions
diff --git a/aiperf/clients/model_endpoint_info.py b/aiperf/clients/model_endpoint_info.py
Lines changed: 10 additions & 4 deletions
diff --git a/aiperf/common/config/input_config.py b/aiperf/common/config/input_config.py
Lines changed: 7 additions & 1 deletion
diff --git a/aiperf/common/config/user_config.py b/aiperf/common/config/user_config.py
Lines changed: 1 addition & 1 deletion
diff --git a/aiperf/common/messages/telemetry_messages.py b/aiperf/common/messages/telemetry_messages.py
Lines changed: 2 additions & 2 deletions
diff --git a/aiperf/common/models/export_models.py b/aiperf/common/models/export_models.py
Lines changed: 1 addition & 1 deletion
diff --git a/aiperf/common/models/telemetry_models.py b/aiperf/common/models/telemetry_models.py
Lines changed: 2 additions & 2 deletions
@@ -0,0 +1,31 @@
+
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# yaml-language-server: $schema=https://coderabbit.ai/integrations/schema.v2.json
+# Docs: https://docs.coderabbit.ai/getting-started/configure-coderabbit/
+language: "en-US"
+early_access: false
+reviews:
+  profile: "chill"
+  request_changes_workflow: false
+  high_level_summary: true
+  poem: true
+  review_status: false
+  collapse_walkthrough: false
+  auto_review:
+    enabled: true
+    drafts: false
+    auto_incremental_review: false
+  suggested_labels: false
+  suggested_reviewers: false
+  sequence_diagrams: false
+  related_issues: false
+  related_prs: false
+  finishing_touches:
+    docstrings:
+      enabled: false
+    unit_tests:
+      enabled: false
+chat:
+  auto_reply: true
@@ -27,6 +27,8 @@ Features
 ======================
 -->
 
+<img width="1724" height="670" alt="AIPerf UI Dashboard" src="https://github.com/user-attachments/assets/7eb40867-b1c1-4ebe-bd57-7619f2154bba" />
+
 ## Features
 
 - Scalable via multiprocess support
 
@@ -4,18 +4,19 @@
 import sys
 
 from aiperf.cli import app
-from aiperf.gpu_telemetry.constants import DEFAULT_DCGM_ENDPOINT
+from aiperf.gpu_telemetry.constants import DEFAULT_DCGM_ENDPOINTS
 
 
 def main() -> int:
     # TODO: HACK: Remove this once we can upgrade to v4 of cyclopts
     # This is a hack to allow the --gpu-telemetry flag to be used without a value
-    # and it will be set to the default endpoint, which will inform the telemetry
+    # and it will be set to the default endpoints, which will inform the telemetry
     # exporter to print the telemetry to the console
     if "--gpu-telemetry" in sys.argv:
         idx = sys.argv.index("--gpu-telemetry")
         if idx >= len(sys.argv) - 1 or sys.argv[idx + 1].startswith("-"):
-            sys.argv.insert(idx + 1, DEFAULT_DCGM_ENDPOINT)
+            for endpoint in reversed(DEFAULT_DCGM_ENDPOINTS):
+                sys.argv.insert(idx + 1, endpoint)
     return app(sys.argv[1:])
 
 
 
@@ -204,18 +204,19 @@ async def __aiter__(self) -> typing.AsyncIterator[tuple[str, int]]:
             if not first_byte:
                 break
 
+            # Read until we hit \n\n which delimits SSE messages
             chunk = await self.response.content.readuntil(b"\n\n")
 
             if not chunk:
                 break
             chunk = first_byte + chunk
 
             try:
+                decoded = chunk.decode("utf-8")
+                for sub_chunk in decoded.split("\n\n"):
+                    if sub_chunk:
+                        yield (sub_chunk, chunk_ns_first_byte)
                 # Use the fastest available decoder
-                yield (
-                    chunk.decode("utf-8").strip(),
-                    chunk_ns_first_byte,
-                )
             except UnicodeDecodeError:
                 # Handle potential encoding issues gracefully
                 yield (
 
@@ -138,11 +138,17 @@ def from_user_config(cls, user_config: UserConfig) -> "ModelEndpointInfo":
     def url(self) -> str:
         """Get the full URL for the endpoint."""
         url = self.endpoint.base_url.rstrip("/") if self.endpoint.base_url else ""
+
         if self.endpoint.custom_endpoint:
-            url += "/" + self.endpoint.custom_endpoint.lstrip("/")
-        elif path := self.endpoint.type.endpoint_path:
-            url += "/" + path.lstrip("/")
-        return url
+            path = self.endpoint.custom_endpoint.lstrip("/")
+        else:
+            if not self.endpoint.type.endpoint_path:
+                return url
+            path = self.endpoint.type.endpoint_path.lstrip("/")
+            if url.endswith("/v1") and path.startswith("v1/"):
+                path = path[3:]  # Remove the v1/ prefix
+
+        return f"{url}/{path}"
 
     @property
     def primary_model(self) -> ModelInfo:
 
@@ -79,13 +79,19 @@ def validate_goodput(self) -> Self:
         Runs after the model is constructed so we can inspect self.goodput directly.
         """
         if self.goodput:
+            from aiperf.common.enums import MetricType
             from aiperf.metrics.metric_registry import MetricRegistry
 
             for tag in self.goodput:
                 try:
-                    MetricRegistry.get_class(tag)
+                    metric_cls = MetricRegistry.get_class(tag)
                 except MetricTypeError as e:
                     raise ValueError(f"Unknown metric tag in --goodput: {tag}") from e
+                if metric_cls.type == MetricType.DERIVED:
+                    raise ValueError(
+                        f"Metric '{tag}' is a Derived metric and cannot be used for --goodput. "
+                        "Use a per-record metric instead (e.g., 'inter_token_latency', 'time_to_first_token')."
+                    )
 
         return self
 
 
@@ -215,7 +215,7 @@ def _count_dataset_entries(self) -> int:
         list[str] | None,
         Field(
             default=None,
-            description="Enable GPU telemetry console display and optionally specify custom DCGM exporter URLs (e.g., http://node1:9401/metrics http://node2:9401/metrics). Default localhost:9401 is always attempted",
+            description="Enable GPU telemetry console display and optionally specify custom DCGM exporter URLs (e.g., http://node1:9401/metrics http://node2:9401/metrics). Default localhost:9400 and localhost:9401 are always attempted",
         ),
         BeforeValidator(parse_str_or_list),
         CLIParameter(
 
@@ -66,9 +66,9 @@ class TelemetryStatusMessage(BaseServiceMessage):
     reason: str | None = Field(
         default=None, description="Reason why telemetry is disabled (if enabled=False)"
     )
-    endpoints_tested: list[str] = Field(
+    endpoints_configured: list[str] = Field(
         default_factory=list,
-        description="List of DCGM endpoint URLs that were tested for reachability",
+        description="List of DCGM endpoint URLs in the configured scope for display",
     )
     endpoints_reachable: list[str] = Field(
         default_factory=list,
 
@@ -41,7 +41,7 @@ class JsonMetricResult(AIPerfBaseModel):
 class TelemetrySummary(AIPerfBaseModel):
     """Summary information for telemetry collection."""
 
-    endpoints_tested: list[str]
+    endpoints_configured: list[str]
     endpoints_successful: list[str]
     start_time: datetime
     end_time: datetime
 
@@ -335,9 +335,9 @@ class TelemetryResults(AIPerfBaseModel):
         description="Start time of telemetry collection in nanoseconds"
     )
     end_ns: int = Field(description="End time of telemetry collection in nanoseconds")
-    endpoints_tested: list[str] = Field(
+    endpoints_configured: list[str] = Field(
         default_factory=list,
-        description="List of DCGM endpoint URLs that were tested for reachability",
+        description="List of DCGM endpoint URLs in configured scope for display",
     )
     endpoints_successful: list[str] = Field(
         default_factory=list,
Original file line number	Diff line number	Diff line change
`@@ -66,9 +66,9 @@ class TelemetryStatusMessage(BaseServiceMessage):`
`66`	`66`	`reason: str \| None = Field(`
`67`	`67`	`default=None, description="Reason why telemetry is disabled (if enabled=False)"`
`68`	`68`	`)`
`69`		`- endpoints_tested: list[str] = Field(`
	`69`	`+ endpoints_configured: list[str] = Field(`
`70`	`70`	`default_factory=list,`
`71`		`- description="List of DCGM endpoint URLs that were tested for reachability",`
	`71`	`+ description="List of DCGM endpoint URLs in the configured scope for display",`
`72`	`72`	`)`
`73`	`73`	`endpoints_reachable: list[str] = Field(`
`74`	`74`	`default_factory=list,`
Original file line number	Diff line number	Diff line change
`@@ -335,9 +335,9 @@ class TelemetryResults(AIPerfBaseModel):`
`335`	`335`	`description="Start time of telemetry collection in nanoseconds"`
`336`	`336`	`)`
`337`	`337`	`end_ns: int = Field(description="End time of telemetry collection in nanoseconds")`
`338`		`- endpoints_tested: list[str] = Field(`
	`338`	`+ endpoints_configured: list[str] = Field(`
`339`	`339`	`default_factory=list,`
`340`		`- description="List of DCGM endpoint URLs that were tested for reachability",`
	`340`	`+ description="List of DCGM endpoint URLs in configured scope for display",`
`341`	`341`	`)`
`342`	`342`	`endpoints_successful: list[str] = Field(`
`343`	`343`	`default_factory=list,`