Skip to content

Commit 7911183

Browse files
committed
fix: unit tests and simplify telemetry manager
1 parent 32acabb commit 7911183

File tree

2 files changed

+34
-35
lines changed

2 files changed

+34
-35
lines changed

aiperf/gpu_telemetry/telemetry_manager.py

Lines changed: 4 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
# SPDX-License-Identifier: Apache-2.0
33

44
import asyncio
5-
from urllib.parse import urlparse
65

76
from aiperf.common.base_component_service import BaseComponentService
87
from aiperf.common.config import ServiceConfig, UserConfig
@@ -80,24 +79,11 @@ def __init__(
8079
user_config.gpu_telemetry is not None
8180
)
8281

83-
# Normalize to list
84-
user_endpoints = (
85-
[]
86-
if user_config.gpu_telemetry is None
87-
else [user_config.gpu_telemetry]
88-
if isinstance(user_config.gpu_telemetry, str)
89-
else list(user_config.gpu_telemetry)
90-
)
82+
user_endpoints = user_config.gpu_telemetry_urls or []
83+
if isinstance(user_endpoints, str):
84+
user_endpoints = [user_endpoints]
9185

92-
# Validate and normalize URLs
93-
valid_endpoints = []
94-
for endpoint in user_endpoints:
95-
try:
96-
parsed = urlparse(endpoint)
97-
if parsed.scheme in ("http", "https") and parsed.netloc:
98-
valid_endpoints.append(self._normalize_dcgm_url(endpoint))
99-
except Exception:
100-
continue
86+
valid_endpoints = [self._normalize_dcgm_url(url) for url in user_endpoints]
10187

10288
# Store user-provided endpoints separately for display filtering (excluding auto-inserted defaults)
10389
self._user_provided_endpoints = [

tests/gpu_telemetry/test_telemetry_manager.py

Lines changed: 30 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ def _create_manager_with_mocked_base(self, user_config):
4747
def test_initialization_default_endpoint(self):
4848
"""Test initialization with no user-provided endpoints uses defaults."""
4949
mock_user_config = MagicMock(spec=UserConfig)
50+
mock_user_config.gpu_telemetry = None
5051
mock_user_config.gpu_telemetry_urls = None
5152

5253
manager = self._create_manager_with_mocked_base(mock_user_config)
@@ -56,6 +57,7 @@ def test_initialization_custom_endpoints(self):
5657
"""Test initialization with custom user-provided endpoints."""
5758
mock_user_config = MagicMock(spec=UserConfig)
5859
custom_endpoint = "http://gpu-node-01:9401/metrics"
60+
mock_user_config.gpu_telemetry = ["dashboard"] # User configured telemetry
5961
mock_user_config.gpu_telemetry_urls = [custom_endpoint]
6062

6163
manager = self._create_manager_with_mocked_base(mock_user_config)
@@ -69,6 +71,7 @@ def test_initialization_custom_endpoints(self):
6971
def test_initialization_string_endpoint(self):
7072
"""Test initialization converts single string endpoint to list and prepends defaults."""
7173
mock_user_config = MagicMock(spec=UserConfig)
74+
mock_user_config.gpu_telemetry = ["dashboard"] # User configured telemetry
7275
mock_user_config.gpu_telemetry_urls = "http://single-node:9401/metrics"
7376

7477
manager = self._create_manager_with_mocked_base(mock_user_config)
@@ -80,15 +83,16 @@ def test_initialization_string_endpoint(self):
8083
assert len(manager._dcgm_endpoints) == 3
8184

8285
def test_initialization_filters_invalid_urls(self):
83-
"""Test initialization filters out invalid URLs."""
86+
"""Test initialization with only valid URLs (invalid ones filtered by user_config validator)."""
8487
mock_user_config = MagicMock(spec=UserConfig)
85-
mock_user_config.gpu_telemetry_urls = [
86-
"http://valid:9401/metrics", # Valid
87-
"not-a-url", # Invalid - no scheme
88-
"ftp://wrong-scheme:9401", # Invalid - wrong scheme
89-
"http://another-valid:9401", # Valid
90-
"", # Invalid - empty
88+
# user_config validator would have already filtered out invalid URLs
89+
# so telemetry_manager only receives valid ones
90+
valid_urls = [
91+
"http://valid:9401/metrics",
92+
"http://another-valid:9401/metrics",
9193
]
94+
mock_user_config.gpu_telemetry = ["dashboard"] # User configured telemetry
95+
mock_user_config.gpu_telemetry_urls = valid_urls
9296

9397
manager = self._create_manager_with_mocked_base(mock_user_config)
9498

@@ -102,11 +106,13 @@ def test_initialization_filters_invalid_urls(self):
102106
def test_initialization_deduplicates_endpoints(self):
103107
"""Test initialization removes duplicate endpoints while preserving order."""
104108
mock_user_config = MagicMock(spec=UserConfig)
105-
mock_user_config.gpu_telemetry_urls = [
109+
urls_with_duplicates = [
106110
"http://node1:9401/metrics",
107111
"http://node2:9401/metrics",
108112
"http://node1:9401/metrics", # Duplicate
109113
]
114+
mock_user_config.gpu_telemetry = ["dashboard"] # User configured telemetry
115+
mock_user_config.gpu_telemetry_urls = urls_with_duplicates
110116

111117
manager = self._create_manager_with_mocked_base(mock_user_config)
112118

@@ -120,11 +126,13 @@ def test_initialization_deduplicates_endpoints(self):
120126
def test_user_provides_default_endpoint(self):
121127
"""Test that explicitly providing a default endpoint doesn't duplicate it."""
122128
mock_user_config = MagicMock(spec=UserConfig)
123-
mock_user_config.gpu_telemetry = [
129+
urls = [
124130
"http://localhost:9400/metrics", # This is a default
125131
"http://node1:9401/metrics",
126132
"http://localhost:9401/metrics", # This is also a default
127133
]
134+
mock_user_config.gpu_telemetry = ["dashboard"] # User configured telemetry
135+
mock_user_config.gpu_telemetry_urls = urls
128136

129137
manager = self._create_manager_with_mocked_base(mock_user_config)
130138

@@ -520,13 +528,12 @@ def _create_manager_with_mocked_base(self, user_config):
520528
return manager
521529

522530
def test_invalid_endpoints_filtered_during_init(self):
523-
"""Test that empty string and invalid URLs are filtered during initialization."""
531+
"""Test that only valid URLs reach telemetry_manager (invalid ones filtered by user_config validator)."""
524532
mock_user_config = MagicMock(spec=UserConfig)
525-
mock_user_config.gpu_telemetry_urls = [
526-
"", # Empty string - filtered
527-
"/metrics", # No scheme - filtered
528-
"http://valid:9401/metrics", # Valid
529-
]
533+
# user_config validator would have already filtered out invalid URLs
534+
# so telemetry_manager only receives valid ones
535+
mock_user_config.gpu_telemetry = ["dashboard"] # User configured telemetry
536+
mock_user_config.gpu_telemetry_urls = ["http://valid:9401/metrics"]
530537

531538
manager = self._create_manager_with_mocked_base(mock_user_config)
532539

@@ -572,6 +579,7 @@ def test_both_defaults_included_when_no_user_config(self):
572579
"""Test that both default endpoints (9400 and 9401) are included with no user config."""
573580
mock_user_config = MagicMock(spec=UserConfig)
574581
mock_user_config.gpu_telemetry = None
582+
mock_user_config.gpu_telemetry_urls = None
575583

576584
manager = self._create_manager_with_mocked_base(mock_user_config)
577585

@@ -585,16 +593,19 @@ def test_user_explicitly_configured_telemetry_flag(self):
585593
# Test with None (not configured)
586594
mock_user_config = MagicMock(spec=UserConfig)
587595
mock_user_config.gpu_telemetry = None
596+
mock_user_config.gpu_telemetry_urls = None
588597
manager = self._create_manager_with_mocked_base(mock_user_config)
589598
assert manager._user_explicitly_configured_telemetry is False
590599

591600
# Test with value (configured)
592-
mock_user_config.gpu_telemetry = "http://custom:9401"
601+
mock_user_config.gpu_telemetry = ["dashboard"] # User configured telemetry
602+
mock_user_config.gpu_telemetry_urls = ["http://custom:9401/metrics"]
593603
manager = self._create_manager_with_mocked_base(mock_user_config)
594604
assert manager._user_explicitly_configured_telemetry is True
595605

596606
# Test with empty list (configured)
597-
mock_user_config.gpu_telemetry = []
607+
mock_user_config.gpu_telemetry = [] # User explicitly passed --gpu-telemetry with no args
608+
mock_user_config.gpu_telemetry_urls = []
598609
manager = self._create_manager_with_mocked_base(mock_user_config)
599610
assert manager._user_explicitly_configured_telemetry is True
600611

@@ -607,6 +618,7 @@ def _create_test_manager(self):
607618
manager = TelemetryManager.__new__(TelemetryManager)
608619
manager.service_id = "test_manager"
609620
manager._collectors = {}
621+
manager._collector_id_to_url = {}
610622
manager._dcgm_endpoints = list(DEFAULT_DCGM_ENDPOINTS)
611623
manager._user_provided_endpoints = []
612624
manager._user_explicitly_configured_telemetry = False
@@ -750,6 +762,7 @@ def _create_test_manager(self, user_requested, user_endpoints):
750762
manager = TelemetryManager.__new__(TelemetryManager)
751763
manager.service_id = "test_manager"
752764
manager._collectors = {}
765+
manager._collector_id_to_url = {}
753766
manager._dcgm_endpoints = list(DEFAULT_DCGM_ENDPOINTS) + user_endpoints
754767
manager._user_provided_endpoints = user_endpoints
755768
manager._user_explicitly_configured_telemetry = user_requested

0 commit comments

Comments
 (0)