diff --git a/CHANGELOG.md b/CHANGELOG.md
index 54f040011..e85c30c90 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - `apm install` now works on macOS git 2.53.0 (Homebrew): bare-cache commands switch to `--git-dir` to satisfy the `safe.bareRepository=explicit` default; fetched SHAs are pinned as synthetic refs so `git clone --local --shared` no longer silently omits them. (#1268)
 - Set the unit-test hermetic HOME at conftest import time so a single xdist worker on the `windows-2025-vs2026` runner can no longer race fixture setup and re-trigger the 53 `Path.home()` failures the session-scoped autouse fixture was supposed to prevent. (#1271)
 - Override `Path.home()` itself in the root test conftest so the 46 remaining Windows `RuntimeError: Could not determine home directory` failures on xdist worker `gw2` cannot recur regardless of which conftest the worker imports first; per-test `monkeypatch.setenv("HOME", ...)` continues to work because the override consults env vars before falling back to the hermetic tmp dir. (#1272)
+- Retry the `apm mcp search` and `apm mcp show` integration tests on the documented "Could not reach MCP registry" transient (with backoff and a final skip) so a brief `api.mcp.github.com` outage no longer red-marks the Windows integration job. (#1274)
 
 ## [0.13.0] - 2026-05-11
 
diff --git a/tests/integration/test_mcp_registry_e2e.py b/tests/integration/test_mcp_registry_e2e.py
index 562bd25e1..da3e30ecf 100644
--- a/tests/integration/test_mcp_registry_e2e.py
+++ b/tests/integration/test_mcp_registry_e2e.py
@@ -17,12 +17,18 @@ import shutil  # noqa: F401
 import subprocess
 import tempfile
+import time
 from pathlib import Path
 from unittest import mock  # noqa: F401
 
 import pytest
 import toml
 
+# Phrase the CLI prints when the upstream MCP registry
+# (https://api.mcp.github.com) is transiently unreachable. Tests treat this
+# as retryable rather than a real product failure.
+_REGISTRY_TRANSIENT_MARKER = "could not reach mcp registry"
+
 
 def _is_registry_healthy() -> bool:
     """Check if GitHub MCP server has proper package configuration.
 
@@ -87,6 +93,70 @@ def run_command(cmd, check=True, capture_output=True, timeout=180, cwd=None, inp
         pytest.fail(f"Command failed: {cmd}\nStdout: {e.stdout}\nStderr: {e.stderr}")
 
 
+def run_mcp_command_with_retry(cmd, timeout=30, attempts=4, backoff_seconds=5):
+    """Run an MCP-registry-dependent command, retrying on transient registry outages.
+
+    The upstream MCP registry (api.mcp.github.com) occasionally returns 5xx /
+    refuses connections for a few seconds at a time. The CLI surfaces this with
+    the marker phrase "Could not reach MCP registry". Treat that exact failure
+    mode as retryable; treat any other non-zero exit as a real failure.
+
+    Args:
+        cmd: Shell command line to execute (run with ``shell=True``).
+        timeout: Per-attempt timeout in seconds passed to ``subprocess.run``.
+        attempts: Maximum number of times to run the command; must be >= 1.
+        backoff_seconds: Base delay; attempt N sleeps ``backoff_seconds * N``
+            before the next try.
+
+    Returns:
+        The ``subprocess.CompletedProcess`` of the first attempt that exits 0.
+
+    Raises:
+        ValueError: If ``attempts`` is less than 1.
+
+    Fails the test on any non-zero exit that lacks the transient marker, and
+    skips the test when every attempt hit the transient marker.
+    """
+    if attempts < 1:
+        # Without this guard the loop body never runs and the test would be
+        # reported as skipped without the command ever being executed.
+        raise ValueError(f"attempts must be >= 1, got {attempts}")
+
+    for attempt in range(1, attempts + 1):
+        result = subprocess.run(
+            cmd,
+            shell=True,
+            check=False,
+            capture_output=True,
+            text=True,
+            timeout=timeout,
+            encoding="utf-8",
+            errors="replace",
+        )
+        if result.returncode == 0:
+            return result
+
+        # The marker may land on either stream, so search both, case-insensitively.
+        combined = f"{result.stdout}\n{result.stderr}".lower()
+        if _REGISTRY_TRANSIENT_MARKER not in combined:
+            pytest.fail(f"Command failed: {cmd}\nStdout: {result.stdout}\nStderr: {result.stderr}")
+
+        if attempt < attempts:
+            sleep_for = backoff_seconds * attempt
+            print(
+                f"[retry] MCP registry transiently unavailable on attempt {attempt}/"
+                f"{attempts}; sleeping {sleep_for}s before retry"
+            )
+            time.sleep(sleep_for)
+
+    # Report both streams: the marker is matched against stdout and stderr, so
+    # either one may hold the message that explains the skip.
+    pytest.skip(
+        f"MCP registry (api.mcp.github.com) unreachable after {attempts} attempts; "
+        f"last stdout: {result.stdout}\nlast stderr: {result.stderr}"
+    )
+
+
 @pytest.fixture(scope="module")
 def temp_e2e_home():
     """Create a temporary home directory for E2E testing."""
@@ -140,9 +210,8 @@ def test_mcp_search_command(self, temp_e2e_home, apm_binary):
         """Test MCP registry search functionality."""
         print("\n=== Testing MCP Registry Search ===")
 
-        # Test search for GitHub MCP server
-        result = run_command(f"{apm_binary} mcp search github", timeout=30)
-        assert result.returncode == 0, f"MCP search failed: {result.stderr}"
+        # Test search for GitHub MCP server (retry on transient registry outage)
+        result = run_mcp_command_with_retry(f"{apm_binary} mcp search github", timeout=30)
 
         # Verify output contains expected results
         output = result.stdout.lower()
@@ -151,9 +220,10 @@ def test_mcp_search_command(self, temp_e2e_home, apm_binary):
 
         print(f"[OK] MCP search found GitHub servers:\n{result.stdout[:500]}...")
 
-        # Test search with limit
-        result = run_command(f"{apm_binary} mcp search filesystem --limit 3", timeout=30)
-        assert result.returncode == 0, f"MCP search with limit failed: {result.stderr}"
+        # Test search with limit (retry on transient registry outage)
+        result = run_mcp_command_with_retry(
+            f"{apm_binary} mcp search filesystem --limit 3", timeout=30
+        )
 
         print("[OK] MCP search with limit works")
 
@@ -161,10 +231,12 @@ def test_mcp_show_command(self, temp_e2e_home, apm_binary):
         """Test MCP registry server details functionality."""
         print("\n=== Testing MCP Registry Show ===")
 
-        # Test show GitHub MCP server details
+        # Test show GitHub MCP server details. The upstream registry
+        # (api.mcp.github.com) is occasionally unavailable for a few seconds;
+        # the CLI surfaces that as "Could not reach MCP registry" -- retry on
+        # that exact marker rather than treating it as a product regression.
         github_server = "io.github.github/github-mcp-server"
-        result = run_command(f"{apm_binary} mcp show {github_server}", timeout=30)
-        assert result.returncode == 0, f"MCP show failed: {result.stderr}"
+        result = run_mcp_command_with_retry(f"{apm_binary} mcp show {github_server}", timeout=30)
 
         # Verify output contains server details
         output = result.stdout.lower()