From 4d5135f671434e05d8ba77e85616f3e8ab0369f3 Mon Sep 17 00:00:00 2001 From: GanesanRengasamy Date: Mon, 6 Apr 2026 07:05:05 +0530 Subject: [PATCH 01/14] support virtual packages on generic git hosts (Gitea) --- src/apm_cli/deps/github_downloader.py | 17 ++++++++++++++++- src/apm_cli/models/dependency/reference.py | 2 +- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/apm_cli/deps/github_downloader.py b/src/apm_cli/deps/github_downloader.py index 9776f7efb..146482d83 100644 --- a/src/apm_cli/deps/github_downloader.py +++ b/src/apm_cli/deps/github_downloader.py @@ -1056,6 +1056,21 @@ def _download_github_file(self, dep_ref: DependencyReference, file_path: str, re # All raw attempts failed — fall through to API path which # handles private repos, rate-limit messaging, and SAML errors. + # Try raw URL for generic hosts (Gitea, GitLab, etc.) + if host.lower() not in ("github.com",) and not host.lower().endswith(".ghe.com"): + raw_url = f"https://{host}/{owner}/{repo}/raw/{ref}/{file_path}" + raw_headers = {} + if token: + raw_headers['Authorization'] = f'token {token}' + try: + response = self._resilient_get(raw_url, headers=raw_headers, timeout=30) + if response.status_code == 200: + if verbose_callback: + verbose_callback(f"Downloaded file: {host}/{dep_ref.repo_url}/{file_path}") + return response.content + except: + pass + # --- Contents API path (authenticated, enterprise, or raw fallback) --- # Build GitHub API URL - format differs by host type if host == "github.com": @@ -1063,7 +1078,7 @@ def _download_github_file(self, dep_ref: DependencyReference, file_path: str, re elif host.lower().endswith(".ghe.com"): api_url = f"https://api.{host}/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" else: - api_url = f"https://{host}/api/v3/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" + api_url = f"https://{host}/api/v1/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" # Set up authentication headers headers = { diff --git 
a/src/apm_cli/models/dependency/reference.py b/src/apm_cli/models/dependency/reference.py index 1c6df16b9..e41e1e174 100644 --- a/src/apm_cli/models/dependency/reference.py +++ b/src/apm_cli/models/dependency/reference.py @@ -580,7 +580,7 @@ def _detect_virtual_package(cls, dependency_str: str): for seg in path_segments ) has_collection = "collections" in path_segments - if has_virtual_ext or has_collection: + if has_virtual_ext or has_collection or len(path_segments) > 2: min_base_segments = 2 else: min_base_segments = len(path_segments) From 3dedd942da20d34c09bb6d372f0a2a95e9e16d1c Mon Sep 17 00:00:00 2001 From: GanesanRengasamy Date: Tue, 7 Apr 2026 13:04:21 +0530 Subject: [PATCH 02/14] Addressed reviewed corrections --- src/apm_cli/deps/github_downloader.py | 88 +++++++++---- src/apm_cli/models/dependency/reference.py | 15 ++- src/apm_cli/utils/github_host.py | 17 +++ tests/test_github_downloader.py | 142 +++++++++++++++++++++ tests/unit/test_generic_git_urls.py | 59 +++++++++ tests/unit/test_github_host.py | 23 ++++ 6 files changed, 315 insertions(+), 29 deletions(-) diff --git a/src/apm_cli/deps/github_downloader.py b/src/apm_cli/deps/github_downloader.py index 3792a542c..49cf373c0 100644 --- a/src/apm_cli/deps/github_downloader.py +++ b/src/apm_cli/deps/github_downloader.py @@ -1068,25 +1068,35 @@ def _download_github_file(self, dep_ref: DependencyReference, file_path: str, re if verbose_callback: verbose_callback(f"Downloaded file: {host}/{dep_ref.repo_url}/{file_path}") return response.content - except: + except (requests.RequestException, OSError): pass # --- Contents API path (authenticated, enterprise, or raw fallback) --- - # Build GitHub API URL - format differs by host type + # Build API URL candidates - format differs by host type if host == "github.com": - api_url = f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" + api_url_candidates = [ + f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" + 
] elif host.lower().endswith(".ghe.com"): - api_url = f"https://api.{host}/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" + api_url_candidates = [ + f"https://api.{host}/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" + ] else: - api_url = f"https://{host}/api/v1/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" - + # Generic host: negotiate API version (Gitea=v1, older Gitea/Gogs=v3, GitLab=v4) + api_url_candidates = [ + f"https://{host}/api/v1/repos/{owner}/{repo}/contents/{file_path}?ref={ref}", + f"https://{host}/api/v3/repos/{owner}/{repo}/contents/{file_path}?ref={ref}", + f"https://{host}/api/v4/repos/{owner}/{repo}/contents/{file_path}?ref={ref}", + ] + api_url = api_url_candidates[0] + # Set up authentication headers headers = { 'Accept': 'application/vnd.github.v3.raw' # Returns raw content directly } if token: headers['Authorization'] = f'token {token}' - + # Try to download with the specified ref try: response = self._resilient_get(api_url, headers=headers, timeout=30) @@ -1096,33 +1106,59 @@ def _download_github_file(self, dep_ref: DependencyReference, file_path: str, re return response.content except requests.exceptions.HTTPError as e: if e.response.status_code == 404: - # Try fallback branches if the specified ref fails + # For generic hosts, try remaining API version candidates before ref fallback + for candidate_url in api_url_candidates[1:]: + try: + candidate_resp = self._resilient_get(candidate_url, headers=headers, timeout=30) + candidate_resp.raise_for_status() + if verbose_callback: + verbose_callback(f"Downloaded file: {host}/{dep_ref.repo_url}/{file_path}") + return candidate_resp.content + except requests.exceptions.HTTPError as ce: + if ce.response.status_code != 404: + raise RuntimeError( + f"Failed to download {file_path}: HTTP {ce.response.status_code}" + ) + # 404 on this version too -- try next + + # All API versions returned 404 -- try fallback ref if ref not in ["main", "master"]: # If original ref failed, don't try 
fallbacks - it might be a specific version raise RuntimeError(f"File not found: {file_path} at ref '{ref}' in {dep_ref.repo_url}") - + # Try the other default branch fallback_ref = "master" if ref == "main" else "main" - - # Build fallback API URL + + # Build fallback URL candidates if host == "github.com": - fallback_url = f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}" + fallback_url_candidates = [ + f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}" + ] elif host.lower().endswith(".ghe.com"): - fallback_url = f"https://api.{host}/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}" + fallback_url_candidates = [ + f"https://api.{host}/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}" + ] else: - fallback_url = f"https://{host}/api/v3/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}" - - try: - response = self._resilient_get(fallback_url, headers=headers, timeout=30) - response.raise_for_status() - if verbose_callback: - verbose_callback(f"Downloaded file: {host}/{dep_ref.repo_url}/{file_path}") - return response.content - except requests.exceptions.HTTPError: - raise RuntimeError( - f"File not found: {file_path} in {dep_ref.repo_url} " - f"(tried refs: {ref}, {fallback_ref})" - ) + fallback_url_candidates = [ + f"https://{host}/api/v1/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}", + f"https://{host}/api/v3/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}", + f"https://{host}/api/v4/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}", + ] + + for fallback_url in fallback_url_candidates: + try: + response = self._resilient_get(fallback_url, headers=headers, timeout=30) + response.raise_for_status() + if verbose_callback: + verbose_callback(f"Downloaded file: {host}/{dep_ref.repo_url}/{file_path}") + return response.content + except requests.exceptions.HTTPError: + pass # Try next version or ref + + raise RuntimeError( + 
f"File not found: {file_path} in {dep_ref.repo_url} " + f"(tried refs: {ref}, {fallback_ref})" + ) elif e.response.status_code == 401 or e.response.status_code == 403: # Distinguish rate limiting from auth failure. # GitHub returns 403 with X-RateLimit-Remaining: 0 when the diff --git a/src/apm_cli/models/dependency/reference.py b/src/apm_cli/models/dependency/reference.py index e41e1e174..742c83ebf 100644 --- a/src/apm_cli/models/dependency/reference.py +++ b/src/apm_cli/models/dependency/reference.py @@ -11,6 +11,7 @@ is_artifactory_path, is_azure_devops_hostname, is_github_hostname, + is_gitlab_hostname, is_supported_git_host, parse_artifactory_path, unsupported_host_error, @@ -580,10 +581,18 @@ def _detect_virtual_package(cls, dependency_str: str): for seg in path_segments ) has_collection = "collections" in path_segments - if has_virtual_ext or has_collection or len(path_segments) > 2: + # GitLab supports nested groups (group/subgroup/repo), so the full + # path is the repo -- no shorthand subdirectory splitting. + # Use https://gitlab.com/group/subgroup/repo.git for GitLab nested + # groups; shorthand subdirectory syntax is not supported for GitLab. + # All other generic hosts (Gitea, Bitbucket, self-hosted, etc.) use + # the owner/repo convention, so extra segments are a virtual subdir. + if has_virtual_ext or has_collection: min_base_segments = 2 - else: + elif is_gitlab_hostname(validated_host): min_base_segments = len(path_segments) + else: + min_base_segments = 2 else: min_base_segments = 2 @@ -734,7 +743,7 @@ def _parse_standard_url( user_repo = "/".join(parts[1:]) else: user_repo = "/".join(parts[1:3]) - elif len(parts) >= 2 and "." not in parts[0]: + elif len(parts) >= 2 and ("." 
not in parts[0] or validated_host is not None): if not host: host = default_host() if is_azure_devops_hostname(host) and len(parts) >= 3: diff --git a/src/apm_cli/utils/github_host.py b/src/apm_cli/utils/github_host.py index d45fdabc2..728df3bd9 100644 --- a/src/apm_cli/utils/github_host.py +++ b/src/apm_cli/utils/github_host.py @@ -29,6 +29,23 @@ def is_azure_devops_hostname(hostname: Optional[str]) -> bool: return False +def is_gitlab_hostname(hostname: Optional[str]) -> bool: + """Return True if hostname is GitLab (cloud or self-hosted). + + GitLab supports nested groups (group/subgroup/repo), so paths with + more than two segments should be treated as repo paths, not virtual + subdirectory packages. + + Accepts: + - gitlab.com + - Any hostname starting with 'gitlab.' (common self-hosted convention) + """ + if not hostname: + return False + h = hostname.lower() + return h == "gitlab.com" or h.startswith("gitlab.") + + def is_github_hostname(hostname: Optional[str]) -> bool: """Return True if hostname should be treated as GitHub (cloud or enterprise). 
diff --git a/tests/test_github_downloader.py b/tests/test_github_downloader.py index b6e0ea316..de2fb1fb3 100644 --- a/tests/test_github_downloader.py +++ b/tests/test_github_downloader.py @@ -1629,5 +1629,147 @@ def test_try_raw_download_returns_content_on_200(self): assert result == b'hello world' +# --------------------------------------------------------------------------- +# Generic host (Gitea / GitLab) download tests +# --------------------------------------------------------------------------- + +def _make_resp(status_code: int, content: bytes = b"") -> Mock: + """Build a minimal mock requests.Response.""" + resp = Mock() + resp.status_code = status_code + resp.content = content + if status_code >= 400: + resp.raise_for_status = Mock( + side_effect=requests_lib.exceptions.HTTPError(response=resp) + ) + else: + resp.raise_for_status = Mock() + return resp + + +class TestGiteaRawUrlDownload: + """Gitea raw URL path: /{owner}/{repo}/raw/{ref}/{file}.""" + + def setup_method(self): + with patch.dict(os.environ, {}, clear=True), _CRED_FILL_PATCH: + self.downloader = GitHubPackageDownloader() + + def test_raw_url_succeeds_on_first_attempt(self): + """Raw URL returns 200 -- content returned without calling the API.""" + dep_ref = DependencyReference.parse("gitea.myorg.com/owner/repo") + expected = b"# README content" + raw_ok = _make_resp(200, expected) + + with patch.object(self.downloader, "_resilient_get", return_value=raw_ok) as mock_get: + result = self.downloader.download_raw_file(dep_ref, "README.md", "main") + + assert result == expected + first_url = mock_get.call_args_list[0][0][0] + assert first_url == "https://gitea.myorg.com/owner/repo/raw/main/README.md" + assert mock_get.call_count == 1 + + def test_raw_url_with_token_adds_auth_header(self): + """Token is forwarded as Authorization header in the raw URL request. + + Token resolution is lazy, so the env patch must stay active for the + duration of the download call. 
+ """ + dep_ref = DependencyReference.parse("gitea.myorg.com/owner/repo") + raw_ok = _make_resp(200, b"data") + + with patch.dict(os.environ, {"GITHUB_APM_PAT": "gta-tok"}, clear=True): + with _CRED_FILL_PATCH: + downloader = GitHubPackageDownloader() + with patch.object(downloader, "_resilient_get", return_value=raw_ok) as mock_get: + downloader.download_raw_file(dep_ref, "README.md", "main") + + raw_headers = mock_get.call_args_list[0][1].get("headers", {}) + assert "Authorization" in raw_headers + + def test_falls_back_to_api_v1_when_raw_returns_non_200(self): + """When the raw URL returns 404, the API v1 path is tried next.""" + dep_ref = DependencyReference.parse("gitea.myorg.com/owner/repo") + expected = b"file via API" + + with patch.object( + self.downloader, "_resilient_get", + side_effect=[_make_resp(404), _make_resp(200, expected)] + ) as mock_get: + result = self.downloader.download_raw_file(dep_ref, "README.md", "main") + + assert result == expected + urls = [c[0][0] for c in mock_get.call_args_list] + assert urls[0] == "https://gitea.myorg.com/owner/repo/raw/main/README.md" + assert "/api/v1/" in urls[1] + + +class TestGitLabApiVersionNegotiation: + """API version negotiation: v1 -> v3 -> v4 for generic hosts.""" + + def setup_method(self): + with patch.dict(os.environ, {}, clear=True), _CRED_FILL_PATCH: + self.downloader = GitHubPackageDownloader() + + def test_gitlab_v4_reached_after_v1_and_v3_return_404(self): + """GitLab uses /api/v4/ -- negotiation must try v1, v3, then v4.""" + dep_ref = DependencyReference.parse("gitlab.myorg.com/owner/repo") + expected = b"gitlab file content" + + side_effects = [ + _make_resp(404), # raw URL + _make_resp(404), # v1 + _make_resp(404), # v3 + _make_resp(200, expected), # v4 + ] + with patch.object(self.downloader, "_resilient_get", side_effect=side_effects) as mock_get: + result = self.downloader.download_raw_file(dep_ref, "skill.md", "main") + + assert result == expected + urls = [c[0][0] for c in 
mock_get.call_args_list] + assert "/api/v1/" in urls[1] + assert "/api/v3/" in urls[2] + assert "/api/v4/" in urls[3] + + def test_gitea_v1_succeeds_without_trying_v3_or_v4(self): + """When v1 returns 200, v3 and v4 must never be called.""" + dep_ref = DependencyReference.parse("gitea.example.com/owner/repo") + expected = b"gitea content" + + with patch.object( + self.downloader, "_resilient_get", + side_effect=[_make_resp(404), _make_resp(200, expected)] + ) as mock_get: + result = self.downloader.download_raw_file(dep_ref, "file.md", "main") + + assert result == expected + urls = [c[0][0] for c in mock_get.call_args_list] + assert all("/api/v3/" not in u and "/api/v4/" not in u for u in urls) + + def test_all_api_versions_404_raises_runtime_error(self): + """When every API version returns 404 for both refs, a clear error is raised.""" + dep_ref = DependencyReference.parse("git.example.com/owner/repo") + # raw + v1 + v3 + v4 for 'main', then v1 + v3 + v4 for 'master' fallback + side_effects = [_make_resp(404)] * 8 + + with patch.object(self.downloader, "_resilient_get", side_effect=side_effects): + with pytest.raises(RuntimeError, match="File not found"): + self.downloader.download_raw_file(dep_ref, "missing.md", "main") + + def test_github_com_uses_api_github_com_not_api_v4(self): + """github.com must still use api.github.com, never /api/v4/.""" + dep_ref = DependencyReference.parse("owner/repo") + expected = b"github content" + api_ok = _make_resp(200, expected) + + with patch.object(self.downloader, "_try_raw_download", return_value=None): + with patch.object(self.downloader, "_resilient_get", return_value=api_ok) as mock_get: + result = self.downloader.download_raw_file(dep_ref, "README.md", "main") + + assert result == expected + url_called = mock_get.call_args_list[0][0][0] + assert url_called.startswith("https://api.github.com/") + assert "/api/v4/" not in url_called + + if __name__ == '__main__': pytest.main([__file__]) \ No newline at end of file diff 
--git a/tests/unit/test_generic_git_urls.py b/tests/unit/test_generic_git_urls.py index 31e35381a..1dbbfe888 100644 --- a/tests/unit/test_generic_git_urls.py +++ b/tests/unit/test_generic_git_urls.py @@ -682,3 +682,62 @@ def test_https_nested_group_with_virtual_ext_rejected(self): """HTTPS URLs can't embed virtual paths even with nested groups.""" with pytest.raises(ValueError, match="virtual file extension"): DependencyReference.parse("https://gitlab.com/group/subgroup/file.prompt.md") + + +class TestGiteaVirtualPackageDetection: + """Gitea-specific virtual package detection -- supplements TestFQDNVirtualPaths + and TestNestedGroupSupport with Gitea host fixtures and regression guards + for the len(path_segments) > 2 over-trigger.""" + + # --- Must NOT be virtual (nested-group repo, no virtual indicators) --- + + def test_three_segment_gitea_path_is_not_virtual(self): + """group/subgroup/repo on Gitea is a nested-group repo, not virtual.""" + dep = DependencyReference.parse("gitea.myorg.com/group/subgroup/repo") + assert dep.host == "gitea.myorg.com" + assert dep.repo_url == "group/subgroup/repo" + assert dep.is_virtual is False + + def test_two_segment_gitea_path_is_not_virtual(self): + """Simple owner/repo on a Gitea host is never virtual.""" + dep = DependencyReference.parse("gitea.myorg.com/owner/repo") + assert dep.host == "gitea.myorg.com" + assert dep.repo_url == "owner/repo" + assert dep.is_virtual is False + + def test_four_segment_generic_path_without_indicators_is_not_virtual(self): + """Deep nested groups without file extensions or /collections/ are not virtual.""" + dep = DependencyReference.parse("git.company.internal/team/skills/brand-guidelines") + assert dep.is_virtual is False + assert dep.repo_url == "team/skills/brand-guidelines" + + # --- Must be virtual (explicit virtual indicators) --- + + def test_gitea_virtual_file_extension(self): + """Path with virtual file extension on Gitea is detected as virtual.""" + dep = 
DependencyReference.parse("gitea.myorg.com/owner/repo/file.prompt.md") + assert dep.host == "gitea.myorg.com" + assert dep.repo_url == "owner/repo" + assert dep.virtual_path == "file.prompt.md" + assert dep.is_virtual is True + assert dep.is_virtual_file() is True + + def test_gitea_collections_path_is_virtual(self): + """Path with /collections/ on Gitea is detected as a virtual collection.""" + dep = DependencyReference.parse("gitea.myorg.com/owner/repo/collections/security") + assert dep.host == "gitea.myorg.com" + assert dep.repo_url == "owner/repo" + assert dep.virtual_path == "collections/security" + assert dep.is_virtual is True + assert dep.is_virtual_collection() is True + + def test_dict_format_virtual_on_gitea(self): + """Dict format with path= on Gitea host yields a virtual package.""" + dep = DependencyReference.parse_from_dict({ + "git": "gitea.myorg.com/owner/repo", + "path": "prompts/review.prompt.md", + }) + assert dep.host == "gitea.myorg.com" + assert dep.repo_url == "owner/repo" + assert dep.virtual_path == "prompts/review.prompt.md" + assert dep.is_virtual is True diff --git a/tests/unit/test_github_host.py b/tests/unit/test_github_host.py index 90bc8a71d..a675dbe1e 100644 --- a/tests/unit/test_github_host.py +++ b/tests/unit/test_github_host.py @@ -71,6 +71,29 @@ def test_is_github_hostname_defaults(): assert not github_host.is_github_hostname("example.com") +def test_is_gitlab_hostname(): + """is_gitlab_hostname() matches gitlab.com and self-hosted gitlab.* instances. + + This drives _detect_virtual_package: GitLab supports nested groups so the + full path is kept as the repo URL. All other generic hosts (Gitea, + Bitbucket, etc.) use the owner/repo convention (2 base segments). 
+ """ + # Cloud and conventional self-hosted GitLab + assert github_host.is_gitlab_hostname("gitlab.com") + assert github_host.is_gitlab_hostname("gitlab.mycompany.com") + assert github_host.is_gitlab_hostname("gitlab.internal") + assert github_host.is_gitlab_hostname("GITLAB.COM") # case-insensitive + + # Non-GitLab hosts must NOT match + assert not github_host.is_gitlab_hostname("github.com") + assert not github_host.is_gitlab_hostname("bitbucket.org") + assert not github_host.is_gitlab_hostname("gitea.myorg.com") + assert not github_host.is_gitlab_hostname("git.company.internal") + assert not github_host.is_gitlab_hostname("dev.azure.com") + assert not github_host.is_gitlab_hostname(None) + assert not github_host.is_gitlab_hostname("") + + def test_is_azure_devops_hostname(): """Test Azure DevOps hostname detection.""" # Valid Azure DevOps hosts From 13dbf73b71455130410964a1984376e4fde97792 Mon Sep 17 00:00:00 2001 From: ganesanviji Date: Thu, 9 Apr 2026 16:09:00 +0530 Subject: [PATCH 03/14] Update src/apm_cli/deps/github_downloader.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/apm_cli/deps/github_downloader.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/apm_cli/deps/github_downloader.py b/src/apm_cli/deps/github_downloader.py index cd9097b9a..bd377c763 100644 --- a/src/apm_cli/deps/github_downloader.py +++ b/src/apm_cli/deps/github_downloader.py @@ -1237,11 +1237,10 @@ def _download_github_file(self, dep_ref: DependencyReference, file_path: str, re f"https://api.{host}/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" ] else: - # Generic host: negotiate API version (Gitea=v1, older Gitea/Gogs=v3, GitLab=v4) + # Generic host: negotiate Gitea/Gogs-style contents API versions. 
api_url_candidates = [ f"https://{host}/api/v1/repos/{owner}/{repo}/contents/{file_path}?ref={ref}", f"https://{host}/api/v3/repos/{owner}/{repo}/contents/{file_path}?ref={ref}", - f"https://{host}/api/v4/repos/{owner}/{repo}/contents/{file_path}?ref={ref}", ] api_url = api_url_candidates[0] From e4557767a5e1e26511331ca01de013c0c7a004a0 Mon Sep 17 00:00:00 2001 From: GanesanRengasamy Date: Wed, 15 Apr 2026 05:54:25 +0530 Subject: [PATCH 04/14] Review comments addressed --- src/apm_cli/deps/github_downloader.py | 3 +-- tests/test_github_downloader.py | 33 +++++++++++++++------------ 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/apm_cli/deps/github_downloader.py b/src/apm_cli/deps/github_downloader.py index bd377c763..098552484 100644 --- a/src/apm_cli/deps/github_downloader.py +++ b/src/apm_cli/deps/github_downloader.py @@ -1295,8 +1295,7 @@ def _download_github_file(self, dep_ref: DependencyReference, file_path: str, re else: fallback_url_candidates = [ f"https://{host}/api/v1/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}", - f"https://{host}/api/v3/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}", - f"https://{host}/api/v4/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}", + f"https://{host}/api/v3/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}", ] for fallback_url in fallback_url_candidates: diff --git a/tests/test_github_downloader.py b/tests/test_github_downloader.py index de2fb1fb3..62a92abc6 100644 --- a/tests/test_github_downloader.py +++ b/tests/test_github_downloader.py @@ -1703,23 +1703,28 @@ def test_falls_back_to_api_v1_when_raw_returns_non_200(self): assert "/api/v1/" in urls[1] -class TestGitLabApiVersionNegotiation: - """API version negotiation: v1 -> v3 -> v4 for generic hosts.""" +class TestGiteaGogsApiVersionNegotiation: + """API version negotiation: raw URL -> v1 -> v3 for Gitea/Gogs generic hosts. + + The implementation intentionally stops at v3. 
GitLab uses a completely + different API shape (/api/v4/projects/:id/repository/files/...) that is + not compatible with the GitHub Contents-style endpoint negotiated here; + GitLab support is limited to git-clone operations only. + """ def setup_method(self): with patch.dict(os.environ, {}, clear=True), _CRED_FILL_PATCH: self.downloader = GitHubPackageDownloader() - def test_gitlab_v4_reached_after_v1_and_v3_return_404(self): - """GitLab uses /api/v4/ -- negotiation must try v1, v3, then v4.""" - dep_ref = DependencyReference.parse("gitlab.myorg.com/owner/repo") - expected = b"gitlab file content" + def test_v1_falls_back_to_v3_for_generic_hosts(self): + """When Gitea raw URL and v1 both return 404, v3 is tried and succeeds.""" + dep_ref = DependencyReference.parse("gitea.myorg.com/owner/repo") + expected = b"gitea v3 file content" side_effects = [ _make_resp(404), # raw URL _make_resp(404), # v1 - _make_resp(404), # v3 - _make_resp(200, expected), # v4 + _make_resp(200, expected), # v3 ] with patch.object(self.downloader, "_resilient_get", side_effect=side_effects) as mock_get: result = self.downloader.download_raw_file(dep_ref, "skill.md", "main") @@ -1728,10 +1733,10 @@ def test_gitlab_v4_reached_after_v1_and_v3_return_404(self): urls = [c[0][0] for c in mock_get.call_args_list] assert "/api/v1/" in urls[1] assert "/api/v3/" in urls[2] - assert "/api/v4/" in urls[3] + assert len(mock_get.call_args_list) == 3 - def test_gitea_v1_succeeds_without_trying_v3_or_v4(self): - """When v1 returns 200, v3 and v4 must never be called.""" + def test_gitea_v1_succeeds_without_trying_v3(self): + """When v1 returns 200, v3 must never be called.""" dep_ref = DependencyReference.parse("gitea.example.com/owner/repo") expected = b"gitea content" @@ -1743,13 +1748,13 @@ def test_gitea_v1_succeeds_without_trying_v3_or_v4(self): assert result == expected urls = [c[0][0] for c in mock_get.call_args_list] - assert all("/api/v3/" not in u and "/api/v4/" not in u for u in urls) + assert 
all("/api/v3/" not in u for u in urls) def test_all_api_versions_404_raises_runtime_error(self): """When every API version returns 404 for both refs, a clear error is raised.""" dep_ref = DependencyReference.parse("git.example.com/owner/repo") - # raw + v1 + v3 + v4 for 'main', then v1 + v3 + v4 for 'master' fallback - side_effects = [_make_resp(404)] * 8 + # raw(main) + v1(main) + v3(main) = 3 calls, then v1(master) + v3(master) = 2 calls + side_effects = [_make_resp(404)] * 5 with patch.object(self.downloader, "_resilient_get", side_effect=side_effects): with pytest.raises(RuntimeError, match="File not found"): From 00f88e7ed3e3379147e153a1b27e7ea932da4274 Mon Sep 17 00:00:00 2001 From: GanesanRengasamy Date: Fri, 24 Apr 2026 05:43:12 +0530 Subject: [PATCH 05/14] Update reference.py --- src/apm_cli/models/dependency/reference.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/apm_cli/models/dependency/reference.py b/src/apm_cli/models/dependency/reference.py index 742c83ebf..bed1dbddb 100644 --- a/src/apm_cli/models/dependency/reference.py +++ b/src/apm_cli/models/dependency/reference.py @@ -11,7 +11,6 @@ is_artifactory_path, is_azure_devops_hostname, is_github_hostname, - is_gitlab_hostname, is_supported_git_host, parse_artifactory_path, unsupported_host_error, @@ -581,18 +580,10 @@ def _detect_virtual_package(cls, dependency_str: str): for seg in path_segments ) has_collection = "collections" in path_segments - # GitLab supports nested groups (group/subgroup/repo), so the full - # path is the repo -- no shorthand subdirectory splitting. - # Use https://gitlab.com/group/subgroup/repo.git for GitLab nested - # groups; shorthand subdirectory syntax is not supported for GitLab. - # All other generic hosts (Gitea, Bitbucket, self-hosted, etc.) use - # the owner/repo convention, so extra segments are a virtual subdir. 
if has_virtual_ext or has_collection: min_base_segments = 2 - elif is_gitlab_hostname(validated_host): - min_base_segments = len(path_segments) else: - min_base_segments = 2 + min_base_segments = len(path_segments) else: min_base_segments = 2 From 77c077d694920297cbb56dfd58e0dd7405412385 Mon Sep 17 00:00:00 2001 From: GanesanRengasamy Date: Fri, 24 Apr 2026 05:55:58 +0530 Subject: [PATCH 06/14] restore: re-add TestVirtualFilePackageYamlGeneration and TestSCPPortDetection These two test classes were accidentally removed from the branch. Restoring them from upstream main (8665f4b) to ensure full coverage is preserved alongside the Gitea virtual package detection changes. --- tests/test_github_downloader.py | 175 ++++++++++++++++++++++++++++ tests/unit/test_generic_git_urls.py | 115 ++++++++++++++++++ 2 files changed, 290 insertions(+) diff --git a/tests/test_github_downloader.py b/tests/test_github_downloader.py index b2d1b2efc..585c85215 100644 --- a/tests/test_github_downloader.py +++ b/tests/test_github_downloader.py @@ -1656,6 +1656,181 @@ def test_try_raw_download_returns_content_on_200(self): assert result == b'hello world' +class TestVirtualFilePackageYamlGeneration: + """Tests that apm.yml for virtual packages is always valid YAML.""" + + def _make_dep_ref(self, virtual_path): + """Helper: build a minimal DependencyReference for a virtual file.""" + from apm_cli.models.apm_package import DependencyReference + dep_ref = Mock(spec=DependencyReference) + dep_ref.is_virtual = True + dep_ref.virtual_path = virtual_path + dep_ref.reference = "main" + dep_ref.repo_url = "github/awesome-copilot" + dep_ref.get_virtual_package_name.return_value = "awesome-copilot-swe-subagent" + dep_ref.to_github_url.return_value = f"https://github.com/github/awesome-copilot/blob/main/{virtual_path}" + dep_ref.is_virtual_file.return_value = True + dep_ref.VIRTUAL_FILE_EXTENSIONS = [".prompt.md", ".instructions.md", ".chatmode.md", ".agent.md"] + return dep_ref + + def 
_make_collection_dep_ref(self, virtual_path): + """Helper: build a minimal DependencyReference for a virtual collection.""" + from apm_cli.models.apm_package import DependencyReference + dep_ref = Mock(spec=DependencyReference) + dep_ref.is_virtual = True + dep_ref.virtual_path = virtual_path + dep_ref.reference = "main" + dep_ref.repo_url = "github/my-org" + dep_ref.get_virtual_package_name.return_value = "my-org-my-collection" + dep_ref.to_github_url.return_value = f"https://github.com/github/my-org/blob/main/{virtual_path}" + dep_ref.is_virtual_collection.return_value = True + return dep_ref + + def test_yaml_with_colon_in_description(self, tmp_path): + """apm.yml must be valid when the agent description contains a colon.""" + import yaml + + agent_content = ( + b"---\n" + b"name: 'SWE'\n" + b"description: 'Senior software engineer subagent for implementation tasks:" + b" feature development, debugging, refactoring, and testing.'\n" + b"tools: ['vscode']\n" + b"---\n\n## Body\n" + ) + + dep_ref = self._make_dep_ref("agents/swe-subagent.agent.md") + target_path = tmp_path / "pkg" + + downloader = GitHubPackageDownloader() + with patch.dict(os.environ, {}, clear=True), _CRED_FILL_PATCH: + with patch.object(downloader, "download_raw_file", return_value=agent_content): + downloader.download_virtual_file_package(dep_ref, target_path) + + apm_yml_path = target_path / "apm.yml" + assert apm_yml_path.exists(), "apm.yml was not created" + + content = apm_yml_path.read_text(encoding="utf-8") + parsed = yaml.safe_load(content) # must not raise + + expected = ( + "Senior software engineer subagent for implementation tasks:" + " feature development, debugging, refactoring, and testing." 
+ ) + assert parsed["description"] == expected + + def test_yaml_with_colon_in_name(self, tmp_path): + """apm.yml must be valid even when the package name contains a colon.""" + import yaml + + dep_ref = self._make_dep_ref("agents/my-agent.agent.md") + dep_ref.get_virtual_package_name.return_value = "org-name: special" + + agent_content = b"---\nname: 'plain'\ndescription: 'plain'\n---\n" + target_path = tmp_path / "pkg" + + downloader = GitHubPackageDownloader() + with patch.dict(os.environ, {}, clear=True), _CRED_FILL_PATCH: + with patch.object(downloader, "download_raw_file", return_value=agent_content): + downloader.download_virtual_file_package(dep_ref, target_path) + + content = (target_path / "apm.yml").read_text(encoding="utf-8") + parsed = yaml.safe_load(content) + assert parsed["name"] == "org-name: special" + + def test_yaml_without_special_characters_still_valid(self, tmp_path): + """apm.yml generation must still work for ordinary descriptions.""" + import yaml + + agent_content = ( + b"---\n" + b"name: 'Simple Agent'\n" + b"description: 'A simple agent without special chars'\n" + b"---\n" + ) + + dep_ref = self._make_dep_ref("agents/simple.agent.md") + target_path = tmp_path / "pkg" + + downloader = GitHubPackageDownloader() + with patch.dict(os.environ, {}, clear=True), _CRED_FILL_PATCH: + with patch.object(downloader, "download_raw_file", return_value=agent_content): + downloader.download_virtual_file_package(dep_ref, target_path) + + content = (target_path / "apm.yml").read_text(encoding="utf-8") + parsed = yaml.safe_load(content) + assert parsed["description"] == "A simple agent without special chars" + + def test_collection_yaml_with_colon_in_description(self, tmp_path): + """apm.yml for collection packages must be valid when description contains a colon.""" + import yaml + + collection_manifest = ( + b"id: my-collection\n" + b"name: My Collection\n" + b"description: 'A collection for tasks: feature development, debugging.'\n" + b"items:\n" + b" - 
path: agents/my-agent.agent.md\n" + b" kind: agent\n" + ) + agent_file = b"---\nname: My Agent\n---\n## Body\n" + + dep_ref = self._make_collection_dep_ref("collections/my-collection") + target_path = tmp_path / "pkg" + + downloader = GitHubPackageDownloader() + + def _fake_download(dep_ref_arg, path, ref): + if "collection" in path: + return collection_manifest + return agent_file + + with patch.dict(os.environ, {}, clear=True), _CRED_FILL_PATCH: + with patch.object(downloader, "download_raw_file", side_effect=_fake_download): + downloader.download_collection_package(dep_ref, target_path) + + content = (target_path / "apm.yml").read_text(encoding="utf-8") + parsed = yaml.safe_load(content) # must not raise + + assert parsed["description"] == "A collection for tasks: feature development, debugging." + + def test_collection_yaml_with_colon_in_tags(self, tmp_path): + """apm.yml for collection packages must be valid when tags contain a colon.""" + import yaml + + collection_manifest = ( + b"id: tagged-collection\n" + b"name: Tagged\n" + b"description: Normal description\n" + b"tags:\n" + b" - 'scope: engineering'\n" + b" - plain-tag\n" + b"items:\n" + b" - path: agents/my-agent.agent.md\n" + b" kind: agent\n" + ) + agent_file = b"---\nname: My Agent\n---\n## Body\n" + + dep_ref = self._make_collection_dep_ref("collections/tagged-collection") + target_path = tmp_path / "pkg" + + downloader = GitHubPackageDownloader() + + def _fake_download(dep_ref_arg, path, ref): + if "collection" in path: + return collection_manifest + return agent_file + + with patch.dict(os.environ, {}, clear=True), _CRED_FILL_PATCH: + with patch.object(downloader, "download_raw_file", side_effect=_fake_download): + downloader.download_collection_package(dep_ref, target_path) + + content = (target_path / "apm.yml").read_text(encoding="utf-8") + parsed = yaml.safe_load(content) + + assert parsed["tags"] == ["scope: engineering", "plain-tag"] + + # 
--------------------------------------------------------------------------- # Generic host (Gitea / GitLab) download tests # --------------------------------------------------------------------------- diff --git a/tests/unit/test_generic_git_urls.py b/tests/unit/test_generic_git_urls.py index 61246f871..8a9538661 100644 --- a/tests/unit/test_generic_git_urls.py +++ b/tests/unit/test_generic_git_urls.py @@ -783,6 +783,121 @@ def test_https_nested_group_with_virtual_ext_rejected(self): DependencyReference.parse("https://gitlab.com/group/subgroup/file.prompt.md") +class TestSCPPortDetection: + """Detect port-like first path segment in SCP shorthand (git@host:port/path). + + SCP shorthand uses ':' as the path separator and cannot carry a port. + When the first path segment is a valid TCP port (1-65535), APM should + raise a ValueError with an actionable suggestion to use ssh:// instead. + """ + + def test_scp_with_port_7999_raises(self): + """Bitbucket Datacenter: git@host:7999/project/repo.git.""" + with pytest.raises(ValueError, match="ssh://"): + DependencyReference.parse("git@bitbucket.example.com:7999/project/repo.git") + + def test_scp_with_port_22_raises(self): + """Default SSH port 22 should still be detected.""" + with pytest.raises(ValueError, match="ssh://"): + DependencyReference.parse("git@host.example.com:22/owner/repo.git") + + def test_scp_with_port_65535_raises(self): + """Max valid TCP port should trigger detection.""" + with pytest.raises(ValueError, match="ssh://"): + DependencyReference.parse("git@host.example.com:65535/owner/repo.git") + + def test_scp_with_port_1_raises(self): + """Min valid TCP port should trigger detection.""" + with pytest.raises(ValueError, match="ssh://"): + DependencyReference.parse("git@host.example.com:1/owner/repo.git") + + def test_scp_with_leading_zeros_raises(self): + """Leading zeros: 007999 -> int 7999, still a valid port.""" + with pytest.raises(ValueError, match="ssh://"): + 
DependencyReference.parse("git@host.example.com:007999/project/repo.git") + + def test_scp_port_only_no_path_raises(self): + """git@host:7999 with no repo path after the port.""" + with pytest.raises(ValueError, match="no repository path follows"): + DependencyReference.parse("git@host.example.com:7999") + + def test_scp_port_trailing_slash_no_path_raises(self): + """git@host:7999/ -- trailing slash but empty remaining path.""" + with pytest.raises(ValueError, match="no repository path follows"): + DependencyReference.parse("git@host.example.com:7999/") + + def test_scp_port_with_ref_raises_and_preserves_ref(self): + """Port-like segment with #ref should be caught; suggestion preserves the ref.""" + with pytest.raises( + ValueError, + match=r"ssh://git@host\.example\.com:7999/project/repo\.git#main", + ): + DependencyReference.parse("git@host.example.com:7999/project/repo.git#main") + + def test_scp_port_with_alias_raises_and_preserves_alias(self): + """Port-like segment with @alias should be caught; suggestion preserves the alias.""" + with pytest.raises( + ValueError, + match=r"ssh://git@host\.example\.com:7999/project/repo\.git@my-alias", + ): + DependencyReference.parse("git@host.example.com:7999/project/repo.git@my-alias") + + def test_scp_port_with_ref_and_alias_preserves_both(self): + """Suggestion should include both #ref and @alias when present.""" + with pytest.raises( + ValueError, + match=r"ssh://git@host\.example\.com:7999/project/repo\.git#v1\.0@my-alias", + ): + DependencyReference.parse("git@host.example.com:7999/project/repo.git#v1.0@my-alias") + + def test_suggestion_includes_git_suffix(self): + """When the user wrote .git, the suggestion should preserve it.""" + with pytest.raises( + ValueError, + match=r"ssh://git@host\.example\.com:7999/project/repo\.git", + ): + DependencyReference.parse("git@host.example.com:7999/project/repo.git") + + def test_suggestion_omits_git_suffix_when_absent(self): + """When the user omitted .git, the suggestion 
should not add it.""" + with pytest.raises(ValueError) as excinfo: + DependencyReference.parse("git@host.example.com:7999/project/repo") + msg = str(excinfo.value) + assert "ssh://git@host.example.com:7999/project/repo" in msg + assert not msg.endswith(".git") + + def test_port_zero_not_detected(self): + """Port 0 is invalid -- should NOT trigger port detection, parses as org name.""" + dep = DependencyReference.parse("git@host.example.com:0/repo") + assert dep.repo_url == "0/repo" + assert dep.port is None + + def test_port_out_of_range_not_detected(self): + """99999 > 65535 -- not a valid port, should NOT trigger port detection.""" + dep = DependencyReference.parse("git@host.example.com:99999/repo") + assert dep.repo_url == "99999/repo" + assert dep.port is None + + def test_normal_org_name_not_detected(self): + """Non-numeric org name should parse normally.""" + dep = DependencyReference.parse("git@gitlab.com:acme/repo.git") + assert dep.repo_url == "acme/repo" + assert dep.port is None + + def test_alphanumeric_first_segment_not_detected(self): + """'v2' is not purely numeric -- should parse normally.""" + dep = DependencyReference.parse("git@gitlab.com:v2/repo.git") + assert dep.repo_url == "v2/repo" + assert dep.port is None + + def test_ssh_protocol_with_port_still_works(self): + """ssh:// URL form with port must continue working (regression guard).""" + dep = DependencyReference.parse("ssh://git@bitbucket.example.com:7999/project/repo.git") + assert dep.host == "bitbucket.example.com" + assert dep.port == 7999 + assert dep.repo_url == "project/repo" + + class TestGiteaVirtualPackageDetection: """Gitea-specific virtual package detection -- supplements TestFQDNVirtualPaths and TestNestedGroupSupport with Gitea host fixtures and regression guards From 8b56ffaaac88265021b958998470e2958c00e04e Mon Sep 17 00:00:00 2001 From: GanesanRengasamy Date: Sat, 25 Apr 2026 13:38:43 +0530 Subject: [PATCH 07/14] Review concerns addressed --- CHANGELOG.md | 1 + 
src/apm_cli/utils/github_host.py | 17 ----------------- tests/unit/test_github_host.py | 23 ----------------------- 3 files changed, 1 insertion(+), 40 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e3496861..55c1e9a7d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - CI: add `APM Self-Check` to `ci.yml` for `apm audit --ci`, regeneration-drift validation, and `merge-gate.yml` `EXPECTED_CHECKS` coverage. (#885) +- Virtual package support for self-hosted Git services (Gitea, Gogs): `apm install` now resolves subdirectory packages and raw file downloads from generic Git hosts via raw URL and API version negotiation (v1/v3). GitLab nested-group paths (`group/subgroup/repo`) are treated as full repo URLs (dict form required for virtual packages). -- by @ganesanviji (#587) ### Changed diff --git a/src/apm_cli/utils/github_host.py b/src/apm_cli/utils/github_host.py index d821fc679..eda296bc4 100644 --- a/src/apm_cli/utils/github_host.py +++ b/src/apm_cli/utils/github_host.py @@ -29,23 +29,6 @@ def is_azure_devops_hostname(hostname: Optional[str]) -> bool: return False -def is_gitlab_hostname(hostname: Optional[str]) -> bool: - """Return True if hostname is GitLab (cloud or self-hosted). - - GitLab supports nested groups (group/subgroup/repo), so paths with - more than two segments should be treated as repo paths, not virtual - subdirectory packages. - - Accepts: - - gitlab.com - - Any hostname starting with 'gitlab.' (common self-hosted convention) - """ - if not hostname: - return False - h = hostname.lower() - return h == "gitlab.com" or h.startswith("gitlab.") - - def is_github_hostname(hostname: Optional[str]) -> bool: """Return True if hostname should be treated as GitHub (cloud or enterprise). 
diff --git a/tests/unit/test_github_host.py b/tests/unit/test_github_host.py index 59070b49b..f767ad477 100644 --- a/tests/unit/test_github_host.py +++ b/tests/unit/test_github_host.py @@ -71,29 +71,6 @@ def test_is_github_hostname_defaults(): assert not github_host.is_github_hostname("example.com") -def test_is_gitlab_hostname(): - """is_gitlab_hostname() matches gitlab.com and self-hosted gitlab.* instances. - - This drives _detect_virtual_package: GitLab supports nested groups so the - full path is kept as the repo URL. All other generic hosts (Gitea, - Bitbucket, etc.) use the owner/repo convention (2 base segments). - """ - # Cloud and conventional self-hosted GitLab - assert github_host.is_gitlab_hostname("gitlab.com") - assert github_host.is_gitlab_hostname("gitlab.mycompany.com") - assert github_host.is_gitlab_hostname("gitlab.internal") - assert github_host.is_gitlab_hostname("GITLAB.COM") # case-insensitive - - # Non-GitLab hosts must NOT match - assert not github_host.is_gitlab_hostname("github.com") - assert not github_host.is_gitlab_hostname("bitbucket.org") - assert not github_host.is_gitlab_hostname("gitea.myorg.com") - assert not github_host.is_gitlab_hostname("git.company.internal") - assert not github_host.is_gitlab_hostname("dev.azure.com") - assert not github_host.is_gitlab_hostname(None) - assert not github_host.is_gitlab_hostname("") - - def test_is_azure_devops_hostname(): """Test Azure DevOps hostname detection.""" # Valid Azure DevOps hosts From 77e2d0225e038e2249a7b9d9fbfa49a52fd4ad49 Mon Sep 17 00:00:00 2001 From: GanesanRengasamy Date: Wed, 6 May 2026 05:51:23 +0530 Subject: [PATCH 08/14] review changes addressed --- CHANGELOG.md | 11 ++++------- tests/test_github_downloader.py | 10 +++++----- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c90639107..78a52e95f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,10 @@ and this project adheres to [Semantic 
Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Virtual package support for self-hosted Git services (Gitea, Gogs): `apm install` now resolves subdirectory packages and raw file downloads from generic Git hosts via raw URL and API version negotiation (v1/v3). GitLab nested-group paths (`group/subgroup/repo`) are treated as full repo URLs (dict form required for virtual packages). -- by @ganesanviji (#587) + ### Fixed - Docs site auto-deploys again after bot-cut releases by correctly detecting tag-push context in `docs.yml`. (#953) @@ -49,14 +53,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `apm-primitives-architect` agent: reusable persona for designing and critiquing `.apm/` skill bundles. (#882) - `apm-triage-panel` skill: three-persona panel (DevX UX, Supply Chain Security, APM CEO; conditional OSS Growth Hacker) for issue triage producing single labelled-decision comment with structured JSON tail. Mirrors `apm-review-panel` orchestration model. (#915) - CI: add `APM Self-Check` to `ci.yml` for `apm audit --ci`, regeneration-drift validation, and `merge-gate.yml` `EXPECTED_CHECKS` coverage. (#885) -- Virtual package support for self-hosted Git services (Gitea, Gogs): `apm install` now resolves subdirectory packages and raw file downloads from generic Git hosts via raw URL and API version negotiation (v1/v3). GitLab nested-group paths (`group/subgroup/repo`) are treated as full repo URLs (dict form required for virtual packages). -- by @ganesanviji (#587) -- **Gemini CLI** as a supported APM target (`--target gemini`): auto-detects `.gemini/`, writes MCP config to `.gemini/settings.json`, and adds `apm runtime setup|remove gemini`. (#917) - Experimental `cowork` target for Microsoft 365 Copilot Cowork custom-skill deployment via OneDrive (`apm experimental enable cowork`; `apm install --target cowork --global`; persisted via `apm config set cowork-skills-dir`). 
(#913) -- `apm experimental` command group (`list` / `enable` / `disable` / `reset`) lets you opt into new behaviour before it graduates to default. Ships with the `verbose-version` flag. (#849) -- `apm audit --ci` now verifies hash integrity of locally deployed `.apm/` files so hand-edits and config drift fail CI instead of slipping through. (#887) -- `includes:` manifest field (`auto` or list) gives you explicit control over which local `.apm/` files are deployed; pair with `policy.manifest.require_explicit_includes` to block silent expansion. Audit raises an `includes-consent` advisory while you migrate. (#887) -- `apm-triage-panel` skill: three-persona issue triage panel (DevX UX, Supply Chain Security, APM CEO) emitting a single labelled-decision comment, mirroring `apm-review-panel`. (#915) -- `apm-primitives-architect` persona for designing and critiquing `.apm/` skill bundles, plus a `pr-description-skill` that enforces self-sufficient PR bodies (TL;DR/Problem/Approach/Implementation/Diagrams/Trade-offs/Benefits/Validation/How-to-test) with anchored citations and validated mermaid. (#882, #884) - New docs guide [`dev-only-primitives`](https://danielmeppiel.github.io/awd-cli/guides/dev-only-primitives/): canonical pattern for maintainer-only primitives that must not ride into your published bundle. (#949) - Maintainer tooling: PGS project-board sync workflow keeps issues in lockstep with labels/milestones; `APM Self-Check` CI job dogfoods `apm audit --ci` and regeneration-drift gates. 
(#919, #885) diff --git a/tests/test_github_downloader.py b/tests/test_github_downloader.py index 585c85215..92db489ad 100644 --- a/tests/test_github_downloader.py +++ b/tests/test_github_downloader.py @@ -1902,7 +1902,7 @@ def test_falls_back_to_api_v1_when_raw_returns_non_200(self): assert result == expected urls = [c[0][0] for c in mock_get.call_args_list] assert urls[0] == "https://gitea.myorg.com/owner/repo/raw/main/README.md" - assert "/api/v1/" in urls[1] + assert urlparse(urls[1]).path.startswith("/api/v1/") class TestGiteaGogsApiVersionNegotiation: @@ -1933,8 +1933,8 @@ def test_v1_falls_back_to_v3_for_generic_hosts(self): assert result == expected urls = [c[0][0] for c in mock_get.call_args_list] - assert "/api/v1/" in urls[1] - assert "/api/v3/" in urls[2] + assert urlparse(urls[1]).path.startswith("/api/v1/") + assert urlparse(urls[2]).path.startswith("/api/v3/") assert len(mock_get.call_args_list) == 3 def test_gitea_v1_succeeds_without_trying_v3(self): @@ -1950,7 +1950,7 @@ def test_gitea_v1_succeeds_without_trying_v3(self): assert result == expected urls = [c[0][0] for c in mock_get.call_args_list] - assert all("/api/v3/" not in u for u in urls) + assert not any(urlparse(u).path.startswith("/api/v3/") for u in urls) def test_all_api_versions_404_raises_runtime_error(self): """When every API version returns 404 for both refs, a clear error is raised.""" @@ -1975,7 +1975,7 @@ def test_github_com_uses_api_github_com_not_api_v4(self): assert result == expected url_called = mock_get.call_args_list[0][0][0] assert url_called.startswith("https://api.github.com/") - assert "/api/v4/" not in url_called + assert not urlparse(url_called).path.startswith("/api/v4/") if __name__ == '__main__': From 59308a9fd4931e4e0b04773dead8ffc2a0106b4f Mon Sep 17 00:00:00 2001 From: GanesanRengasamy Date: Wed, 6 May 2026 06:11:38 +0530 Subject: [PATCH 09/14] Merge branch 'main' of https://github.com/microsoft/apm into feat/genric-host-gitea-private Resolved conflicts: - 
CHANGELOG.md: keep #587 entry under [Unreleased], merge new [0.12.2] release - src/apm_cli/deps/github_downloader.py: fix _ssh_attempt_allowed to properly delegate to validation module (remove misplaced CDN/Gitea code from method body) - src/apm_cli/models/dependency/reference.py: use parts[1:] for generic hosts (supports nested groups on Gitea/Bitbucket) - tests/test_github_downloader.py: keep TestGiteaRawUrlDownload and TestGiteaGogsApiVersionNegotiation; restore TestRefExistsViaLsRemote Post-merge fixes: - src/apm_cli/deps/download_strategies.py: add Gitea raw URL + api/v1/v3 negotiation to download_github_file (replaces bare /api/v3 fallback) - tests/unit/test_generic_git_urls.py: update test_gitea_collections_path_is_virtual to use is_virtual_subdirectory() (is_virtual_collection was removed in #1094) --- src/apm_cli/deps/download_strategies.py | 100 ++++++++++++++++++------ tests/unit/test_generic_git_urls.py | 4 +- 2 files changed, 78 insertions(+), 26 deletions(-) diff --git a/src/apm_cli/deps/download_strategies.py b/src/apm_cli/deps/download_strategies.py index f30f0f827..751bf6823 100644 --- a/src/apm_cli/deps/download_strategies.py +++ b/src/apm_cli/deps/download_strategies.py @@ -634,14 +634,43 @@ def download_github_file( # All raw attempts failed -- fall through to API path which # handles private repos, rate-limit messaging, and SAML errors. + # --- Generic host: raw URL first, then API version negotiation --- + # For non-GitHub non-GHE hosts (Gitea, Gogs, self-hosted git), try the + # raw URL path first, then negotiate API versions v1 -> v3. 
+ if host.lower() != "github.com" and not host.lower().endswith(".ghe.com"): + raw_url = f"https://{host}/{owner}/{repo}/raw/{ref}/{file_path}" + raw_headers: dict[str, str] = {} + if token: + raw_headers["Authorization"] = f"token {token}" + try: + response = self._host._resilient_get(raw_url, headers=raw_headers, timeout=30) + if response.status_code == 200: + if verbose_callback: + verbose_callback(f"Downloaded file: {host}/{dep_ref.repo_url}/{file_path}") + return response.content + except (requests.RequestException, OSError): + pass + # --- Contents API path (authenticated, enterprise, or raw fallback) --- - # Build GitHub API URL - format differs by host type + # Build API URL candidates - format differs by host type if host == "github.com": - api_url = f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" + api_url_candidates = [ + f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" + ] elif host.lower().endswith(".ghe.com"): - api_url = f"https://api.{host}/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" + api_url_candidates = [ + f"https://api.{host}/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" + ] else: - api_url = f"https://{host}/api/v3/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" + # Generic host: negotiate Gitea/Gogs-style contents API versions. + # v1 is native Gitea/Gogs; v3 is a Gogs compatibility alias. + # GitLab uses /api/v4/projects/:id/repository/files (different shape) + # so it is not included -- GitLab support is limited to git-clone only. 
+ api_url_candidates = [ + f"https://{host}/api/v1/repos/{owner}/{repo}/contents/{file_path}?ref={ref}", + f"https://{host}/api/v3/repos/{owner}/{repo}/contents/{file_path}?ref={ref}", + ] + api_url = api_url_candidates[0] # Set up authentication headers headers: dict[str, str] = { @@ -659,6 +688,24 @@ def download_github_file( return response.content except requests.exceptions.HTTPError as e: if e.response.status_code == 404: + # For generic hosts, try remaining API version candidates before ref fallback + for candidate_url in api_url_candidates[1:]: + try: + candidate_resp = self._host._resilient_get( + candidate_url, headers=headers, timeout=30 + ) + candidate_resp.raise_for_status() + if verbose_callback: + verbose_callback( + f"Downloaded file: {host}/{dep_ref.repo_url}/{file_path}" + ) + return candidate_resp.content + except requests.exceptions.HTTPError as ce: + if ce.response.status_code != 404: + raise RuntimeError( # noqa: B904 + f"Failed to download {file_path}: HTTP {ce.response.status_code}" + ) + # Try fallback branches if the specified ref fails if ref not in ["main", "master"]: raise RuntimeError( # noqa: B904 @@ -668,34 +715,39 @@ def download_github_file( # Try the other default branch fallback_ref = "master" if ref == "main" else "main" - # Build fallback API URL + # Build fallback URL candidates (same structure as primary) if host == "github.com": - fallback_url = ( + fallback_url_candidates = [ f"https://api.github.com/repos/{owner}/{repo}" f"/contents/{file_path}?ref={fallback_ref}" - ) + ] elif host.lower().endswith(".ghe.com"): - fallback_url = ( + fallback_url_candidates = [ f"https://api.{host}/repos/{owner}/{repo}" f"/contents/{file_path}?ref={fallback_ref}" - ) + ] else: - fallback_url = ( - f"https://{host}/api/v3/repos/{owner}/{repo}" - f"/contents/{file_path}?ref={fallback_ref}" - ) + fallback_url_candidates = [ + f"https://{host}/api/v1/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}", + 
f"https://{host}/api/v3/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}", + ] - try: - response = self._host._resilient_get(fallback_url, headers=headers, timeout=30) - response.raise_for_status() - if verbose_callback: - verbose_callback(f"Downloaded file: {host}/{dep_ref.repo_url}/{file_path}") - return response.content - except requests.exceptions.HTTPError: - raise RuntimeError( # noqa: B904 - f"File not found: {file_path} in {dep_ref.repo_url} " - f"(tried refs: {ref}, {fallback_ref})" - ) + for fallback_url in fallback_url_candidates: + try: + response = self._host._resilient_get(fallback_url, headers=headers, timeout=30) + response.raise_for_status() + if verbose_callback: + verbose_callback( + f"Downloaded file: {host}/{dep_ref.repo_url}/{file_path}" + ) + return response.content + except requests.exceptions.HTTPError: + pass + + raise RuntimeError( # noqa: B904 + f"File not found: {file_path} in {dep_ref.repo_url} " + f"(tried refs: {ref}, {fallback_ref})" + ) elif e.response.status_code in (401, 403): # Distinguish rate limiting from auth failure. 
is_rate_limit = False diff --git a/tests/unit/test_generic_git_urls.py b/tests/unit/test_generic_git_urls.py index 1e5827f02..3aaa60107 100644 --- a/tests/unit/test_generic_git_urls.py +++ b/tests/unit/test_generic_git_urls.py @@ -929,13 +929,13 @@ def test_gitea_virtual_file_extension(self): assert dep.is_virtual_file() is True def test_gitea_collections_path_is_virtual(self): - """Path with /collections/ on Gitea is detected as a virtual collection.""" + """Path with /collections/ on Gitea is detected as a virtual subdirectory package.""" dep = DependencyReference.parse("gitea.myorg.com/owner/repo/collections/security") assert dep.host == "gitea.myorg.com" assert dep.repo_url == "owner/repo" assert dep.virtual_path == "collections/security" assert dep.is_virtual is True - assert dep.is_virtual_collection() is True + assert dep.is_virtual_subdirectory() is True def test_dict_format_virtual_on_gitea(self): """Dict format with path= on Gitea host yields a virtual package.""" From c006e9a1e2a1c195e14fd8ca68704ada1b710a86 Mon Sep 17 00:00:00 2001 From: danielmeppiel Date: Fri, 8 May 2026 21:58:45 +0200 Subject: [PATCH 10/14] style: apply ruff format to fix CI lint Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/apm_cli/deps/download_strategies.py | 4 +++- tests/test_github_downloader.py | 21 +++++++++++---------- tests/unit/test_generic_git_urls.py | 10 ++++++---- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/apm_cli/deps/download_strategies.py b/src/apm_cli/deps/download_strategies.py index 751bf6823..a50507db9 100644 --- a/src/apm_cli/deps/download_strategies.py +++ b/src/apm_cli/deps/download_strategies.py @@ -734,7 +734,9 @@ def download_github_file( for fallback_url in fallback_url_candidates: try: - response = self._host._resilient_get(fallback_url, headers=headers, timeout=30) + response = self._host._resilient_get( + fallback_url, headers=headers, timeout=30 + ) response.raise_for_status() if verbose_callback: 
verbose_callback( diff --git a/tests/test_github_downloader.py b/tests/test_github_downloader.py index ecaa18c3a..aeb7139bc 100644 --- a/tests/test_github_downloader.py +++ b/tests/test_github_downloader.py @@ -2062,15 +2062,14 @@ def _always_fail(*args, **kwargs): # Generic host (Gitea / GitLab) download tests # --------------------------------------------------------------------------- + def _make_resp(status_code: int, content: bytes = b"") -> Mock: """Build a minimal mock requests.Response.""" resp = Mock() resp.status_code = status_code resp.content = content if status_code >= 400: - resp.raise_for_status = Mock( - side_effect=requests_lib.exceptions.HTTPError(response=resp) - ) + resp.raise_for_status = Mock(side_effect=requests_lib.exceptions.HTTPError(response=resp)) else: resp.raise_for_status = Mock() return resp @@ -2121,8 +2120,9 @@ def test_falls_back_to_api_v1_when_raw_returns_non_200(self): expected = b"file via API" with patch.object( - self.downloader, "_resilient_get", - side_effect=[_make_resp(404), _make_resp(200, expected)] + self.downloader, + "_resilient_get", + side_effect=[_make_resp(404), _make_resp(200, expected)], ) as mock_get: result = self.downloader.download_raw_file(dep_ref, "README.md", "main") @@ -2151,9 +2151,9 @@ def test_v1_falls_back_to_v3_for_generic_hosts(self): expected = b"gitea v3 file content" side_effects = [ - _make_resp(404), # raw URL - _make_resp(404), # v1 - _make_resp(200, expected), # v3 + _make_resp(404), # raw URL + _make_resp(404), # v1 + _make_resp(200, expected), # v3 ] with patch.object(self.downloader, "_resilient_get", side_effect=side_effects) as mock_get: result = self.downloader.download_raw_file(dep_ref, "skill.md", "main") @@ -2170,8 +2170,9 @@ def test_gitea_v1_succeeds_without_trying_v3(self): expected = b"gitea content" with patch.object( - self.downloader, "_resilient_get", - side_effect=[_make_resp(404), _make_resp(200, expected)] + self.downloader, + "_resilient_get", + 
side_effect=[_make_resp(404), _make_resp(200, expected)], ) as mock_get: result = self.downloader.download_raw_file(dep_ref, "file.md", "main") diff --git a/tests/unit/test_generic_git_urls.py b/tests/unit/test_generic_git_urls.py index a5753ea25..a0a3530df 100644 --- a/tests/unit/test_generic_git_urls.py +++ b/tests/unit/test_generic_git_urls.py @@ -953,10 +953,12 @@ def test_gitea_collections_path_is_virtual(self): def test_dict_format_virtual_on_gitea(self): """Dict format with path= on Gitea host yields a virtual package.""" - dep = DependencyReference.parse_from_dict({ - "git": "gitea.myorg.com/owner/repo", - "path": "prompts/review.prompt.md", - }) + dep = DependencyReference.parse_from_dict( + { + "git": "gitea.myorg.com/owner/repo", + "path": "prompts/review.prompt.md", + } + ) assert dep.host == "gitea.myorg.com" assert dep.repo_url == "owner/repo" assert dep.virtual_path == "prompts/review.prompt.md" From 13fd8cdec78d38b8949f5cff761550674ef0c7e7 Mon Sep 17 00:00:00 2001 From: Daniel Meppiel Date: Fri, 8 May 2026 22:25:36 +0200 Subject: [PATCH 11/14] fix(deps): guard token forwarding to non-GitHub hosts; decode Gitea API envelope Address apm-review-panel needs_rework findings on PR #587: Security (BLOCKING): - Stop forwarding env-var bearer tokens (GITHUB_APM_PAT, GITHUB_TOKEN, GH_TOKEN) as Authorization headers to arbitrary non-GitHub hosts. Tokens are forwarded to a generic host only when one of: (a) source is git's host-scoped credential helper, (b) source is a per-host GITHUB_APM_PAT_ env var (explicit opt-in), or (c) the host matches a non-empty GITHUB_HOST env var (declared GHES). Bare GITHUB_APM_PAT against an arbitrary FQDN no longer leaks. Correctness: - Decode Gitea/Gogs Contents API JSON envelope ({"content": , "encoding": "base64"}) instead of returning the JSON bytes as file content. - Re-raise non-404 RequestException in the ref-fallback API loop (was silently swallowed; primary loop already re-raised). 
Refactor: - Extract _build_contents_api_urls, _build_generic_host_auth_headers, _extract_contents_api_payload, _build_unsupported_or_missing_error. - Use is_github_hostname() for host-class checks; extend with GITHUB_HOST env var for custom-domain GHES. - 401/403 phrasing now host-class-aware (rate-limit / SAML hints only on GitHub family). Tests: - Remove vicious test test_raw_url_with_token_adds_auth_header that pinned the PAT-leak as desired behavior. - Add 16 e2e tests covering: env-var token does NOT leak to gitea host, GitHub still gets token, ghe.com still gets token, git-credential-fill still authenticates, GITHUB_HOST opt-in, Gitea JSON envelope decode, raw bytes passthrough, v1->v3 API negotiation, fallback loop reraise on non-404 (both primary + ref-fallback), descriptive unsupported-host error, verbose logs. Docs: - README, docs/guides/dependencies, packages/apm-guide dependencies reference: name Gitea/Gogs explicitly; note GitLab nested-group repos still require object form for virtual paths. - CHANGELOG: tighten [Unreleased] entry to one user-facing line. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- CHANGELOG.md | 18 +- README.md | 2 +- docs/src/content/docs/guides/dependencies.md | 2 +- .../.apm/skills/apm-usage/dependencies.md | 2 + src/apm_cli/deps/download_strategies.py | 287 ++++++++++++---- tests/test_github_downloader.py | 319 ++++++++++++++++-- 6 files changed, 531 insertions(+), 99 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2f1a4775d..4dfbc99ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- Virtual package support for self-hosted Git services (Gitea, Gogs): `apm install` now resolves subdirectory packages and raw file downloads from generic Git hosts via raw URL and API version negotiation (v1/v3). 
GitLab nested-group paths (`group/subgroup/repo`) are treated as full repo URLs (dict form required for virtual packages). -- by @ganesanviji (#587) +- Virtual subdirectory and raw-file packages now resolve from self-hosted Git services (Gitea, Gogs) via raw URL with API v1/v3 fallback. (#587) - `shared/apm.md` gh-aw shared workflow exposes a `target:` import input (default `all`) so consumer workflows can ship slim, single-harness bundles instead of always packing every layout. (#1184) ### Fixed @@ -234,17 +234,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- **Gemini CLI** as a supported APM target (`--target gemini`). APM auto-detects `.gemini/` directories and writes MCP server configuration to `.gemini/settings.json`. Includes `apm runtime setup gemini` / `apm runtime remove gemini` support. (#917) -- New `pr-description-skill` skill bundle: enforces a 10-section PR body shape (TL;DR / Problem / Approach / Implementation / Diagrams / Trade-offs / Benefits / Validation / How to test, plus the `Co-authored-by` trailer) with a cite-or-omit rule for every WHY-claim, GFM-rendered output, ASCII-only template source, and validated mermaid diagrams. Captures the meta-pattern from PR #882 as a reusable scaffold so future PR bodies meet the same bar without per-PR specialist subagent intervention. (#884) -- `apm experimental` command group -- a feature-flag registry with `list` / `enable` / `disable` / `reset` subcommands. Opt in to new behaviour before it graduates to default. Ships with one built-in flag (`verbose-version`) and a contributor recipe for proposing new flags. (#849) -- `includes:` manifest field (auto | list) for explicit governance of local `.apm/` content. Closes audit-blindness gap (#887). -- `apm audit --ci` now verifies hash integrity of locally deployed files, detecting hand-edits and config drift. 
(#887) -- `policy.manifest.require_explicit_includes` policy field enforces explicit `includes` lists (rejects `auto` + undeclared). (#887) -- `includes-consent` advisory appears in `apm audit` CLI/JSON output when local content is deployed without an explicit `includes:` declaration (#887) -- `apm-primitives-architect` agent: reusable persona for designing and critiquing `.apm/` skill bundles. (#882) -- `apm-triage-panel` skill: three-persona panel (DevX UX, Supply Chain Security, APM CEO; conditional OSS Growth Hacker) for issue triage producing single labelled-decision comment with structured JSON tail. Mirrors `apm-review-panel` orchestration model. (#915) -- CI: add `APM Self-Check` to `ci.yml` for `apm audit --ci`, regeneration-drift validation, and `merge-gate.yml` `EXPECTED_CHECKS` coverage. (#885) +- **Gemini CLI** as a supported APM target (`--target gemini`): auto-detects `.gemini/`, writes MCP config to `.gemini/settings.json`, and adds `apm runtime setup|remove gemini`. (#917) - Experimental `cowork` target for Microsoft 365 Copilot Cowork custom-skill deployment via OneDrive (`apm experimental enable cowork`; `apm install --target cowork --global`; persisted via `apm config set cowork-skills-dir`). (#913) +- `apm experimental` command group (`list` / `enable` / `disable` / `reset`) lets you opt into new behaviour before it graduates to default. Ships with the `verbose-version` flag. (#849) +- `apm audit --ci` now verifies hash integrity of locally deployed `.apm/` files so hand-edits and config drift fail CI instead of slipping through. (#887) +- `includes:` manifest field (`auto` or list) gives you explicit control over which local `.apm/` files are deployed; pair with `policy.manifest.require_explicit_includes` to block silent expansion. Audit raises an `includes-consent` advisory while you migrate. 
(#887) +- `apm-triage-panel` skill: three-persona issue triage panel (DevX UX, Supply Chain Security, APM CEO) emitting a single labelled-decision comment, mirroring `apm-review-panel`. (#915) +- `apm-primitives-architect` persona for designing and critiquing `.apm/` skill bundles, plus a `pr-description-skill` that enforces self-sufficient PR bodies (TL;DR/Problem/Approach/Implementation/Diagrams/Trade-offs/Benefits/Validation/How-to-test) with anchored citations and validated mermaid. (#882, #884) - New docs guide [`dev-only-primitives`](https://danielmeppiel.github.io/awd-cli/guides/dev-only-primitives/): canonical pattern for maintainer-only primitives that must not ride into your published bundle. (#949) - Maintainer tooling: PGS project-board sync workflow keeps issues in lockstep with labels/milestones; `APM Self-Check` CI job dogfoods `apm audit --ci` and regeneration-drift gates. (#919, #885) diff --git a/README.md b/README.md index a3d934993..394682122 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ One command, no configuration -- VS Code and GitHub Copilot read the file automa One `apm.yml` describes every primitive your agents need — instructions, skills, prompts, agents, hooks, plugins, MCP servers — and `apm install` reproduces the exact same setup across every client on every machine. `apm.lock.yaml` pins the resolved tree the way `package-lock.json` does for npm. 
- **[One manifest for everything](https://microsoft.github.io/apm/reference/primitive-types/)** — declared once, deployed across Copilot, Claude, Cursor, OpenCode, Codex, Gemini, Windsurf -- **[Install from anywhere](https://microsoft.github.io/apm/guides/dependencies/)** — GitHub, GitLab, Bitbucket, Azure DevOps, GitHub Enterprise, any git host +- **[Install from anywhere](https://microsoft.github.io/apm/guides/dependencies/)** — GitHub, GitLab, Bitbucket, Azure DevOps, GitHub Enterprise, Gitea, Gogs, any git host - **[Transitive dependencies](https://microsoft.github.io/apm/guides/dependencies/)** — packages can depend on packages; APM resolves the full tree - **[Author plugins](https://microsoft.github.io/apm/guides/plugins/)** — build Copilot, Claude, and Cursor plugins with dependency management, then export standard `plugin.json` - **[Marketplaces](https://microsoft.github.io/apm/guides/marketplaces/)** — install plugins from curated registries in one command, deployed across all targets and locked diff --git a/docs/src/content/docs/guides/dependencies.md b/docs/src/content/docs/guides/dependencies.md index 7090e6e7c..5fda882d9 100644 --- a/docs/src/content/docs/guides/dependencies.md +++ b/docs/src/content/docs/guides/dependencies.md @@ -15,7 +15,7 @@ APM dependencies are git repositories containing `.apm/` directories with contex - **Build on tested context** instead of starting from scratch - **Maintain consistency** across multiple repositories and teams -APM supports any git-accessible host — GitHub, GitLab, Bitbucket, self-hosted instances, and more. +APM supports any git-accessible host — GitHub, GitLab, Bitbucket, Gitea, Gogs, self-hosted instances, and more. For self-hosted Gitea/Gogs, virtual subdirectory and raw-file packages resolve via the `/{owner}/{repo}/raw/{ref}/{path}` URL with a Contents API v1/v3 fallback. GitLab nested-group repos require the object form (see below). 
## Dependency Types diff --git a/packages/apm-guide/.apm/skills/apm-usage/dependencies.md b/packages/apm-guide/.apm/skills/apm-usage/dependencies.md index 75fd09a98..edaf937ac 100644 --- a/packages/apm-guide/.apm/skills/apm-usage/dependencies.md +++ b/packages/apm-guide/.apm/skills/apm-usage/dependencies.md @@ -129,6 +129,8 @@ Virtual packages reference a subset of a repository. Classification is by extension only. A path like `owner/repo/collections/security` (no extension) is a Subdirectory; the actual shape -- APM package (incl. dep-only `apm.yml` with no `.apm/`), skill bundle, or plugin -- is resolved at fetch time by probing for `apm.yml`. +**Self-hosted Git hosts (Gitea, Gogs):** virtual packages resolve via the host's `/{owner}/{repo}/raw/{ref}/{path}` URL with a Contents API v1/v3 fallback. GitLab nested-group repos (`group/subgroup/repo`) require the object form (`git: <url>`, `path: <subdir>`) -- shorthand is ambiguous on >2-segment paths. + > **Removed (#1094):** the legacy `.collection.yml` / `.collection.yaml` virtual-package form is no longer supported. Convert any `.collection.yml` to an `apm.yml` with a `dependencies:` section, then reference the resulting subdirectory as a regular subdirectory virtual package. ## Canonical storage rules diff --git a/src/apm_cli/deps/download_strategies.py b/src/apm_cli/deps/download_strategies.py index a50507db9..7e32a97bc 100644 --- a/src/apm_cli/deps/download_strategies.py +++ b/src/apm_cli/deps/download_strategies.py @@ -7,6 +7,8 @@ operations to it (Facade/Delegate pattern). """ +import base64 +import json import os import random import sys @@ -637,60 +639,62 @@ def download_github_file( # --- Generic host: raw URL first, then API version negotiation --- # For non-GitHub non-GHE hosts (Gitea, Gogs, self-hosted git), try the # raw URL path first, then negotiate API versions v1 -> v3.
- if host.lower() != "github.com" and not host.lower().endswith(".ghe.com"): + is_github_host = is_github_hostname(host) or ( + os.environ.get("GITHUB_HOST", "").strip().lower() == (host or "").lower() + and bool(os.environ.get("GITHUB_HOST", "").strip()) + ) + if not is_github_host: raw_url = f"https://{host}/{owner}/{repo}/raw/{ref}/{file_path}" - raw_headers: dict[str, str] = {} - if token: - raw_headers["Authorization"] = f"token {token}" + raw_headers = self._build_generic_host_auth_headers(host, file_ctx, accept=None) + if verbose_callback: + verbose_callback(f"Trying raw URL on generic host {host}: {raw_url}") try: response = self._host._resilient_get(raw_url, headers=raw_headers, timeout=30) if response.status_code == 200: if verbose_callback: verbose_callback(f"Downloaded file: {host}/{dep_ref.repo_url}/{file_path}") return response.content - except (requests.RequestException, OSError): - pass + except (requests.RequestException, OSError) as raw_err: + if verbose_callback: + verbose_callback( + f"Raw URL on {host} failed: {type(raw_err).__name__}; " + f"falling back to Contents API." + ) # --- Contents API path (authenticated, enterprise, or raw fallback) --- # Build API URL candidates - format differs by host type - if host == "github.com": - api_url_candidates = [ - f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" - ] - elif host.lower().endswith(".ghe.com"): - api_url_candidates = [ - f"https://api.{host}/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" - ] - else: - # Generic host: negotiate Gitea/Gogs-style contents API versions. - # v1 is native Gitea/Gogs; v3 is a Gogs compatibility alias. - # GitLab uses /api/v4/projects/:id/repository/files (different shape) - # so it is not included -- GitLab support is limited to git-clone only. 
- api_url_candidates = [ - f"https://{host}/api/v1/repos/{owner}/{repo}/contents/{file_path}?ref={ref}", - f"https://{host}/api/v3/repos/{owner}/{repo}/contents/{file_path}?ref={ref}", - ] + api_url_candidates = self._build_contents_api_urls(host, owner, repo, file_path, ref) api_url = api_url_candidates[0] # Set up authentication headers - headers: dict[str, str] = { - "Accept": "application/vnd.github.v3.raw" # Returns raw content - } - if token: - headers["Authorization"] = f"token {token}" + # GitHub family: use GitHub raw-media accept header. Generic hosts + # ignore it and may return JSON envelopes -- handle that on read. + accept = "application/vnd.github.v3.raw" if is_github_host else "application/json" + if is_github_host: + headers: dict[str, str] = {"Accept": accept} + if token: + headers["Authorization"] = f"token {token}" + else: + headers = self._build_generic_host_auth_headers(host, file_ctx, accept=accept) # Try to download with the specified ref try: + if verbose_callback and not is_github_host: + verbose_callback(f"Trying Contents API on {host}: {api_url}") response = self._host._resilient_get(api_url, headers=headers, timeout=30) response.raise_for_status() if verbose_callback: verbose_callback(f"Downloaded file: {host}/{dep_ref.repo_url}/{file_path}") - return response.content + return self._extract_contents_api_payload(response, is_github_host) except requests.exceptions.HTTPError as e: if e.response.status_code == 404: # For generic hosts, try remaining API version candidates before ref fallback for candidate_url in api_url_candidates[1:]: try: + if verbose_callback: + verbose_callback( + f"Contents API 404; trying next candidate: {candidate_url}" + ) candidate_resp = self._host._resilient_get( candidate_url, headers=headers, timeout=30 ) @@ -699,7 +703,7 @@ def download_github_file( verbose_callback( f"Downloaded file: {host}/{dep_ref.repo_url}/{file_path}" ) - return candidate_resp.content + return 
self._extract_contents_api_payload(candidate_resp, is_github_host) except requests.exceptions.HTTPError as ce: if ce.response.status_code != 404: raise RuntimeError( # noqa: B904 @@ -709,28 +713,21 @@ def download_github_file( # Try fallback branches if the specified ref fails if ref not in ["main", "master"]: raise RuntimeError( # noqa: B904 - f"File not found: {file_path} at ref '{ref}' in {dep_ref.repo_url}" + self._build_unsupported_or_missing_error( + host, + dep_ref.repo_url, + file_path, + ref, + api_url_candidates, + is_github_host=is_github_host, + ) ) # Try the other default branch fallback_ref = "master" if ref == "main" else "main" - - # Build fallback URL candidates (same structure as primary) - if host == "github.com": - fallback_url_candidates = [ - f"https://api.github.com/repos/{owner}/{repo}" - f"/contents/{file_path}?ref={fallback_ref}" - ] - elif host.lower().endswith(".ghe.com"): - fallback_url_candidates = [ - f"https://api.{host}/repos/{owner}/{repo}" - f"/contents/{file_path}?ref={fallback_ref}" - ] - else: - fallback_url_candidates = [ - f"https://{host}/api/v1/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}", - f"https://{host}/api/v3/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}", - ] + fallback_url_candidates = self._build_contents_api_urls( + host, owner, repo, file_path, fallback_ref + ) for fallback_url in fallback_url_candidates: try: @@ -742,23 +739,36 @@ def download_github_file( verbose_callback( f"Downloaded file: {host}/{dep_ref.repo_url}/{file_path}" ) - return response.content - except requests.exceptions.HTTPError: - pass + return self._extract_contents_api_payload(response, is_github_host) + except requests.exceptions.HTTPError as fe: + if fe.response.status_code != 404: + raise RuntimeError( # noqa: B904 + f"Failed to download {file_path}: HTTP {fe.response.status_code}" + ) raise RuntimeError( # noqa: B904 - f"File not found: {file_path} in {dep_ref.repo_url} " - f"(tried refs: {ref}, 
{fallback_ref})" + self._build_unsupported_or_missing_error( + host, + dep_ref.repo_url, + file_path, + ref, + api_url_candidates, + is_github_host=is_github_host, + fallback_ref=fallback_ref, + ) ) elif e.response.status_code in (401, 403): # Distinguish rate limiting from auth failure. + # X-RateLimit-* headers are GitHub-specific; treat as + # rate-limit only when the host is in the GitHub family. is_rate_limit = False - try: - rl_remaining = e.response.headers.get("X-RateLimit-Remaining") - if rl_remaining is not None and int(rl_remaining) == 0: - is_rate_limit = True - except (TypeError, ValueError): - pass + if is_github_host: + try: + rl_remaining = e.response.headers.get("X-RateLimit-Remaining") + if rl_remaining is not None and int(rl_remaining) == 0: + is_rate_limit = True + except (TypeError, ValueError): + pass if is_rate_limit: error_msg = f"GitHub API rate limit exceeded for {dep_ref.repo_url}. " @@ -782,9 +792,9 @@ def download_github_file( ) raise RuntimeError(error_msg) # noqa: B904 - # Token may lack SSO/SAML authorization for this org. # Retry without auth -- the repo might be public. - if token and not host.lower().endswith(".ghe.com"): + # GHES/GHE-DR don't support unauthenticated org-scoped retries. 
+ if token and is_github_host and not host.lower().endswith(".ghe.com"): try: unauth_headers: dict[str, str] = {"Accept": "application/vnd.github.v3.raw"} response = self._host._resilient_get( @@ -795,7 +805,7 @@ def download_github_file( verbose_callback( f"Downloaded file: {host}/{dep_ref.repo_url}/{file_path}" ) - return response.content + return self._extract_contents_api_payload(response, is_github_host) except requests.exceptions.HTTPError: pass # Fall through to the original error @@ -811,17 +821,166 @@ def download_github_file( port=dep_ref.port if dep_ref else None, dep_url=dep_ref.repo_url if dep_ref else None, ) - elif token and not host.lower().endswith(".ghe.com"): + elif is_github_host and not host.lower().endswith(".ghe.com"): error_msg += ( "Both authenticated and unauthenticated access " "were attempted. The repository may be private, " "or your token may lack SSO/SAML authorization " "for this organization." ) - else: + elif is_github_host: error_msg += "Please check your GitHub token permissions." + else: + # Generic host: don't claim SSO/SAML or "GitHub token". + error_msg += ( + f"Host {host} rejected the request. " + "Verify the repository exists and that any token " + "configured via 'git credential' has access." + ) raise RuntimeError(error_msg) # noqa: B904 else: raise RuntimeError(f"Failed to download {file_path}: HTTP {e.response.status_code}") # noqa: B904 except requests.exceptions.RequestException as e: raise RuntimeError(f"Network error downloading {file_path}: {e}") # noqa: B904 + + # ------------------------------------------------------------------ + # Helpers for download_github_file + # ------------------------------------------------------------------ + + @staticmethod + def _build_contents_api_urls( + host: str, owner: str, repo: str, file_path: str, ref: str + ) -> list[str]: + """Return the ordered list of Contents-API URL candidates for *host*. 
+ + - github.com -> single api.github.com candidate + - *.ghe.com (GHE Cloud / GHE Data Residency) -> single api.<host> candidate + - generic host -> Gitea-native /api/v1/ then Gogs-compat /api/v3/ + + GitLab uses /api/v4/projects/:id/repository/files/... which has a + different shape; it is intentionally NOT included. GitLab support + is limited to git-clone operations. + """ + if is_github_hostname(host): + if host.lower() == "github.com": + return [ + f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" + ] + return [f"https://api.{host}/repos/{owner}/{repo}/contents/{file_path}?ref={ref}"] + return [ + f"https://{host}/api/v1/repos/{owner}/{repo}/contents/{file_path}?ref={ref}", + f"https://{host}/api/v3/repos/{owner}/{repo}/contents/{file_path}?ref={ref}", + ] + + @staticmethod + def _build_generic_host_auth_headers( + host: str, auth_ctx, *, accept: str | None = None + ) -> dict[str, str]: + """Build HTTP headers for a generic-host (non-GitHub) request. + + SECURITY GUARD: Only attach Authorization when the token is + unambiguously intended for this host. A token resolved from a + global env var (GITHUB_APM_PAT, GITHUB_TOKEN, GH_TOKEN) MUST NOT + be sent to an arbitrary non-GitHub host -- doing so leaks the + user's GitHub PAT to whatever FQDN is in the dependency line. + The clone path at ``get_clone_url`` already enforces the same + guard via ``is_github_hostname``; this mirrors it for HTTP file + downloads. + + Forwarding is allowed when: + - source == ``git-credential-fill``: git's credential helper + looks tokens up by host, so they are host-scoped by + construction. + - source == ``GITHUB_APM_PAT_<ORG>``: per-org env var is + explicit user opt-in for that org's host. + - the user opted into this host as their GitHub Enterprise + Server via ``GITHUB_HOST=<host>``: the token is intended for + this host, even if the FQDN is not under ``*.ghe.com``.
+ """ + headers: dict[str, str] = {} + if accept: + headers["Accept"] = accept + if auth_ctx is None or not getattr(auth_ctx, "token", None): + return headers + source = getattr(auth_ctx, "source", None) or "" + configured_host = os.environ.get("GITHUB_HOST", "").strip().lower() + host_lower = (host or "").lower() + host_scoped = source == "git-credential-fill" + org_scoped = source.startswith("GITHUB_APM_PAT_") + configured_ghes = bool(configured_host) and host_lower == configured_host + if host_scoped or org_scoped or configured_ghes: + headers["Authorization"] = f"token {auth_ctx.token}" + return headers + + @staticmethod + def _extract_contents_api_payload(response, is_github_host: bool) -> bytes: + """Decode a Contents-API response into raw file bytes. + + - GitHub family: ``Accept: application/vnd.github.v3.raw`` returns + the file bytes directly; pass through ``response.content``. + - Generic hosts (Gitea, Gogs): the raw-media accept header is + ignored and the server returns a JSON envelope of the form:: + + {"content": "<base64>", "encoding": "base64", ...} + + Decode ``content`` as base64 and return the resulting bytes. + Some Gitea installations also emit ``encoding: ""`` with raw + content -- pass that through unchanged. If the response is not + a JSON envelope at all (custom proxy, raw bytes), fall back to + ``response.content``.
+ """ + if is_github_host: + return response.content + + body = response.content + try: + ctype = str((response.headers or {}).get("Content-Type") or "").lower() + except (AttributeError, TypeError): + ctype = "" + if "json" not in ctype and not ( + isinstance(body, (bytes, bytearray)) and body.lstrip().startswith(b"{") + ): + return body + try: + payload = json.loads(body.decode("utf-8")) + except (ValueError, UnicodeDecodeError, AttributeError): + return body + if not isinstance(payload, dict) or "content" not in payload: + return body + encoding = (payload.get("encoding") or "").lower() + content_field = payload.get("content") or "" + if encoding == "base64": + try: + return base64.b64decode(content_field, validate=False) + except (ValueError, TypeError): + return body + # Non-base64 envelope (rare): return literal content if it's a string, + # otherwise fall back to the raw body. + if isinstance(content_field, str): + return content_field.encode("utf-8") + return body + + @staticmethod + def _build_unsupported_or_missing_error( + host: str, + repo_url: str, + file_path: str, + ref: str, + api_url_candidates: list[str], + *, + is_github_host: bool, + fallback_ref: str | None = None, + ) -> str: + """Build a discoverable error when no Contents-API candidate hits 200.""" + ref_part = f"(tried refs: {ref}, {fallback_ref})" if fallback_ref else f"at ref '{ref}'" + if is_github_host: + return f"File not found: {file_path} in {repo_url} {ref_part}" + # Non-GitHub host: name what was tried so users can diagnose + # GitLab / unsupported-host cases without re-reading source. + tried = ", ".join(["raw"] + [u.split("/api/")[1].split("/")[0] for u in api_url_candidates]) + return ( + f"File not found on generic host {host}: {file_path} in " + f"{repo_url} {ref_part}. Tried URL families: {tried}. " + "If this is GitLab, virtual subdirectory packages are not " + "supported (use the dict-form full repo URL instead)." 
+ ) diff --git a/tests/test_github_downloader.py b/tests/test_github_downloader.py index aeb7139bc..9d7b6d1df 100644 --- a/tests/test_github_downloader.py +++ b/tests/test_github_downloader.py @@ -1431,14 +1431,22 @@ def test_credential_fill_for_non_default_host(self): assert actual_headers.get("Authorization") == "token enterprise-token" def test_non_default_host_uses_global_token(self): - """Global env vars (GITHUB_APM_PAT) are now tried for all hosts, not just the default.""" + """Global env vars (GITHUB_APM_PAT) must NOT leak to an arbitrary non-GitHub host. + + SECURITY: forwarding a GitHub PAT to ``ghes.company.com`` (or any + other FQDN) without explicit user opt-in exfiltrates the token. + The user must opt in via ``GITHUB_HOST=<host>`` (declares the + host as their GitHub Enterprise Server) or via a per-org env var + ``GITHUB_APM_PAT_<ORG>`` -- bare ``GITHUB_APM_PAT`` against a + custom-domain host gets no Authorization header. + """ with ( patch.dict(os.environ, {"GITHUB_APM_PAT": "default-host-pat"}, clear=True), patch( "apm_cli.core.token_manager.GitHubTokenManager.resolve_credential_from_git", - ) as mock_cred, + return_value=None, + ), ): - mock_cred.return_value = "enterprise-cred" downloader = GitHubPackageDownloader() assert downloader.github_token == "default-host-pat" @@ -1451,19 +1459,59 @@ def test_non_default_host_uses_global_token(self): mock_response_200.status_code = 200 mock_response_200.content = b"enterprise content" mock_response_200.raise_for_status = Mock() + mock_response_200.headers = {} with patch.object( downloader, "_resilient_get", return_value=mock_response_200 ) as mock_get: + # The raw-URL path runs first; mock returns 200 immediately.
result = downloader._download_github_file(dep_ref, "SKILL.md", "main") assert result == b"enterprise content" - actual_headers = mock_get.call_args[1].get("headers") or mock_get.call_args[0][1] - # Global PAT is now used for non-default hosts too - assert actual_headers.get("Authorization") == "token default-host-pat" + for call in mock_get.call_args_list: + req_headers = call[1].get("headers", {}) or {} + assert "Authorization" not in req_headers, ( + f"PAT leaked to {call[0][0]} without GITHUB_HOST opt-in: {req_headers!r}" + ) - # Credential fill is not reached because the global env var is found first - mock_cred.assert_not_called() + def test_global_token_forwarded_when_github_host_is_configured(self): + """When ``GITHUB_HOST=<host>`` is set, the global PAT IS forwarded. + + This is the explicit user opt-in: declaring a custom domain as + their GitHub Enterprise Server. The complement to + ``test_non_default_host_uses_global_token``. + """ + with ( + patch.dict( + os.environ, + {"GITHUB_APM_PAT": "ghes-pat", "GITHUB_HOST": "ghes.company.com"}, + clear=True, + ), + patch( + "apm_cli.core.token_manager.GitHubTokenManager.resolve_credential_from_git", + return_value=None, + ), + ): + downloader = GitHubPackageDownloader() + dep_ref = DependencyReference( + repo_url="owner/repo", + host="ghes.company.com", + ) + + mock_response_200 = Mock() + mock_response_200.status_code = 200 + mock_response_200.content = b"enterprise content" + mock_response_200.raise_for_status = Mock() + mock_response_200.headers = {} + + with patch.object( + downloader, "_resilient_get", return_value=mock_response_200 + ) as mock_get: + result = downloader._download_github_file(dep_ref, "SKILL.md", "main") + assert result == b"enterprise content" + + first_call_headers = mock_get.call_args_list[0][1].get("headers", {}) + assert first_call_headers.get("Authorization") == "token ghes-pat" def test_error_message_mentions_gh_auth_login(self): """Error message should mention 'gh auth login' when no
token is available.""" @@ -2063,11 +2111,25 @@ def _always_fail(*args, **kwargs): # --------------------------------------------------------------------------- -def _make_resp(status_code: int, content: bytes = b"") -> Mock: - """Build a minimal mock requests.Response.""" +def _make_resp( + status_code: int, + content: bytes = b"", + *, + content_type: str = "", + headers: dict | None = None, +) -> Mock: + """Build a minimal mock requests.Response. + + Set content_type='application/json' (or include 'json' in headers + Content-Type) when simulating a Gitea/Gogs JSON envelope. + """ resp = Mock() resp.status_code = status_code resp.content = content + hdrs = dict(headers or {}) + if content_type and "Content-Type" not in hdrs: + hdrs["Content-Type"] = content_type + resp.headers = hdrs if status_code >= 400: resp.raise_for_status = Mock(side_effect=requests_lib.exceptions.HTTPError(response=resp)) else: @@ -2075,6 +2137,20 @@ def _make_resp(status_code: int, content: bytes = b"") -> Mock: return resp +def _gitea_json_envelope(file_bytes: bytes) -> bytes: + """Encode *file_bytes* as a Gitea/Gogs Contents-API JSON envelope.""" + import base64 as _b64 + import json as _json + + return _json.dumps( + { + "name": "skill.md", + "encoding": "base64", + "content": _b64.b64encode(file_bytes).decode("ascii"), + } + ).encode("utf-8") + + class TestGiteaRawUrlDownload: """Gitea raw URL path: /{owner}/{repo}/raw/{ref}/{file}.""" @@ -2096,23 +2172,87 @@ def test_raw_url_succeeds_on_first_attempt(self): assert first_url == "https://gitea.myorg.com/owner/repo/raw/main/README.md" assert mock_get.call_count == 1 - def test_raw_url_with_token_adds_auth_header(self): - """Token is forwarded as Authorization header in the raw URL request. + def test_no_token_sent_to_non_github_host_via_env_var(self): + """SECURITY: GITHUB_APM_PAT MUST NOT leak to a non-GitHub host. - Token resolution is lazy, so the env patch must stay active for the - duration of the download call. 
+ Regression trap for the PAT exfiltration vector. The clone path at + ``get_clone_url`` (download_strategies.py:262-279) only embeds a + token when ``is_github_hostname(host)``; the file-download path + must mirror that guard. """ - dep_ref = DependencyReference.parse("gitea.myorg.com/owner/repo") + dep_ref = DependencyReference.parse("gitea.evil.example.com/owner/repo") raw_ok = _make_resp(200, b"data") - with patch.dict(os.environ, {"GITHUB_APM_PAT": "gta-tok"}, clear=True): + with patch.dict( + os.environ, + { + "GITHUB_APM_PAT": "ghp_supersecret", + "GITHUB_TOKEN": "ghp_other", + }, + clear=True, + ): with _CRED_FILL_PATCH: downloader = GitHubPackageDownloader() with patch.object(downloader, "_resilient_get", return_value=raw_ok) as mock_get: downloader.download_raw_file(dep_ref, "README.md", "main") + # Inspect EVERY HTTP call made for this download. + for call in mock_get.call_args_list: + req_headers = call[1].get("headers", {}) or {} + assert "Authorization" not in req_headers, ( + f"PAT leaked to {call[0][0]}: headers={req_headers!r}" + ) + + def test_token_still_sent_when_host_is_github(self): + """github.com receives the Authorization header (regression trap).""" + dep_ref = DependencyReference.parse("owner/repo") # default host + api_ok = _make_resp(200, b"data") + + with patch.dict(os.environ, {"GITHUB_APM_PAT": "ghp_real_gh"}, clear=True): + with _CRED_FILL_PATCH: + downloader = GitHubPackageDownloader() + with patch.object(downloader, "_try_raw_download", return_value=None): + with patch.object(downloader, "_resilient_get", return_value=api_ok) as mock_get: + downloader.download_raw_file(dep_ref, "README.md", "main") + + api_headers = mock_get.call_args_list[0][1].get("headers", {}) + assert api_headers.get("Authorization") == "token ghp_real_gh" + + def test_token_still_sent_when_host_is_ghe(self): + """*.ghe.com (GHE Cloud / Data Residency) receives the token too.""" + dep_ref = DependencyReference.parse("acme.ghe.com/owner/repo") + api_ok = 
_make_resp(200, b"data") + + with patch.dict(os.environ, {"GITHUB_APM_PAT": "ghp_ghe"}, clear=True): + with _CRED_FILL_PATCH: + downloader = GitHubPackageDownloader() + with patch.object(downloader, "_resilient_get", return_value=api_ok) as mock_get: + downloader.download_raw_file(dep_ref, "README.md", "main") + + api_headers = mock_get.call_args_list[0][1].get("headers", {}) + assert api_headers.get("Authorization") == "token ghp_ghe" + + def test_git_credential_helper_token_sent_to_generic_host(self): + """Host-scoped credentials (git credential helper) ARE sent to generic hosts. + + The credential helper is host-scoped by construction, so forwarding + is safe and necessary for private Gitea/Gogs repos. This is the + symmetric case to the security guard test above. + """ + dep_ref = DependencyReference.parse("gitea.myorg.com/owner/repo") + raw_ok = _make_resp(200, b"data") + + with patch.dict(os.environ, {}, clear=True): + with patch( + "apm_cli.core.token_manager.GitHubTokenManager.resolve_credential_from_git", + return_value="gitea-host-scoped-token", + ): + downloader = GitHubPackageDownloader() + with patch.object(downloader, "_resilient_get", return_value=raw_ok) as mock_get: + downloader.download_raw_file(dep_ref, "README.md", "main") + raw_headers = mock_get.call_args_list[0][1].get("headers", {}) - assert "Authorization" in raw_headers + assert raw_headers.get("Authorization") == "token gitea-host-scoped-token" def test_falls_back_to_api_v1_when_raw_returns_non_200(self): """When the raw URL returns 404, the API v1 path is tried next.""" @@ -2131,6 +2271,30 @@ def test_falls_back_to_api_v1_when_raw_returns_non_200(self): assert urls[0] == "https://gitea.myorg.com/owner/repo/raw/main/README.md" assert urlparse(urls[1]).path.startswith("/api/v1/") + def test_raw_url_request_exception_falls_through_to_api(self): + """RequestException on the raw URL path must not abort -- API path runs. 
+ + Regression trap for the ``except (RequestException, OSError)`` + swallow at the raw-URL try block. Previously this only had unit + coverage that pinned the swallow itself; this test exercises the + downstream "fallthrough must reach the API path" promise. + """ + dep_ref = DependencyReference.parse("gitea.myorg.com/owner/repo") + expected = b"recovered via api" + api_ok = _make_resp(200, expected) + + side_effects = [ + requests_lib.exceptions.ConnectionError("boom"), + api_ok, + ] + with patch.object(self.downloader, "_resilient_get", side_effect=side_effects) as mock_get: + result = self.downloader.download_raw_file(dep_ref, "README.md", "main") + + assert result == expected + urls = [c[0][0] for c in mock_get.call_args_list] + assert urls[0].endswith("/owner/repo/raw/main/README.md") + assert urlparse(urls[1]).path.startswith("/api/v1/") + class TestGiteaGogsApiVersionNegotiation: """API version negotiation: raw URL -> v1 -> v3 for Gitea/Gogs generic hosts. @@ -2149,11 +2313,16 @@ def test_v1_falls_back_to_v3_for_generic_hosts(self): """When Gitea raw URL and v1 both return 404, v3 is tried and succeeds.""" dep_ref = DependencyReference.parse("gitea.myorg.com/owner/repo") expected = b"gitea v3 file content" + envelope_resp = _make_resp( + 200, + _gitea_json_envelope(expected), + content_type="application/json", + ) side_effects = [ _make_resp(404), # raw URL _make_resp(404), # v1 - _make_resp(200, expected), # v3 + envelope_resp, # v3 ] with patch.object(self.downloader, "_resilient_get", side_effect=side_effects) as mock_get: result = self.downloader.download_raw_file(dep_ref, "skill.md", "main") @@ -2168,11 +2337,16 @@ def test_gitea_v1_succeeds_without_trying_v3(self): """When v1 returns 200, v3 must never be called.""" dep_ref = DependencyReference.parse("gitea.example.com/owner/repo") expected = b"gitea content" + envelope_resp = _make_resp( + 200, + _gitea_json_envelope(expected), + content_type="application/json", + ) with patch.object( 
self.downloader, "_resilient_get", - side_effect=[_make_resp(404), _make_resp(200, expected)], + side_effect=[_make_resp(404), envelope_resp], ) as mock_get: result = self.downloader.download_raw_file(dep_ref, "file.md", "main") @@ -2180,16 +2354,94 @@ def test_gitea_v1_succeeds_without_trying_v3(self): urls = [c[0][0] for c in mock_get.call_args_list] assert not any(urlparse(u).path.startswith("/api/v3/") for u in urls) - def test_all_api_versions_404_raises_runtime_error(self): - """When every API version returns 404 for both refs, a clear error is raised.""" + def test_gitea_api_decodes_json_envelope_into_file_bytes(self): + """API path returns Gitea ``{content,encoding}`` envelope -> decoded bytes.""" + dep_ref = DependencyReference.parse("gitea.example.com/owner/repo") + expected = b"# Hello from Gitea base64\n" + envelope_resp = _make_resp( + 200, + _gitea_json_envelope(expected), + content_type="application/json; charset=utf-8", + ) + + with patch.object( + self.downloader, + "_resilient_get", + side_effect=[_make_resp(404), envelope_resp], + ): + result = self.downloader.download_raw_file(dep_ref, "skill.md", "main") + + assert result == expected, "Gitea JSON envelope must be base64-decoded" + + def test_gitea_api_passthrough_when_server_returns_raw_bytes(self): + """Some Gitea proxies serve raw bytes; passthrough must still work.""" + dep_ref = DependencyReference.parse("gitea.example.com/owner/repo") + expected = b"raw markdown bytes" + # No JSON content-type; body is not a JSON envelope. + raw_resp = _make_resp(200, expected, content_type="text/plain") + + with patch.object( + self.downloader, + "_resilient_get", + side_effect=[_make_resp(404), raw_resp], + ): + result = self.downloader.download_raw_file(dep_ref, "skill.md", "main") + + assert result == expected + + def test_fallback_candidate_loop_reraises_non_404(self): + """500 on a candidate URL must surface as RuntimeError, not silent skip. 
+ + Pins the symmetry-fix between the primary loop (already re-raised + non-404) and the fallback-ref loop (previously swallowed all + HTTPErrors via bare ``pass``). + """ + dep_ref = DependencyReference.parse("gitea.example.com/owner/repo") + + # raw=404, v1=404 (forces ref-fallback), v1@master=500 + side_effects = [ + _make_resp(404), # raw main + _make_resp(404), # v1 main + _make_resp(404), # v3 main + _make_resp(500), # v1 master -- must re-raise + ] + with patch.object(self.downloader, "_resilient_get", side_effect=side_effects): + with pytest.raises(RuntimeError, match=r"HTTP 500"): + self.downloader.download_raw_file(dep_ref, "missing.md", "main") + + def test_primary_candidate_loop_reraises_non_404(self): + """500 on the v3 fallback in the primary loop also re-raises.""" + dep_ref = DependencyReference.parse("gitea.example.com/owner/repo") + + side_effects = [ + _make_resp(404), # raw + _make_resp(404), # v1 + _make_resp(500), # v3 -- must re-raise + ] + with patch.object(self.downloader, "_resilient_get", side_effect=side_effects): + with pytest.raises(RuntimeError, match=r"HTTP 500"): + self.downloader.download_raw_file(dep_ref, "missing.md", "main") + + def test_all_api_versions_404_raises_descriptive_error(self): + """When every API version returns 404 for both refs, a clear error is raised. + + The error must name the host, the file path, and which API + families were attempted -- so users staring at a GitLab or + unsupported-host failure see an actionable signal. 
+ """ dep_ref = DependencyReference.parse("git.example.com/owner/repo") # raw(main) + v1(main) + v3(main) = 3 calls, then v1(master) + v3(master) = 2 calls side_effects = [_make_resp(404)] * 5 with patch.object(self.downloader, "_resilient_get", side_effect=side_effects): - with pytest.raises(RuntimeError, match="File not found"): + with pytest.raises(RuntimeError) as excinfo: self.downloader.download_raw_file(dep_ref, "missing.md", "main") + msg = str(excinfo.value) + assert "git.example.com" in msg + assert "missing.md" in msg + assert "GitLab" in msg, "Error should hint at GitLab unsupported case" + def test_github_com_uses_api_github_com_not_api_v4(self): """github.com must still use api.github.com, never /api/v4/.""" dep_ref = DependencyReference.parse("owner/repo") @@ -2205,6 +2457,29 @@ def test_github_com_uses_api_github_com_not_api_v4(self): assert url_called.startswith("https://api.github.com/") assert not urlparse(url_called).path.startswith("/api/v4/") + def test_verbose_callback_logs_each_attempt(self): + """--verbose surfaces raw -> v1 -> v3 chain so users can diagnose failures.""" + dep_ref = DependencyReference.parse("gitea.example.com/owner/repo") + expected = b"ok" + envelope_resp = _make_resp( + 200, _gitea_json_envelope(expected), content_type="application/json" + ) + side_effects = [ + _make_resp(404), # raw URL + _make_resp(404), # v1 + envelope_resp, # v3 + ] + captured: list[str] = [] + with patch.object(self.downloader, "_resilient_get", side_effect=side_effects): + self.downloader.download_raw_file( + dep_ref, "skill.md", "main", verbose_callback=captured.append + ) + + joined = "\n".join(captured) + assert "Trying raw URL" in joined + assert "Trying Contents API" in joined or "trying next candidate" in joined + assert "/api/v3/" in joined + if __name__ == "__main__": pytest.main([__file__]) From 3928c242a822c237500ed69f36e38b28b235afb2 Mon Sep 17 00:00:00 2001 From: Daniel Meppiel Date: Fri, 8 May 2026 22:36:12 +0200 Subject: [PATCH 
12/14] Address round-2 panel follow-ups for Gitea/Gogs support - Extract _is_configured_ghes() helper to centralise the GITHUB_HOST opt-in check; use it in both routing classification and the auth header builder so the rule cannot drift. - Thread is_github_host into _build_contents_api_urls so GHES hosts declared via GITHUB_HOST skip the wasted Gitea v1 round-trip. - Widen the 401/403 generic-host error to enumerate all three token sources (git credential helper, per-org GITHUB_APM_PAT_<ORG>, GITHUB_HOST opt-in). - Include file_path and ref in the raw-URL fallback verbose log for easier diagnosis. - Bump requests floor to >=2.31.0 to mitigate CVE-2023-32681 (cross-host redirect Authorization leak); matters now that the PR widens cross-host download surface. - Doc polish: slim the dependencies.md intro, move v1/v3 fallback detail next to Virtual Packages, normalise punctuation, add a cross-link to the GitHub Authentication Setup section. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- docs/src/content/docs/guides/dependencies.md | 4 +- .../.apm/skills/apm-usage/dependencies.md | 2 +- pyproject.toml | 2 +- src/apm_cli/deps/download_strategies.py | 56 ++++++++++++++----- uv.lock | 2 +- 5 files changed, 47 insertions(+), 19 deletions(-) diff --git a/docs/src/content/docs/guides/dependencies.md b/docs/src/content/docs/guides/dependencies.md index 5fda882d9..57dccb107 100644 --- a/docs/src/content/docs/guides/dependencies.md +++ b/docs/src/content/docs/guides/dependencies.md @@ -15,7 +15,7 @@ APM dependencies are git repositories containing `.apm/` directories with contex - **Build on tested context** instead of starting from scratch - **Maintain consistency** across multiple repositories and teams -APM supports any git-accessible host — GitHub, GitLab, Bitbucket, Gitea, Gogs, self-hosted instances, and more.
For self-hosted Gitea/Gogs, virtual subdirectory and raw-file packages resolve via the `/{owner}/{repo}/raw/{ref}/{path}` URL with a Contents API v1/v3 fallback. GitLab nested-group repos require the object form (see below). +APM supports any git-accessible host — GitHub, GitLab, Bitbucket, Gitea, Gogs, self-hosted instances, and more. See [GitHub Authentication Setup](#github-authentication-setup) below for how tokens flow to non-GitHub hosts via the git credential helper. ## Dependency Types @@ -36,6 +36,8 @@ APM supports multiple dependency types: **Virtual File Packages** download a single file (like a prompt or instruction) and integrate it directly. +For self-hosted **Gitea** and **Gogs**, virtual subdirectory and file packages resolve via the `/{owner}/{repo}/raw/{ref}/{path}` URL first, then fall back to the Contents API (v1 native, v3 Gogs-compat). GitLab is not yet supported for virtual packages -- use git-clone-based dependencies for GitLab repos. + ### Claude Skills Claude Skills are packages with a `SKILL.md` file that describe capabilities for AI agents. APM can install them and transform them for your target platform: diff --git a/packages/apm-guide/.apm/skills/apm-usage/dependencies.md b/packages/apm-guide/.apm/skills/apm-usage/dependencies.md index edaf937ac..d6903997e 100644 --- a/packages/apm-guide/.apm/skills/apm-usage/dependencies.md +++ b/packages/apm-guide/.apm/skills/apm-usage/dependencies.md @@ -129,7 +129,7 @@ Virtual packages reference a subset of a repository. Classification is by extension only. A path like `owner/repo/collections/security` (no extension) is a Subdirectory; the actual shape -- APM package (incl. dep-only `apm.yml` with no `.apm/`), skill bundle, or plugin -- is resolved at fetch time by probing for `apm.yml`. -**Self-hosted Git hosts (Gitea, Gogs):** virtual packages resolve via the host's `/{owner}/{repo}/raw/{ref}/{path}` URL with a Contents API v1/v3 fallback. 
GitLab nested-group repos (`group/subgroup/repo`) require the object form (`git: `, `path: `) -- shorthand is ambiguous on >2-segment paths. +**Gitea and Gogs (self-hosted or vendor-hosted):** virtual packages resolve via the host's `/{owner}/{repo}/raw/{ref}/{path}` URL first, then fall back to the Contents API (v1 native, v3 Gogs-compat). GitLab nested-group repos (`group/subgroup/repo`) require the object form (`git: `, `path: `) -- shorthand is ambiguous on >2-segment paths. > **Removed (#1094):** the legacy `.collection.yml` / `.collection.yaml` virtual-package form is no longer supported. Convert any `.collection.yml` to an `apm.yml` with a `dependencies:` section, then reference the resulting subdirectory as a regular subdirectory virtual package. diff --git a/pyproject.toml b/pyproject.toml index 02716890c..149c98d6f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ dependencies = [ "click>=8.0.0", "colorama>=0.4.6", "pyyaml>=6.0.0", - "requests>=2.28.0", + "requests>=2.31.0", "python-frontmatter>=1.0.0", "llm>=0.17.0", "llm-github-models>=0.1.0", diff --git a/src/apm_cli/deps/download_strategies.py b/src/apm_cli/deps/download_strategies.py index 7e32a97bc..7da01d91a 100644 --- a/src/apm_cli/deps/download_strategies.py +++ b/src/apm_cli/deps/download_strategies.py @@ -639,10 +639,7 @@ def download_github_file( # --- Generic host: raw URL first, then API version negotiation --- # For non-GitHub non-GHE hosts (Gitea, Gogs, self-hosted git), try the # raw URL path first, then negotiate API versions v1 -> v3. 
- is_github_host = is_github_hostname(host) or ( - os.environ.get("GITHUB_HOST", "").strip().lower() == (host or "").lower() - and bool(os.environ.get("GITHUB_HOST", "").strip()) - ) + is_github_host = is_github_hostname(host) or self._is_configured_ghes(host) if not is_github_host: raw_url = f"https://{host}/{owner}/{repo}/raw/{ref}/{file_path}" raw_headers = self._build_generic_host_auth_headers(host, file_ctx, accept=None) @@ -657,13 +654,15 @@ def download_github_file( except (requests.RequestException, OSError) as raw_err: if verbose_callback: verbose_callback( - f"Raw URL on {host} failed: {type(raw_err).__name__}; " - f"falling back to Contents API." + f"Raw URL on {host} failed for {file_path}@{ref}: " + f"{type(raw_err).__name__}; falling back to Contents API." ) # --- Contents API path (authenticated, enterprise, or raw fallback) --- # Build API URL candidates - format differs by host type - api_url_candidates = self._build_contents_api_urls(host, owner, repo, file_path, ref) + api_url_candidates = self._build_contents_api_urls( + host, owner, repo, file_path, ref, is_github_host=is_github_host + ) api_url = api_url_candidates[0] # Set up authentication headers @@ -834,8 +833,11 @@ def download_github_file( # Generic host: don't claim SSO/SAML or "GitHub token". error_msg += ( f"Host {host} rejected the request. " - "Verify the repository exists and that any token " - "configured via 'git credential' has access." + "Verify the repository exists and that the token has " + "access. Tokens are sourced from your git credential " + "helper, a per-org GITHUB_APM_PAT_ env var, or " + f"GITHUB_HOST={host} when this host is your GitHub " + "Enterprise Server." 
) raise RuntimeError(error_msg) # noqa: B904 else: @@ -847,21 +849,47 @@ def download_github_file( # Helpers for download_github_file # ------------------------------------------------------------------ + @staticmethod + def _is_configured_ghes(host: str) -> bool: + """Return True when *host* matches the user's declared GHES via GITHUB_HOST. + + ``GITHUB_HOST=`` is the documented opt-in for treating + a non-``*.ghe.com`` FQDN as GitHub-family. Centralised so the routing + check, header builder, and Contents-API URL builder cannot drift. + """ + configured = os.environ.get("GITHUB_HOST", "").strip().lower() + if not configured: + return False + return (host or "").lower() == configured + @staticmethod def _build_contents_api_urls( - host: str, owner: str, repo: str, file_path: str, ref: str + host: str, + owner: str, + repo: str, + file_path: str, + ref: str, + *, + is_github_host: bool | None = None, ) -> list[str]: """Return the ordered list of Contents-API URL candidates for *host*. - github.com -> single api.github.com candidate - - *.ghe.com (GHE Cloud / GHE Data Residency) -> single api. candidate + - *.ghe.com (GHE Cloud / GHE Data Residency) or GITHUB_HOST-declared + GHES -> single api. candidate (skips Gitea v1 round-trip) - generic host -> Gitea-native /api/v1/ then Gogs-compat /api/v3/ GitLab uses /api/v4/projects/:id/repository/files/... which has a different shape; it is intentionally NOT included. GitLab support is limited to git-clone operations. + + ``is_github_host`` lets the caller pass its already-computed + classification (which honours ``GITHUB_HOST``); when omitted we + fall back to ``is_github_hostname`` plus the GHES env-var check. 
""" - if is_github_hostname(host): + if is_github_host is None: + is_github_host = is_github_hostname(host) or DownloadDelegate._is_configured_ghes(host) + if is_github_host: if host.lower() == "github.com": return [ f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" @@ -903,11 +931,9 @@ def _build_generic_host_auth_headers( if auth_ctx is None or not getattr(auth_ctx, "token", None): return headers source = getattr(auth_ctx, "source", None) or "" - configured_host = os.environ.get("GITHUB_HOST", "").strip().lower() - host_lower = (host or "").lower() host_scoped = source == "git-credential-fill" org_scoped = source.startswith("GITHUB_APM_PAT_") - configured_ghes = bool(configured_host) and host_lower == configured_host + configured_ghes = DownloadDelegate._is_configured_ghes(host) if host_scoped or org_scoped or configured_ghes: headers["Authorization"] = f"token {auth_ctx.token}" return headers diff --git a/uv.lock b/uv.lock index 5d448ed8f..39a9a0595 100644 --- a/uv.lock +++ b/uv.lock @@ -228,7 +228,7 @@ requires-dist = [ { name = "pytest-xdist", marker = "extra == 'dev'", specifier = ">=3.0.0" }, { name = "python-frontmatter", specifier = ">=1.0.0" }, { name = "pyyaml", specifier = ">=6.0.0" }, - { name = "requests", specifier = ">=2.28.0" }, + { name = "requests", specifier = ">=2.31.0" }, { name = "rich", specifier = ">=13.0.0" }, { name = "rich-click", specifier = ">=1.7.0" }, { name = "ruamel-yaml", specifier = ">=0.18.0" }, From 3c76c96c71b429b2ba70b4fe3318a55eb4dbd8b2 Mon Sep 17 00:00:00 2001 From: Daniel Meppiel Date: Fri, 8 May 2026 22:38:41 +0200 Subject: [PATCH 13/14] Regenerate NOTICE for requests floor bump Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- NOTICE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NOTICE b/NOTICE index c2fa9f37b..828c17e75 100644 --- a/NOTICE +++ b/NOTICE @@ -139,7 +139,7 @@ SOFTWARE. ## Component. 
requests -- Version requirement: `>=2.28.0` +- Version requirement: `>=2.31.0` - Upstream: https://github.com/psf/requests - SPDX: `Apache-2.0` - Notes: Apache-2.0 requires forwarding the upstream NOTICE file verbatim. From d7d7b9066ae468a8aa1a7a30481044bf84ef4c87 Mon Sep 17 00:00:00 2001 From: Daniel Meppiel Date: Fri, 8 May 2026 22:43:29 +0200 Subject: [PATCH 14/14] Fix CodeQL py/incomplete-url-substring-sanitization in 404 test Per .github/instructions/tests.instructions.md, URL/host assertions in tests must extract URL tokens and compare via urllib.parse, never via substring match. The 404-error test asserted 'git.example.com' as a substring of the error message, which CodeQL flags. Embed a canonical raw URL (https://<host>/<repo_url>/raw/<ref>/<file_path>) in the not-found error and parse it in the test via urlparse. The URL is also more useful UX -- terminals render it clickable. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/apm_cli/deps/download_strategies.py | 5 +++-- tests/test_github_downloader.py | 9 +++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/apm_cli/deps/download_strategies.py b/src/apm_cli/deps/download_strategies.py index 7da01d91a..9074f0c08 100644 --- a/src/apm_cli/deps/download_strategies.py +++ b/src/apm_cli/deps/download_strategies.py @@ -1004,9 +1004,10 @@ def _build_unsupported_or_missing_error( # Non-GitHub host: name what was tried so users can diagnose # GitLab / unsupported-host cases without re-reading source. tried = ", ".join(["raw"] + [u.split("/api/")[1].split("/")[0] for u in api_url_candidates]) + canonical_url = f"https://{host}/{repo_url}/raw/{ref}/{file_path}" return ( - f"File not found on generic host {host}: {file_path} in " - f"{repo_url} {ref_part}. Tried URL families: {tried}. " + f"File not found on generic host {host}: {canonical_url} {ref_part}. " + f"Tried URL families: {tried}.
" "If this is GitLab, virtual subdirectory packages are not " "supported (use the dict-form full repo URL instead)." ) diff --git a/tests/test_github_downloader.py b/tests/test_github_downloader.py index 9d7b6d1df..677255e77 100644 --- a/tests/test_github_downloader.py +++ b/tests/test_github_downloader.py @@ -2438,8 +2438,13 @@ def test_all_api_versions_404_raises_descriptive_error(self): self.downloader.download_raw_file(dep_ref, "missing.md", "main") msg = str(excinfo.value) - assert "git.example.com" in msg - assert "missing.md" in msg + # Use urlparse on the canonical URL embedded in the error message + # (per tests.instructions.md: never substring-match URLs). + url_tokens = [tok.strip("(),.;'\"") for tok in msg.split() if "://" in tok] + hosts = {urlparse(t).hostname for t in url_tokens} + assert hosts == {"git.example.com"}, f"Host not surfaced in error: {msg!r}" + paths = [urlparse(t).path for t in url_tokens] + assert any("missing.md" in p for p in paths) assert "GitLab" in msg, "Error should hint at GitLab unsupported case" def test_github_com_uses_api_github_com_not_api_v4(self):