diff --git a/CHANGELOG.md b/CHANGELOG.md index 5fb3872f0..aaa2580db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- ADO full HTTPS URLs with sub-path virtual packages (e.g. `https://dev.azure.com/org/proj/_git/repo/sub/path`) are now parsed correctly instead of being rejected. (#1254) - Fixed `apm install` crash (exit code 128) when a mono-repo package depends on a sibling pinned to a non-HEAD commit; installs now resolve with a single in-place fetch, and multiple SHA-pinned references to the same repository share a single cached clone. (#1258) - MCP server token injection now requires both an allowlisted server name and a verified HTTPS GitHub hostname, preventing PAT exfiltration via poisoned registry entries. (#1239) - `apm marketplace add` accepts GitLab-class hosts (`gitlab.com` and self-managed instances configured via `GITLAB_HOST` / `APM_GITLAB_HOSTS`); unsupported generic hosts now show separate recovery hints for GHES (`GITHUB_HOST`) and self-managed GitLab instead of only `GITHUB_HOST`. (#1149) diff --git a/src/apm_cli/models/dependency/reference.py b/src/apm_cli/models/dependency/reference.py index 10732e1c5..52c65a27a 100644 --- a/src/apm_cli/models/dependency/reference.py +++ b/src/apm_cli/models/dependency/reference.py @@ -13,6 +13,7 @@ is_github_hostname, is_gitlab_hostname, is_supported_git_host, + is_visualstudio_legacy_hostname, maybe_raise_bare_fqdn_github_gitlab_conflict, parse_artifactory_path, unsupported_host_error, @@ -816,7 +817,12 @@ def _detect_virtual_package(cls, dependency_str: str): is_artifactory = is_generic_host and is_artifactory_path(path_segments) if is_ado: - min_base_segments = 3 + # *.visualstudio.com encodes org in the subdomain; path is proj/repo (2 parts). + # dev.azure.com encodes org as the first path segment; path is org/proj/repo (3 parts). + if validated_host and is_visualstudio_legacy_hostname(validated_host): + min_base_segments = 2 + else: + min_base_segments = 3 elif is_artifactory: # Artifactory: artifactory/{repo-key}/{owner}/{repo} min_base_segments = 4 @@ -971,11 +977,22 @@ def _resolve_virtual_shorthand_repo(cls, repo_url, validated_host, virtual_path= if len(parts) >= 3 and is_supported_git_host(parts[0]): host = parts[0] if is_azure_devops_hostname(parts[0]): - if len(parts) < 5: - raise ValueError( - "Invalid Azure DevOps virtual package format: must be dev.azure.com/org/project/repo/path" - ) - repo_url = "/".join(parts[1:4]) + if is_visualstudio_legacy_hostname(parts[0]): + # myorg.visualstudio.com/proj/repo/path: org in subdomain, + # need at least host + proj + repo + 1 virtual segment. + if len(parts) < 4: + raise ValueError( + "Invalid Azure DevOps virtual package format: must be " + "myorg.visualstudio.com/project/repo/path" + ) + repo_url = "/".join(parts[1:3]) + else: + # dev.azure.com/org/proj/repo/path: org in path + if len(parts) < 5: + raise ValueError( + "Invalid Azure DevOps virtual package format: must be dev.azure.com/org/project/repo/path" + ) + repo_url = "/".join(parts[1:4]) elif is_artifactory_path(parts[1:]): art_result = parse_artifactory_path(parts[1:]) if art_result: @@ -1022,7 +1039,11 @@ def _resolve_shorthand_to_parsed_url(cls, repo_url, host): if len(parts) >= 3 and is_supported_git_host(parts[0]): host = parts[0] - if is_azure_devops_hostname(host) and len(parts) >= 4: + if is_visualstudio_legacy_hostname(host) and len(parts) >= 3: + # *.visualstudio.com/proj/repo: org is in the subdomain, path is proj/repo only + user_repo = "/".join(parts[1:3]) + elif is_azure_devops_hostname(host) and len(parts) >= 4: + # dev.azure.com/org/proj/repo: org is the first path segment user_repo = "/".join(parts[1:4]) elif not is_github_hostname(host) and not is_azure_devops_hostname(host): if is_artifactory_path(parts[1:]): @@ -1058,7 +1079,10 @@ def _resolve_shorthand_to_parsed_url(cls, repo_url, host): is_ado_host = host and is_azure_devops_hostname(host) if is_ado_host: - if len(uparts) < 3: + # *.visualstudio.com encodes org in subdomain -> proj/repo is sufficient (2 parts). + # dev.azure.com encodes org in path -> org/proj/repo required (3 parts). + min_ado_parts = 2 if is_visualstudio_legacy_hostname(host) else 3 + if len(uparts) < min_ado_parts: raise ValueError( f"Invalid Azure DevOps repository format: {repo_url}. Expected 'org/project/repo'" ) @@ -1078,16 +1102,24 @@ def _resolve_shorthand_to_parsed_url(cls, repo_url, host): return parsed_url, host @classmethod - def _validate_url_repo_path(cls, parsed_url): + def _validate_url_repo_path(cls, parsed_url) -> tuple[str, str | None]: """Validate and normalise the repository path from a parsed URL. Checks host support, strips ``.git`` suffixes, removes ``_git`` segments, and validates each path component against the allowed character set for the detected host type. + For Azure DevOps URLs with extra path segments beyond + ``org/project/repo`` (e.g. + ``https://dev.azure.com/org/proj/_git/repo/sub/path``), the extra + segments are extracted as a virtual package path and validated with + the same rules as the shorthand virtual-path detector. + Returns: - repo_url (str): Normalised repository path - (e.g. ``owner/repo`` or ``org/project/repo``). + ``(repo_url, virtual_path)`` where *repo_url* is the normalised + base repository path (e.g. ``owner/repo`` or + ``org/project/repo``) and *virtual_path* is ``None`` unless + extra ADO sub-path segments were detected. """ hostname = parsed_url.hostname or "" if not is_supported_git_host(hostname): @@ -1107,11 +1139,57 @@ def _validate_url_repo_path(cls, parsed_url): is_ado_host = is_azure_devops_hostname(hostname) + url_virtual_path: str | None = None + if is_ado_host: - if len(path_parts) != 3: + # *.visualstudio.com encodes org in the subdomain; URL path is proj/repo (2 parts). + # dev.azure.com encodes org as the first path segment; URL path is org/proj/repo (3 parts). + is_vs_legacy = is_visualstudio_legacy_hostname(hostname) + min_ado_parts = 2 if is_vs_legacy else 3 + if len(path_parts) < min_ado_parts: raise ValueError( f"Invalid Azure DevOps repository path: expected 'org/project/repo', got '{path}'" ) + if len(path_parts) > min_ado_parts: + # Extra segments are a virtual sub-path (e.g. sub/path in + # https://dev.azure.com/org/proj/_git/repo/sub/path or + # https://myorg.visualstudio.com/proj/_git/repo/sub/path). + ado_virtual = "/".join(path_parts[min_ado_parts:]) + + # Security: reject path traversal in virtual path. + validate_path_segments(ado_virtual, context="virtual path") + + # Reject removed .collection.yml extensions. + if any(ado_virtual.endswith(ext) for ext in cls.REMOVED_COLLECTION_EXTENSIONS): + raise ValueError( + f".collection.yml is no longer supported. " + f"Convert '{ado_virtual}' to an apm.yml with a " + f"'dependencies' section. " + f"See: https://microsoft.github.io/apm/guides/dependencies/" + ) + + # Accept any recognised virtual file extension; reject other + # dotted final segments (mirrors shorthand virtual detection). + if any(ado_virtual.endswith(ext) for ext in cls.VIRTUAL_FILE_EXTENSIONS): + pass + else: + last_segment = ado_virtual.split("/")[-1] + if "." in last_segment: + raise InvalidVirtualPackageExtensionError( + f"Invalid virtual package path '{ado_virtual}'. " + f"Individual files must end with one of: " + f"{', '.join(cls.VIRTUAL_FILE_EXTENSIONS)}. " + f"For subdirectory packages, the path should not have a file extension." + ) + + url_virtual_path = ado_virtual + path_parts = path_parts[:min_ado_parts] + + # For *.visualstudio.com, inject the org from the subdomain so that the + # normalised repo_url is always org/project/repo (matching dev.azure.com). + if is_vs_legacy: + vs_org = hostname.split(".")[0] + path_parts = [vs_org, *path_parts] else: if len(path_parts) < 2: raise ValueError( @@ -1134,19 +1212,25 @@ def _validate_url_repo_path(cls, parsed_url): if not re.match(allowed_pattern, part): raise ValueError(f"Invalid repository path component: {part}") - return "/".join(path_parts) + return "/".join(path_parts), url_virtual_path @classmethod def _parse_standard_url( - cls, dependency_str: str, is_virtual_package: bool, virtual_path, validated_host - ): + cls, + dependency_str: str, + is_virtual_package: bool, + virtual_path: str | None, + validated_host: str | None, + ) -> tuple[str, int | None, str, str | None, str | None, bool, str | None]: """Parse a non-SSH dependency string (HTTPS, FQDN, or shorthand). Detects scheme vs shorthand, delegates host-specific resolution to helpers, then validates the resulting URL path. Returns: - ``(host, port, repo_url, reference, alias)`` + ``(host, port, repo_url, reference, alias, effective_is_virtual, + effective_virtual_path)`` -- the last two reflect any ADO sub-path + segments embedded in the URL itself (issue #1128). """ host = None port = None @@ -1185,12 +1269,21 @@ def _parse_standard_url( else: parsed_url, host = cls._resolve_shorthand_to_parsed_url(repo_url, host) - repo_url = cls._validate_url_repo_path(parsed_url) + repo_url, url_virtual_path = cls._validate_url_repo_path(parsed_url) + + # If URL contained extra ADO sub-path segments, they become the virtual + # path (overriding the _detect_virtual_package result which returns + # early for https:// URLs). + effective_is_virtual = is_virtual_package + effective_virtual_path = virtual_path + if url_virtual_path is not None: + effective_is_virtual = True + effective_virtual_path = url_virtual_path if not host: host = default_host() - return host, port, repo_url, reference, alias + return host, port, repo_url, reference, alias, effective_is_virtual, effective_virtual_path @classmethod def _validate_final_repo_fields(cls, host, repo_url): @@ -1333,8 +1426,10 @@ def parse(cls, dependency_str: str) -> "DependencyReference": host, port, repo_url, reference, alias = scp_result explicit_scheme = "ssh" else: - host, port, repo_url, reference, alias = cls._parse_standard_url( - dependency_str, is_virtual_package, virtual_path, validated_host + host, port, repo_url, reference, alias, is_virtual_package, virtual_path = ( + cls._parse_standard_url( + dependency_str, is_virtual_package, virtual_path, validated_host + ) ) _stripped = dependency_str.strip().lower() if _stripped.startswith("https://"): diff --git a/src/apm_cli/utils/github_host.py b/src/apm_cli/utils/github_host.py index 9224d4ea4..2377fb963 100644 --- a/src/apm_cli/utils/github_host.py +++ b/src/apm_cli/utils/github_host.py @@ -26,6 +26,19 @@ def is_azure_devops_hostname(hostname: str | None) -> bool: return bool(h.endswith(".visualstudio.com")) +def is_visualstudio_legacy_hostname(hostname: str | None) -> bool: + """Return True if hostname is a legacy ``*.visualstudio.com`` ADO host. + + For these hosts the Azure DevOps organisation is encoded in the subdomain + (e.g. ``myorg.visualstudio.com``) rather than as the first path segment. + This is in contrast to ``dev.azure.com`` where the org is the first path + segment (``dev.azure.com/org/project/repo``). + """ + if not hostname: + return False + return hostname.lower().endswith(".visualstudio.com") + + def is_gitlab_hostname(hostname: str | None) -> bool: """Return True if *hostname* is GitLab SaaS or a GitLab host from env configuration. diff --git a/tests/unit/test_ado_path_structure.py b/tests/unit/test_ado_path_structure.py index f21614211..1b64cd126 100644 --- a/tests/unit/test_ado_path_structure.py +++ b/tests/unit/test_ado_path_structure.py @@ -615,3 +615,210 @@ def test_prune_joinpath_works_for_variable_depth(self): ado_parts = ["org", "project", "repo"] ado_path = base.joinpath(*ado_parts) assert ado_path.as_posix().endswith("/tmp/apm_modules/org/project/repo") + + +class TestADOFullURLSubPath: + """Test 8-shape matrix for ADO URL parsing (issue #1128).""" + + @pytest.mark.parametrize( + "input_str,expected_virtual,expected_virtual_path,expected_ref,expected_host,expected_org,expected_project,expected_repo", + [ + # Case 1: shorthand base + ( + "dev.azure.com/org/proj/_git/repo", + False, + None, + None, + "dev.azure.com", + "org", + "proj", + "repo", + ), + # Case 2: shorthand + virtual + ( + "dev.azure.com/org/proj/_git/repo/sub/path", + True, + "sub/path", + None, + "dev.azure.com", + "org", + "proj", + "repo", + ), + # Case 3: shorthand + ref + ( + "dev.azure.com/org/proj/_git/repo#main", + False, + None, + "main", + "dev.azure.com", + "org", + "proj", + "repo", + ), + # Case 4: shorthand + virtual + ref + ( + "dev.azure.com/org/proj/_git/repo/sub/path#main", + True, + "sub/path", + "main", + "dev.azure.com", + "org", + "proj", + "repo", + ), + # Case 5: full URL base + ( + "https://dev.azure.com/org/proj/_git/repo", + False, + None, + None, + "dev.azure.com", + "org", + "proj", + "repo", + ), + # Case 6: full URL + ref + ( + "https://dev.azure.com/org/proj/_git/repo#main", + False, + None, + "main", + "dev.azure.com", + "org", + "proj", + "repo", + ), + # Case 7: full URL + virtual (THE BUG) + ( + "https://dev.azure.com/org/proj/_git/repo/sub/path", + True, + "sub/path", + None, + "dev.azure.com", + "org", + "proj", + "repo", + ), + # Case 8: full URL + virtual + ref (THE BUG) + ( + "https://dev.azure.com/org/proj/_git/repo/sub/path#main", + True, + "sub/path", + "main", + "dev.azure.com", + "org", + "proj", + "repo", + ), + # Case 9: VS shorthand base + ( + "myorg.visualstudio.com/proj/_git/repo", + False, + None, + None, + "myorg.visualstudio.com", + "myorg", + "proj", + "repo", + ), + # Case 10: VS shorthand + virtual + ( + "myorg.visualstudio.com/proj/_git/repo/sub/path", + True, + "sub/path", + None, + "myorg.visualstudio.com", + "myorg", + "proj", + "repo", + ), + # Case 11: VS shorthand + ref + ( + "myorg.visualstudio.com/proj/_git/repo#main", + False, + None, + "main", + "myorg.visualstudio.com", + "myorg", + "proj", + "repo", + ), + # Case 12: VS shorthand + virtual + ref + ( + "myorg.visualstudio.com/proj/_git/repo/sub/path#main", + True, + "sub/path", + "main", + "myorg.visualstudio.com", + "myorg", + "proj", + "repo", + ), + # Case 13: VS full URL base + ( + "https://myorg.visualstudio.com/proj/_git/repo", + False, + None, + None, + "myorg.visualstudio.com", + "myorg", + "proj", + "repo", + ), + # Case 14: VS full URL + ref + ( + "https://myorg.visualstudio.com/proj/_git/repo#main", + False, + None, + "main", + "myorg.visualstudio.com", + "myorg", + "proj", + "repo", + ), + # Case 15: VS full URL + virtual + ( + "https://myorg.visualstudio.com/proj/_git/repo/sub/path", + True, + "sub/path", + None, + "myorg.visualstudio.com", + "myorg", + "proj", + "repo", + ), + # Case 16: VS full URL + virtual + ref + ( + "https://myorg.visualstudio.com/proj/_git/repo/sub/path#main", + True, + "sub/path", + "main", + "myorg.visualstudio.com", + "myorg", + "proj", + "repo", + ), + ], + ) + def test_ado_url_matrix( + self, + input_str: str, + expected_virtual: bool, + expected_virtual_path: str | None, + expected_ref: str | None, + expected_host: str, + expected_org: str, + expected_project: str, + expected_repo: str, + ) -> None: + """All ADO URL forms parse correctly (issue #1128).""" + dep = DependencyReference.parse(input_str) + + assert dep.host == expected_host + assert dep.ado_organization == expected_org + assert dep.ado_project == expected_project + assert dep.ado_repo == expected_repo + assert dep.is_virtual == expected_virtual + assert dep.virtual_path == expected_virtual_path + assert dep.reference == expected_ref