diff --git a/CHANGELOG.md b/CHANGELOG.md index 894797c4f..5fb3872f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Fixed `apm install` crash (exit code 128) when a mono-repo package depends on a sibling pinned to a non-HEAD commit; installs now resolve with a single in-place fetch, and multiple SHA-pinned references to the same repository share a single cached clone. (#1258) - MCP server token injection now requires both an allowlisted server name and a verified HTTPS GitHub hostname, preventing PAT exfiltration via poisoned registry entries. (#1239) - `apm marketplace add` accepts GitLab-class hosts (`gitlab.com` and self-managed instances configured via `GITLAB_HOST` / `APM_GITLAB_HOSTS`); unsupported generic hosts now show separate recovery hints for GHES (`GITHUB_HOST`) and self-managed GitLab instead of only `GITHUB_HOST`. (#1149) - **GitLab monorepo marketplaces:** `apm install plugin@marketplace` now resolves plugins whose sources live in a subdirectory of the marketplace repository on GitLab-class hosts (`gitlab.com` and self-managed GitLab when classified as GitLab), matching explicit `git:` + `path:` semantics without requiring that hand-written object form. (#1149) diff --git a/docs/src/content/docs/guides/dependencies.md b/docs/src/content/docs/guides/dependencies.md index 3f224a22d..3aef6d3ae 100644 --- a/docs/src/content/docs/guides/dependencies.md +++ b/docs/src/content/docs/guides/dependencies.md @@ -786,7 +786,7 @@ APM automatically retries failed HTTP requests with exponential backoff and jitt #### Parallel Downloads -APM downloads packages in parallel using a thread pool, significantly reducing wall-clock time for large dependency trees. The concurrency level defaults to 4 and is configurable via `--parallel-downloads` (set to 0 to disable). For sibling subdirectory packages from the same monorepo and ref (e.g. 
two skills under `skills/` in `github/awesome-copilot`), APM clones the repo bare exactly once into a shared cache and materializes each consumer's working tree from that cache via `git clone --local --shared --no-checkout`. This eliminates redundant network fetches and prevents the parallel races that affected earlier sparse-checkout based fetches. +APM downloads packages in parallel using a thread pool, significantly reducing wall-clock time for large dependency trees. The concurrency level defaults to 4 and is configurable via `--parallel-downloads` (set to 0 to disable). For sibling subdirectory packages from the same monorepo and ref (e.g. two skills under `skills/` in `github/awesome-copilot`), APM clones the repo bare exactly once into a shared cache and materializes each consumer's working tree from that cache via `git clone --local --shared --no-checkout`. This eliminates redundant network fetches and prevents the parallel races that affected earlier sparse-checkout based fetches. When a transitive dependency pins a commit SHA that differs from the ref used for the initial clone, APM fetches that specific commit into the existing bare clone on demand rather than re-cloning. ### File Processing and Content Merging diff --git a/src/apm_cli/deps/bare_cache.py b/src/apm_cli/deps/bare_cache.py index 7d6d176eb..8c786936b 100644 --- a/src/apm_cli/deps/bare_cache.py +++ b/src/apm_cli/deps/bare_cache.py @@ -24,6 +24,7 @@ from __future__ import annotations import logging +import os import subprocess from collections.abc import Callable from pathlib import Path @@ -34,6 +35,8 @@ if TYPE_CHECKING: from ..models.apm_package import DependencyReference +_log = logging.getLogger(__name__) + def _rmtree(path: Path) -> None: """Remove a directory tree, handling read-only files and brief Windows locks. 
@@ -292,6 +295,190 @@ def _bare_action(url: str, env: dict[str, str], target: Path) -> None: ) +def fetch_sha_into_bare( + execute_transport_plan: Callable[..., None], + repo_url_base: str, + bare_path: Path, + sha: str, + *, + dep_ref: DependencyReference, +) -> bool: + """Attempt to fetch a specific SHA into an existing bare repo. + + Used to hydrate shallow bare clones that are missing a transitive + SHA-pinned commit. Three-step strategy: + + 1. **Check first** -- ``git rev-parse --verify <sha>^{commit}`` against + the bare. If the SHA is already present, returns ``True`` immediately + without any network I/O. + 2. **Shallow fetch by SHA** (full 40-char SHAs only) -- invokes + ``execute_transport_plan`` with a fetch action that runs + ``git fetch --depth=1 <url> <sha>``. Uses the authenticated URL supplied by + the transport plan, NOT ``git fetch origin <sha>``, because + ``remote.origin.url`` has been redacted to ``redacted://`` by + :func:`_scrub_bare_remote_url`. After the fetch, verifies with + ``rev-parse --verify``. Returns ``True`` on success. + 3. **Broaden shallow** -- invokes ``execute_transport_plan`` with a + fetch action that runs ``git fetch --depth=<n> <url>`` (no ref argument), + broadening the shallow boundary to include all remote refs. After + the fetch, verifies with ``rev-parse --verify``. Returns ``True`` + on success. + + On any failure in steps 2 or 3, returns ``False`` so the caller can + fall back to a fresh bare clone. + + Note: this function deliberately does NOT call ``git update-ref HEAD`` + after a successful fetch. The consumer's :func:`materialize_from_bare` + handles SHA resolution independently via the ``known_sha`` parameter. + + Args: + execute_transport_plan: Callable that orchestrates auth and protocol + fallback (typically ``self._execute_transport_plan``). + repo_url_base: Base repo URL (unauthenticated) passed to the + transport plan so it can inject credentials. + bare_path: Path to the existing bare repo on disk. + sha: The Git commit SHA to fetch. 
+ dep_ref: Dependency reference used by the transport plan for + auth context. + + Returns: + ``True`` if the SHA is now present in the bare, ``False`` otherwise. + """ + from ..utils.git_env import get_git_executable + + git_exe = get_git_executable() + + def _rev_parse_present() -> bool: + """Return True if sha is already reachable in the bare.""" + try: + result = subprocess.run( + [ + git_exe, + "--git-dir", + str(bare_path), + "rev-parse", + "--verify", + f"{sha}^{{commit}}", + ], + capture_output=True, + timeout=10, + ) + return result.returncode == 0 + except Exception: + return False + + def _scrub_fetch_head() -> None: + """Truncate FETCH_HEAD to remove the token-embedded URL written by fetch.""" + fetch_head = bare_path / "FETCH_HEAD" + try: + if fetch_head.exists(): + fetch_head.write_text("") + except OSError as exc: + _log.warning( + "Failed to truncate FETCH_HEAD at %s: %s. Tokenized URL " + "may persist on disk until shared cache cleanup.", + fetch_head, + exc, + ) + + # Step 1: check first -- no network if SHA already present. + _log.debug("fetch_sha_into_bare: checking if %s is present in %s", sha[:12], bare_path) + if _rev_parse_present(): + _log.debug("fetch_sha_into_bare: SHA %s already present, skipping fetch", sha[:12]) + return True + + # Step 2: shallow fetch by full SHA (only for full 40-char SHAs). 
+ if len(sha) == 40: + _log.debug( + "fetch_sha_into_bare: attempting shallow fetch of %s into %s", sha[:12], bare_path + ) + + def _fetch_action_sha(url: str, env: dict[str, str], target: Path) -> None: + subprocess.run( + [git_exe, "-C", str(bare_path), "fetch", "--depth=1", url, sha], + env=env, + check=True, + capture_output=True, + timeout=300, + ) + + try: + execute_transport_plan( + repo_url_base, + bare_path, + dep_ref=dep_ref, + clone_action=_fetch_action_sha, + ) + _scrub_fetch_head() + if _rev_parse_present(): + _log.debug("fetch_sha_into_bare: shallow fetch of %s succeeded", sha[:12]) + return True + except subprocess.CalledProcessError as exc: + stderr_text = "" + if exc.stderr: + stderr_text = exc.stderr.decode(errors="replace").strip() + _log.debug( + "fetch_sha_into_bare: shallow fetch of %s failed: %s", + sha[:12], + stderr_text, + ) + except Exception: + _log.debug( + "fetch_sha_into_bare: shallow fetch of %s raised unexpected error", + sha[:12], + ) + + # Step 3: broaden shallow -- fetch all refs without a SHA argument. + # Depth is capped to avoid unbounded history download on large repos. + # Override via APM_BROAD_FETCH_DEPTH environment variable. 
+ broad_depth = os.environ.get("APM_BROAD_FETCH_DEPTH", "50") + _log.info("Hydrating missing commit %s into shared bare for %s", sha[:12], repo_url_base) + _log.debug("fetch_sha_into_bare: broadening shallow in %s to find %s", bare_path, sha[:12]) + + def _fetch_action_broad(url: str, env: dict[str, str], target: Path) -> None: + subprocess.run( + [git_exe, "-C", str(bare_path), "fetch", f"--depth={broad_depth}", url], + env=env, + check=True, + capture_output=True, + timeout=300, + ) + + try: + execute_transport_plan( + repo_url_base, + bare_path, + dep_ref=dep_ref, + clone_action=_fetch_action_broad, + ) + _scrub_fetch_head() + if _rev_parse_present(): + _log.debug("fetch_sha_into_bare: broad fetch succeeded, %s now present", sha[:12]) + return True + except subprocess.CalledProcessError as exc: + stderr_text = "" + if exc.stderr: + stderr_text = exc.stderr.decode(errors="replace").strip() + _log.debug( + "fetch_sha_into_bare: broad fetch failed for %s in %s: %s", + sha[:12], + bare_path, + stderr_text, + ) + except Exception: + _log.debug( + "fetch_sha_into_bare: broad fetch raised unexpected error for %s", + sha[:12], + ) + + _log.debug( + "fetch_sha_into_bare: all fetch attempts exhausted for %s in %s", + sha[:12], + bare_path, + ) + return False + + def materialize_from_bare( bare_path: Path, consumer_dir: Path, @@ -389,8 +576,9 @@ def materialize_from_bare( env=env, check=False, ) + checkout_target = known_sha or "HEAD" subprocess.run( - [git_exe, "-C", str(consumer_dir), "checkout", "HEAD"], + [git_exe, "-C", str(consumer_dir), "checkout", checkout_target], capture_output=True, text=True, timeout=60, diff --git a/src/apm_cli/deps/github_downloader.py b/src/apm_cli/deps/github_downloader.py index 91b60bb8e..3fb4782f7 100644 --- a/src/apm_cli/deps/github_downloader.py +++ b/src/apm_cli/deps/github_downloader.py @@ -41,6 +41,7 @@ from .bare_cache import ( bare_clone_with_fallback, clone_with_fallback, + fetch_sha_into_bare, materialize_from_bare, ) from 
.download_strategies import DownloadDelegate @@ -580,6 +581,22 @@ def _materialize_from_bare( """Thin delegate to :func:`bare_cache.materialize_from_bare` (kept on the class so test patches still work).""" return materialize_from_bare(bare_path, consumer_dir, ref=ref, env=env, known_sha=known_sha) + def _fetch_sha_into_bare( + self, + bare_path: Path, + sha: str, + *, + dep_ref: "DependencyReference", + ) -> bool: + """Thin delegate to :func:`bare_cache.fetch_sha_into_bare` (kept on the class so test patches still work).""" + return fetch_sha_into_bare( + self._execute_transport_plan, + dep_ref.repo_url, + bare_path, + sha, + dep_ref=dep_ref, + ) + @staticmethod def _parse_ls_remote_output(output: str) -> list[RemoteRef]: """Backward-compat stub -- delegates to git_remote_ops.""" @@ -1093,9 +1110,22 @@ def _shared_bare_clone_fn(bare_target: Path) -> None: is_commit_sha=bool(is_commit_sha), ) + def _shared_bare_fetch_fn(existing_bare: Path, ref_or_sha: str) -> bool: + # get_or_clone passes `ref` here; for SHA pins it is the SHA. + return self._fetch_sha_into_bare( + existing_bare, + ref_or_sha, + dep_ref=dep_ref, + ) + try: shared_bare_path = shared_cache.get_or_clone( - cache_host, cache_owner, cache_repo, ref, _shared_bare_clone_fn + cache_host, + cache_owner, + cache_repo, + ref, + _shared_bare_clone_fn, + fetch_fn=_shared_bare_fetch_fn if is_commit_sha else None, ) except Exception as e: raise RuntimeError(f"Failed to clone repository: {e}") from e diff --git a/src/apm_cli/deps/shared_clone_cache.py b/src/apm_cli/deps/shared_clone_cache.py index e5b0b92dc..505c58124 100644 --- a/src/apm_cli/deps/shared_clone_cache.py +++ b/src/apm_cli/deps/shared_clone_cache.py @@ -51,6 +51,13 @@ def __init__(self, base_dir: Path | None = None) -> None: # Maps cache_key -> _CacheEntry self._entries: dict[tuple[str, str, str, str | None], _CacheEntry] = {} self._temp_dirs: list[str] = [] + # Maps (host, owner, repo) -> list of (ref, bare_path) tuples. 
+ # Used to locate an existing bare for the same repo when a new ref + # (typically a SHA pin on a transitive dep) is requested. + self._repo_bares: dict[tuple[str, str, str], list[tuple[str | None, Path]]] = {} + # Per-bare-path locks to serialise concurrent Tier-0 fetches into the + # same bare (git concurrent pack-file writes are not safe). + self._bare_fetch_locks: dict[Path, threading.Lock] = {} def __enter__(self) -> "SharedCloneCache": return self @@ -65,6 +72,7 @@ def get_or_clone( repo: str, ref: str | None, clone_fn: Callable[[Path], None], + fetch_fn: Callable[[Path, str], bool] | None = None, ) -> Path: """Return a path to a shared clone, cloning on first access. @@ -76,6 +84,11 @@ def get_or_clone( clone_fn: Callable that performs the clone into the given directory. Called at most once per unique key. Must raise on failure so the entry is not cached. + fetch_fn: Optional callable ``(bare_path, sha) -> bool`` that + tries to fetch a missing SHA into an already-cloned bare + for the same repo (any ref). When provided and a suitable + bare exists, it is tried before falling back to a fresh + clone. Must not raise -- return False to signal failure. Returns: Path to the cloned repo directory. @@ -94,6 +107,39 @@ def get_or_clone( # A previous attempt failed. Clear error to allow retry. entry.error = None + # Tier-0: try fetching the SHA into an existing bare for the + # same repo (different ref). This avoids a fresh network clone + # when a transitive dep pins a SHA that is missing only because + # the initial shallow bare did not include that commit. + if ref and fetch_fn: + existing_bare = self._find_repo_bare(host, owner, repo) + if existing_bare is not None: + # Acquire a per-bare lock so concurrent Tier-0 fetches + # into the same bare are serialised (git pack-file writes + # are not concurrent-safe). 
+ with self._lock: + if existing_bare not in self._bare_fetch_locks: + self._bare_fetch_locks[existing_bare] = threading.Lock() + bare_lock = self._bare_fetch_locks[existing_bare] + try: + with bare_lock: + if fetch_fn(existing_bare, ref): + entry.path = existing_bare + with self._lock: + repo_key = (host, owner, repo) + if repo_key not in self._repo_bares: + self._repo_bares[repo_key] = [] + self._repo_bares[repo_key].append((ref, existing_bare)) + return existing_bare + except Exception: + _log.info( + "Bare fetch miss for %s/%s/%s ref=%s, falling back to fresh clone", + host, + owner, + repo, + ref, + ) + # First caller (or retry after failure): perform the clone. temp_dir = tempfile.mkdtemp( dir=str(self._base_dir) if self._base_dir else None, @@ -121,11 +167,38 @@ def get_or_clone( f".git/ present: {git_dir.exists()})" ) entry.path = clone_path + with self._lock: + repo_key = (host, owner, repo) + if repo_key not in self._repo_bares: + self._repo_bares[repo_key] = [] + self._repo_bares[repo_key].append((ref, clone_path)) return clone_path except Exception as exc: entry.error = exc raise + def _find_repo_bare(self, host: str, owner: str, repo: str) -> Path | None: + """Return an existing bare path for the same repo (any ref), or None. + + Searches the reverse index populated after each successful clone. + Returns the path of the first registered bare for ``(host, owner, + repo)`` regardless of which ref it was originally cloned at. + + Args: + host: Git host (e.g. "github.com"). + owner: Repository owner. + repo: Repository name. + + Returns: + A :class:`Path` to an existing bare, or ``None`` if none is + registered yet. 
+ """ + with self._lock: + entries = self._repo_bares.get((host, owner, repo)) + if entries: + return entries[0][1] + return None + def _get_or_create_entry(self, key: tuple) -> "_CacheEntry": """Retrieve or create a cache entry (thread-safe).""" with self._lock: @@ -139,6 +212,8 @@ def cleanup(self) -> None: dirs_to_remove = list(self._temp_dirs) self._temp_dirs.clear() self._entries.clear() + self._repo_bares.clear() + self._bare_fetch_locks.clear() for d in dirs_to_remove: try: shutil.rmtree(d, ignore_errors=True) diff --git a/tests/unit/deps/test_shared_clone_cache.py b/tests/unit/deps/test_shared_clone_cache.py index a579f6c09..eeb9f1b3f 100644 --- a/tests/unit/deps/test_shared_clone_cache.py +++ b/tests/unit/deps/test_shared_clone_cache.py @@ -696,8 +696,84 @@ def test_materialize_pins_autocrlf_false(self, tmp_path: Path) -> None: config_text = (consumer / ".git" / "config").read_text() assert "autocrlf = false" in config_text or "autocrlf=false" in config_text + def test_materialize_known_sha_checks_out_correct_commit(self, tmp_path: Path) -> None: + """materialize_from_bare checks out known_sha, not HEAD.""" + import subprocess as sp + + from apm_cli.deps.bare_cache import materialize_from_bare + + bare = tmp_path / "bare.git" + consumer = tmp_path / "consumer" + + env = {k: v for k, v in __import__("os").environ.items()} + + git_exe = "git" + + # Create a normal repo with 2 commits, then clone as bare. 
+ src = tmp_path / "src" + src.mkdir() + sp.run([git_exe, "init", "-b", "main", str(src)], env=env, check=True, capture_output=True) + sp.run( + [git_exe, "-C", str(src), "config", "user.email", "t@t"], + env=env, + check=True, + capture_output=True, + ) + sp.run( + [git_exe, "-C", str(src), "config", "user.name", "t"], + env=env, + check=True, + capture_output=True, + ) + (src / "a.txt").write_text("first") + sp.run([git_exe, "-C", str(src), "add", "."], env=env, check=True, capture_output=True) + sp.run( + [git_exe, "-C", str(src), "commit", "-m", "first"], + env=env, + check=True, + capture_output=True, + ) + first_sha = sp.run( + [git_exe, "-C", str(src), "rev-parse", "HEAD"], + env=env, + check=True, + capture_output=True, + text=True, + ).stdout.strip() + (src / "b.txt").write_text("second") + sp.run([git_exe, "-C", str(src), "add", "."], env=env, check=True, capture_output=True) + sp.run( + [git_exe, "-C", str(src), "commit", "-m", "second"], + env=env, + check=True, + capture_output=True, + ) + + # Clone as bare + sp.run( + [git_exe, "clone", "--bare", str(src), str(bare)], + env=env, + check=True, + capture_output=True, + ) + + # Materialize with known_sha pointing to the FIRST commit + resolved = materialize_from_bare(bare, consumer, ref=None, env=env, known_sha=first_sha) + + # Assert HEAD in consumer equals first_sha + consumer_head = sp.run( + [git_exe, "-C", str(consumer), "rev-parse", "HEAD"], + env=env, + check=True, + capture_output=True, + text=True, + ).stdout.strip() + assert consumer_head == first_sha, f"Expected {first_sha}, got {consumer_head}" + assert resolved == first_sha + # First commit should have a.txt but NOT b.txt + assert (consumer / "a.txt").exists() + assert not (consumer / "b.txt").exists() -class TestSharedCloneCacheBareInvariant: """6.16: cache enforces bare-shape invariant in debug mode.""" def test_apm_debug_rejects_non_bare_clone(self, tmp_path: Path, monkeypatch) -> None: @@ -1032,3 +1108,544 @@ def fake_run(args, 
**kwargs): assert bearer_idx > pat_idx, f"bearer retry must follow PAT failure, urls={urls_seen}" # Stale-PAT diagnostic must be emitted on bearer success. assert d.auth_resolver.emit_stale_pat_diagnostic.called + + +# --------------------------------------------------------------------------- +# #1258 fix: fetch_sha_into_bare() tests +# --------------------------------------------------------------------------- + + +class TestFetchShaIntoBare: + """Tests for the fetch_sha_into_bare() free function.""" + + def test_sha_already_present_returns_true_without_fetch(self, tmp_path: Path) -> None: + """SHA already present in bare: rev-parse succeeds, no network fetch.""" + from apm_cli.deps.bare_cache import fetch_sha_into_bare + from apm_cli.models.apm_package import DependencyReference + + bare_path = tmp_path / "bare" + bare_path.mkdir() + sha = "a" * 40 + + dep_ref = DependencyReference.parse("owner/repo/sub#main") + mock_execute = MagicMock() + + with patch("apm_cli.deps.bare_cache.subprocess.run") as mock_run: + # rev-parse returns success (exit code 0) + mock_run.return_value = MagicMock(returncode=0) + + result = fetch_sha_into_bare( + mock_execute, + "https://github.com/owner/repo.git", + bare_path, + sha, + dep_ref=dep_ref, + ) + + assert result is True + # execute_transport_plan should NOT be called + mock_execute.assert_not_called() + # Only one rev-parse call (the initial check) + assert mock_run.call_count == 1 + + def test_shallow_fetch_full_sha_succeeds(self, tmp_path: Path) -> None: + """Full 40-char SHA: shallow fetch via transport plan succeeds.""" + from apm_cli.deps.bare_cache import fetch_sha_into_bare + from apm_cli.models.apm_package import DependencyReference + + bare_path = tmp_path / "bare" + bare_path.mkdir() + sha = "b" * 40 + + dep_ref = DependencyReference.parse("owner/repo/sub#main") + + # Capture the clone_action passed to execute_transport_plan + captured_actions: list = [] + + def mock_execute( + repo_url_base: str, bare_target: Path, 
*, dep_ref, clone_action, **kwargs + ) -> None: + captured_actions.append(clone_action) + # Simulate successful fetch by setting returncode 0 + # The clone_action will be called with url, env, target + + with patch("apm_cli.deps.bare_cache.subprocess.run") as mock_run: + # First rev-parse returns 1 (SHA not present) + # Second rev-parse (after fetch) returns 0 (SHA now present) + mock_run.side_effect = [ + MagicMock(returncode=1), # SHA not present initially + MagicMock(returncode=0), # SHA present after fetch + ] + + result = fetch_sha_into_bare( + mock_execute, + "https://github.com/owner/repo.git", + bare_path, + sha, + dep_ref=dep_ref, + ) + + assert result is True + assert len(captured_actions) == 1 + # Invoke the captured clone_action to verify it runs the right command + captured_action = captured_actions[0] + with patch("apm_cli.deps.bare_cache.subprocess.run") as mock_run_fetch: + mock_run_fetch.return_value = MagicMock(returncode=0) + captured_action( + url="https://github.com/owner/repo.git", + env={}, + target=bare_path, + ) + # Verify the fetch command uses the URL and SHA + call_args = mock_run_fetch.call_args[0][0] + assert "fetch" in call_args + assert "--depth=1" in call_args + assert sha in call_args + assert "https://github.com/owner/repo.git" in call_args + + def test_short_sha_skips_shallow_fetch_goes_to_broad(self, tmp_path: Path) -> None: + """Short 7-char SHA: skip shallow fetch, go directly to broad fetch.""" + from apm_cli.deps.bare_cache import fetch_sha_into_bare + from apm_cli.models.apm_package import DependencyReference + + bare_path = tmp_path / "bare" + bare_path.mkdir() + sha = "c" * 7 # Short SHA + + dep_ref = DependencyReference.parse("owner/repo/sub#main") + captured_actions: list = [] + + def mock_execute( + repo_url_base: str, bare_target: Path, *, dep_ref, clone_action, **kwargs + ) -> None: + captured_actions.append(clone_action) + + with patch("apm_cli.deps.bare_cache.subprocess.run") as mock_run: + # First rev-parse 
returns 1 (SHA not present) + # Second rev-parse (after broad fetch) returns 0 + mock_run.side_effect = [ + MagicMock(returncode=1), # SHA not present + MagicMock(returncode=0), # SHA present after broad fetch + ] + + result = fetch_sha_into_bare( + mock_execute, + "https://github.com/owner/repo.git", + bare_path, + sha, + dep_ref=dep_ref, + ) + + assert result is True + # Only one transport plan call (the broad fetch, not shallow) + assert len(captured_actions) == 1 + # Verify the broad fetch action doesn't include the SHA + captured_action = captured_actions[0] + with patch("apm_cli.deps.bare_cache.subprocess.run") as mock_run_fetch: + mock_run_fetch.return_value = MagicMock(returncode=0) + captured_action( + url="https://github.com/owner/repo.git", + env={}, + target=bare_path, + ) + call_args = mock_run_fetch.call_args[0][0] + assert "fetch" in call_args + # Should NOT have the SHA in a broad fetch (only the URL) + assert sha not in call_args + assert "https://github.com/owner/repo.git" in call_args + + def test_all_steps_fail_returns_false(self, tmp_path: Path) -> None: + """All fetch attempts fail: return False.""" + from apm_cli.deps.bare_cache import fetch_sha_into_bare + from apm_cli.models.apm_package import DependencyReference + + bare_path = tmp_path / "bare" + bare_path.mkdir() + sha = "d" * 40 + + dep_ref = DependencyReference.parse("owner/repo/sub#main") + + def mock_execute_fail( + repo_url_base: str, bare_target: Path, *, dep_ref, clone_action, **kwargs + ) -> None: + # Simulate transport plan failure + raise Exception("Transport plan failed") + + with patch("apm_cli.deps.bare_cache.subprocess.run") as mock_run: + # rev-parse always returns 1 (SHA never present) + mock_run.return_value = MagicMock(returncode=1) + + result = fetch_sha_into_bare( + mock_execute_fail, + "https://github.com/owner/repo.git", + bare_path, + sha, + dep_ref=dep_ref, + ) + + assert result is False + + def test_fetch_action_uses_explicit_url_not_origin(self, tmp_path: Path) 
-> None: + """Fetch action uses the explicit URL from transport plan, not 'origin'.""" + from apm_cli.deps.bare_cache import fetch_sha_into_bare + from apm_cli.models.apm_package import DependencyReference + + bare_path = tmp_path / "bare" + bare_path.mkdir() + sha = "e" * 40 + + dep_ref = DependencyReference.parse("owner/repo/sub#main") + captured_actions: list = [] + + def mock_execute( + repo_url_base: str, bare_target: Path, *, dep_ref, clone_action, **kwargs + ) -> None: + captured_actions.append(clone_action) + + with patch("apm_cli.deps.bare_cache.subprocess.run") as mock_run: + mock_run.side_effect = [ + MagicMock(returncode=1), # SHA not present + MagicMock(returncode=0), # SHA present after fetch + ] + + fetch_sha_into_bare( + mock_execute, + "https://github.com/owner/repo.git", + bare_path, + sha, + dep_ref=dep_ref, + ) + + # Verify the action uses the explicit URL + captured_action = captured_actions[0] + explicit_url = "https://explicit.example.com/repo.git" + with patch("apm_cli.deps.bare_cache.subprocess.run") as mock_run_fetch: + mock_run_fetch.return_value = MagicMock(returncode=0) + captured_action(url=explicit_url, env={}, target=bare_path) + + call_args = mock_run_fetch.call_args[0][0] + assert explicit_url in call_args + # Verify "origin" is NOT used as a shorthand (remote URL is redacted) + assert "origin" not in call_args or call_args.index("origin") < call_args.index( + explicit_url + ) + # Ensure 'origin' shorthand is never used (remote URL is redacted) + for call_args_item in mock_run_fetch.call_args_list: + argv = call_args_item[0][0] if call_args_item[0] else call_args_item[1].get("args", []) + assert "origin" not in argv, f"Found 'origin' in fetch argv: {argv}" + + +# --------------------------------------------------------------------------- +# #1258 fix: SharedCloneCache repo-level reuse tests +# --------------------------------------------------------------------------- + + +class TestSharedCloneCacheRepoReuse: + """Tests for 
SharedCloneCache repo-level bare reuse via fetch_fn.""" + + def test_fetch_fn_reuses_existing_bare_for_different_sha(self, tmp_path: Path) -> None: + """fetch_fn allows reusing an existing bare for a different SHA.""" + cache = SharedCloneCache(base_dir=tmp_path) + clone_count_1 = {"n": 0} + clone_count_2 = {"n": 0} + + def clone_fn_1(target: Path) -> None: + clone_count_1["n"] += 1 + target.mkdir(parents=True, exist_ok=True) + (target / "HEAD").write_text("ref: refs/heads/main\n") + + def clone_fn_2(target: Path) -> None: + clone_count_2["n"] += 1 + target.mkdir(parents=True, exist_ok=True) + + def fetch_fn_mock(bare_path: Path, sha: str) -> bool: + # Simulate successful fetch + return True + + # First call: normal clone with "main" + path1 = cache.get_or_clone("github.com", "owner", "repo", "main", clone_fn_1) + + # Second call: fetch_fn should be tried for the SHA + sha = "a" * 40 + path2 = cache.get_or_clone( + "github.com", + "owner", + "repo", + sha, + clone_fn_2, + fetch_fn=fetch_fn_mock, + ) + + # Both should return the same path (bare reuse) + assert path1 == path2 + # clone_fn_1 called once, clone_fn_2 NOT called (fetch_fn succeeded) + assert clone_count_1["n"] == 1 + assert clone_count_2["n"] == 0 + cache.cleanup() + + def test_fetch_fn_failure_falls_through_to_fresh_clone(self, tmp_path: Path) -> None: + """fetch_fn returns False: fall through to fresh clone.""" + cache = SharedCloneCache(base_dir=tmp_path) + clone_count_1 = {"n": 0} + clone_count_2 = {"n": 0} + + def clone_fn_1(target: Path) -> None: + clone_count_1["n"] += 1 + target.mkdir(parents=True, exist_ok=True) + (target / "HEAD").write_text("ref: refs/heads/main\n") + + def clone_fn_2(target: Path) -> None: + clone_count_2["n"] += 1 + target.mkdir(parents=True, exist_ok=True) + + def fetch_fn_fail(bare_path: Path, sha: str) -> bool: + # Simulate fetch failure + return False + + # First call: normal clone + path1 = cache.get_or_clone("github.com", "owner", "repo", "main", clone_fn_1) + + # 
Second call: fetch_fn fails, should fall through to clone_fn_2 + sha = "b" * 40 + path2 = cache.get_or_clone( + "github.com", + "owner", + "repo", + sha, + clone_fn_2, + fetch_fn=fetch_fn_fail, + ) + + # Different paths (two separate bares) + assert path1 != path2 + # Both clone functions called once + assert clone_count_1["n"] == 1 + assert clone_count_2["n"] == 1 + cache.cleanup() + + def test_fetch_fn_exception_falls_through_to_fresh_clone(self, tmp_path: Path) -> None: + """fetch_fn raises: catch exception and fall through to fresh clone.""" + cache = SharedCloneCache(base_dir=tmp_path) + clone_count_1 = {"n": 0} + clone_count_2 = {"n": 0} + + def clone_fn_1(target: Path) -> None: + clone_count_1["n"] += 1 + target.mkdir(parents=True, exist_ok=True) + (target / "HEAD").write_text("ref: refs/heads/main\n") + + def clone_fn_2(target: Path) -> None: + clone_count_2["n"] += 1 + target.mkdir(parents=True, exist_ok=True) + + def fetch_fn_raises(bare_path: Path, sha: str) -> bool: + raise RuntimeError("Fetch failed unexpectedly") + + # First call: normal clone + path1 = cache.get_or_clone("github.com", "owner", "repo", "main", clone_fn_1) + + # Second call: fetch_fn raises, should be caught and fall through + sha = "c" * 40 + path2 = cache.get_or_clone( + "github.com", + "owner", + "repo", + sha, + clone_fn_2, + fetch_fn=fetch_fn_raises, + ) + + # Different paths (two separate bares) + assert path1 != path2 + # Both clone functions called once + assert clone_count_1["n"] == 1 + assert clone_count_2["n"] == 1 + cache.cleanup() + + def test_fetch_fn_called_for_non_sha_refs(self, tmp_path: Path) -> None: + """fetch_fn is called for any non-None ref (not just SHAs).""" + cache = SharedCloneCache(base_dir=tmp_path) + clone_count_1 = {"n": 0} + clone_count_2 = {"n": 0} + fetch_call_count = {"n": 0} + + def clone_fn_1(target: Path) -> None: + clone_count_1["n"] += 1 + target.mkdir(parents=True, exist_ok=True) + (target / "HEAD").write_text("ref: refs/heads/main\n") + + def 
clone_fn_2(target: Path) -> None: + clone_count_2["n"] += 1 + target.mkdir(parents=True, exist_ok=True) + + def fetch_fn_track(bare_path: Path, ref: str) -> bool: + fetch_call_count["n"] += 1 + return True + + # First call: clone with "main" + _ = cache.get_or_clone("github.com", "owner", "repo", "main", clone_fn_1) + + # Second call: with "develop" ref, fetch_fn should be called + _ = cache.get_or_clone( + "github.com", + "owner", + "repo", + "develop", + clone_fn_2, + fetch_fn=fetch_fn_track, + ) + + # fetch_fn should have been called + assert fetch_call_count["n"] == 1 + cache.cleanup() + + def test_fetch_fn_not_called_when_ref_is_none(self, tmp_path: Path) -> None: + """fetch_fn is not called when ref is None.""" + cache = SharedCloneCache(base_dir=tmp_path) + clone_count_1 = {"n": 0} + clone_count_2 = {"n": 0} + fetch_call_count = {"n": 0} + + def clone_fn_1(target: Path) -> None: + clone_count_1["n"] += 1 + target.mkdir(parents=True, exist_ok=True) + (target / "HEAD").write_text("ref: refs/heads/main\n") + + def clone_fn_2(target: Path) -> None: + clone_count_2["n"] += 1 + target.mkdir(parents=True, exist_ok=True) + + def fetch_fn_track(bare_path: Path, ref: str) -> bool: + fetch_call_count["n"] += 1 + return True + + # First call: clone with "main" + _ = cache.get_or_clone("github.com", "owner", "repo", "main", clone_fn_1) + + # Second call: with ref=None, fetch_fn should NOT be called + _ = cache.get_or_clone( + "github.com", + "owner", + "repo", + None, + clone_fn_2, + fetch_fn=fetch_fn_track, + ) + + # fetch_fn should NOT have been called (ref is None) + assert fetch_call_count["n"] == 0 + # Both clones called (no fetch opportunity) + assert clone_count_1["n"] == 1 + assert clone_count_2["n"] == 1 + cache.cleanup() + + def test_repo_bares_cleared_on_cleanup(self, tmp_path: Path) -> None: + """_find_repo_bare returns None after cleanup.""" + cache = SharedCloneCache(base_dir=tmp_path) + + def clone_fn(target: Path) -> None: + target.mkdir(parents=True, 
exist_ok=True) + (target / "HEAD").write_text("ref: refs/heads/main\n") + + # Clone to populate _repo_bares + path = cache.get_or_clone("github.com", "owner", "repo", "main", clone_fn) + assert path is not None + + # Verify _find_repo_bare finds it + found = cache._find_repo_bare("github.com", "owner", "repo") + assert found is not None + + # After cleanup, _find_repo_bare should return None + cache.cleanup() + found_after = cache._find_repo_bare("github.com", "owner", "repo") + assert found_after is None + + def test_fetch_fn_none_skips_tier0(self, tmp_path: Path) -> None: + """When fetch_fn is None, Tier-0 is skipped even if repo_bares has entries.""" + cache = SharedCloneCache(base_dir=tmp_path) + clone_called = threading.Event() + + def clone_fn(target: Path) -> None: + clone_called.set() + target.mkdir(parents=True, exist_ok=True) + (target / "HEAD").write_text("ref: refs/heads/main\n") + + # First clone populates _repo_bares + cache.get_or_clone("gh", "o", "r", "main", clone_fn) + + # Second call with fetch_fn=None should NOT try Tier-0 + clone2_called = threading.Event() + + def clone_fn2(target: Path) -> None: + clone2_called.set() + target.mkdir(parents=True, exist_ok=True) + (target / "HEAD").write_text("ref: refs/heads/main\n") + + cache.get_or_clone("gh", "o", "r", "dev", clone_fn2, fetch_fn=None) + assert clone2_called.is_set(), "Should have cloned fresh when fetch_fn=None" + cache.cleanup() + + +# --------------------------------------------------------------------------- +# #1258 fix: materialize_from_bare checkout target tests +# --------------------------------------------------------------------------- + + +class TestMaterializeCheckoutTarget: + """Tests for materialize_from_bare known_sha parameter.""" + + def test_checkout_uses_known_sha_when_provided(self, tmp_path: Path) -> None: + """materialize_from_bare uses known_sha for checkout when provided.""" + from apm_cli.deps.bare_cache import materialize_from_bare + + bare_path = tmp_path / "bare" 
+ bare_path.mkdir() + consumer_dir = tmp_path / "consumer" + known_sha = "f" * 40 + + with patch("apm_cli.deps.bare_cache.subprocess.run") as mock_run: + # Mock all subprocess calls (clone, config, checkout) + mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="") + + materialize_from_bare( + bare_path, + consumer_dir, + ref=None, + env={}, + known_sha=known_sha, + ) + + # Find the checkout call and verify it uses the known_sha + checkout_calls = [call for call in mock_run.call_args_list if call[0][0][-2] == "checkout"] + assert len(checkout_calls) > 0, "checkout command should have been called" + + # The checkout command should use the known_sha as the target, not HEAD + checkout_args = checkout_calls[0][0][0] + assert known_sha in checkout_args + + def test_checkout_uses_head_when_known_sha_is_none(self, tmp_path: Path) -> None: + """materialize_from_bare uses HEAD for checkout when known_sha is None.""" + from apm_cli.deps.bare_cache import materialize_from_bare + + bare_path = tmp_path / "bare" + bare_path.mkdir() + consumer_dir = tmp_path / "consumer" + + with patch("apm_cli.deps.bare_cache.subprocess.run") as mock_run: + # Mock all subprocess calls + mock_run.return_value = MagicMock(returncode=0, stdout="abc1234567890", stderr="") + + materialize_from_bare( + bare_path, + consumer_dir, + ref=None, + env={}, + known_sha=None, + ) + + # Find the checkout call and verify it uses HEAD + checkout_calls = [call for call in mock_run.call_args_list if call[0][0][-2] == "checkout"] + assert len(checkout_calls) > 0, "checkout command should have been called" + + checkout_args = checkout_calls[0][0][0] + assert "HEAD" in checkout_args