1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -37,6 +37,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

- Fixed `apm install` crash (exit code 128) when a mono-repo package depends on a sibling pinned to a non-HEAD commit; installs now resolve with a single in-place fetch, and multiple SHA-pinned references to the same repository share a single cached clone. (#1258)
- MCP server token injection now requires both an allowlisted server name and a verified HTTPS GitHub hostname, preventing PAT exfiltration via poisoned registry entries. (#1239)
- `apm marketplace add` accepts GitLab-class hosts (`gitlab.com` and self-managed instances configured via `GITLAB_HOST` / `APM_GITLAB_HOSTS`); unsupported generic hosts now show separate recovery hints for GHES (`GITHUB_HOST`) and self-managed GitLab instead of only `GITHUB_HOST`. (#1149)
- **GitLab monorepo marketplaces:** `apm install plugin@marketplace` now resolves plugins whose sources live in a subdirectory of the marketplace repository on GitLab-class hosts (`gitlab.com` and self-managed GitLab when classified as GitLab), matching explicit `git:` + `path:` semantics without requiring that hand-written object form. (#1149)
2 changes: 1 addition & 1 deletion docs/src/content/docs/guides/dependencies.md
@@ -786,7 +786,7 @@ APM automatically retries failed HTTP requests with exponential backoff and jitt

#### Parallel Downloads

APM downloads packages in parallel using a thread pool, significantly reducing wall-clock time for large dependency trees. The concurrency level defaults to 4 and is configurable via `--parallel-downloads` (set to 0 to disable). For sibling subdirectory packages from the same monorepo and ref (e.g. two skills under `skills/` in `github/awesome-copilot`), APM clones the repo bare exactly once into a shared cache and materializes each consumer's working tree from that cache via `git clone --local --shared --no-checkout`. This eliminates redundant network fetches and prevents the parallel races that affected earlier sparse-checkout based fetches. When a transitive dependency pins a commit SHA that differs from the ref used for the initial clone, APM fetches that specific commit into the existing bare clone on demand rather than re-cloning.
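
The caching sequence can be sketched as plain Git plumbing. The helper below is illustrative only (function names, paths, and the absence of authentication, retries, and depth broadening are simplifications, not APM's internal API):

```python
import subprocess
from pathlib import Path

def _git(*args: str) -> bool:
    """Run a git command, returning True on success (sketch-level handling)."""
    return subprocess.run(["git", *args], capture_output=True).returncode == 0

def fetch_package(url: str, cache: Path, consumer: Path, sha: str) -> None:
    # 1. Clone the repo bare exactly once into the shared cache.
    if not cache.exists():
        _git("clone", "--bare", "--depth=1", url, str(cache))
    # 2. Hydrate the pinned commit only if the shallow bare is missing it.
    if not _git("--git-dir", str(cache), "rev-parse", "--verify", f"{sha}^{{commit}}"):
        _git("-C", str(cache), "fetch", "--depth=1", url, sha)
    # 3. Materialize the consumer working tree from the local cache -- no network.
    _git("clone", "--local", "--shared", "--no-checkout", str(cache), str(consumer))
    _git("-C", str(consumer), "checkout", sha)
```

Fetching an arbitrary commit by SHA (step 2) requires server-side support such as `uploadpack.allowReachableSHA1InWant`, which github.com enables; when it is unavailable, the fallback is to broaden the shallow history until the commit is reachable.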

### File Processing and Content Merging

190 changes: 189 additions & 1 deletion src/apm_cli/deps/bare_cache.py
@@ -24,6 +24,7 @@
from __future__ import annotations

import logging
import os
import subprocess
from collections.abc import Callable
from pathlib import Path
@@ -34,6 +35,8 @@
if TYPE_CHECKING:
from ..models.apm_package import DependencyReference

_log = logging.getLogger(__name__)


def _rmtree(path: Path) -> None:
"""Remove a directory tree, handling read-only files and brief Windows locks.
@@ -292,6 +295,190 @@ def _bare_action(url: str, env: dict[str, str], target: Path) -> None:
)


def fetch_sha_into_bare(
execute_transport_plan: Callable[..., None],
repo_url_base: str,
bare_path: Path,
sha: str,
*,
dep_ref: DependencyReference,
) -> bool:
"""Attempt to fetch a specific SHA into an existing bare repo.

Used to hydrate shallow bare clones that are missing a transitive
SHA-pinned commit. Three-step strategy:

1. **Check first** -- ``git rev-parse --verify <sha>^{commit}`` against
the bare. If the SHA is already present, returns ``True`` immediately
without any network I/O.
2. **Shallow fetch by SHA** (full 40-char SHAs only) -- invokes
``execute_transport_plan`` with a fetch action that runs
``git fetch <url> <sha>``. Uses the authenticated URL supplied by
the transport plan, NOT ``git fetch origin <sha>``, because
``remote.origin.url`` has been redacted to ``redacted://`` by
:func:`_scrub_bare_remote_url`. After the fetch, verifies with
``rev-parse --verify``. Returns ``True`` on success.
3. **Broaden shallow** -- invokes ``execute_transport_plan`` with a
fetch action that runs ``git fetch <url>`` (no ref argument),
broadening the shallow boundary to include all remote refs. After
the fetch, verifies with ``rev-parse --verify``. Returns ``True``
on success.

On any failure in steps 2 or 3, returns ``False`` so the caller can
fall back to a fresh bare clone.

Note: this function deliberately does NOT call ``git update-ref HEAD``
after a successful fetch. The consumer's :func:`materialize_from_bare`
handles SHA resolution independently via the ``known_sha`` parameter.

Args:
execute_transport_plan: Callable that orchestrates auth and protocol
fallback (typically ``self._execute_transport_plan``).
repo_url_base: Base repo URL (unauthenticated) passed to the
transport plan so it can inject credentials.
bare_path: Path to the existing bare repo on disk.
sha: The Git commit SHA to fetch.
dep_ref: Dependency reference used by the transport plan for
auth context.

Returns:
``True`` if the SHA is now present in the bare, ``False`` otherwise.
"""
from ..utils.git_env import get_git_executable

git_exe = get_git_executable()

def _rev_parse_present() -> bool:
"""Return True if sha is already reachable in the bare."""
try:
result = subprocess.run(
[
git_exe,
"--git-dir",
str(bare_path),
"rev-parse",
"--verify",
f"{sha}^{{commit}}",
],
capture_output=True,
timeout=10,
)
return result.returncode == 0
except Exception:
return False

def _scrub_fetch_head() -> None:
"""Truncate FETCH_HEAD to remove the token-embedded URL written by fetch."""
fetch_head = bare_path / "FETCH_HEAD"
try:
if fetch_head.exists():
fetch_head.write_text("")
except OSError as exc:
_log.warning(
"Failed to truncate FETCH_HEAD at %s: %s. Tokenized URL "
"may persist on disk until shared cache cleanup.",
fetch_head,
exc,
)

# Step 1: check first -- no network if SHA already present.
_log.debug("fetch_sha_into_bare: checking if %s is present in %s", sha[:12], bare_path)
if _rev_parse_present():
_log.debug("fetch_sha_into_bare: SHA %s already present, skipping fetch", sha[:12])
return True

# Step 2: shallow fetch by full SHA (only for full 40-char SHAs).
if len(sha) == 40:
_log.debug(
"fetch_sha_into_bare: attempting shallow fetch of %s into %s", sha[:12], bare_path
)

def _fetch_action_sha(url: str, env: dict[str, str], target: Path) -> None:
subprocess.run(
[git_exe, "-C", str(bare_path), "fetch", "--depth=1", url, sha],
env=env,
check=True,
capture_output=True,
timeout=300,
)

try:
execute_transport_plan(
repo_url_base,
bare_path,
dep_ref=dep_ref,
clone_action=_fetch_action_sha,
)
_scrub_fetch_head()
if _rev_parse_present():
_log.debug("fetch_sha_into_bare: shallow fetch of %s succeeded", sha[:12])
return True
except subprocess.CalledProcessError as exc:
stderr_text = ""
if exc.stderr:
stderr_text = exc.stderr.decode(errors="replace").strip()
_log.debug(
"fetch_sha_into_bare: shallow fetch of %s failed: %s",
sha[:12],
stderr_text,
)
except Exception:
_log.debug(
"fetch_sha_into_bare: shallow fetch of %s raised unexpected error",
sha[:12],
)

# Step 3: broaden shallow -- fetch all refs without a SHA argument.
# Depth is capped to avoid unbounded history download on large repos.
# Override via APM_BROAD_FETCH_DEPTH environment variable.
broad_depth = os.environ.get("APM_BROAD_FETCH_DEPTH", "50")
_log.info("Hydrating missing commit %s into shared bare for %s", sha[:12], repo_url_base)
_log.debug("fetch_sha_into_bare: broadening shallow in %s to find %s", bare_path, sha[:12])

def _fetch_action_broad(url: str, env: dict[str, str], target: Path) -> None:
subprocess.run(
[git_exe, "-C", str(bare_path), "fetch", f"--depth={broad_depth}", url],
env=env,
check=True,
capture_output=True,
timeout=300,
)
**Comment on lines +431 to +445** (collaborator, PR author):

> Fixed in a2b78f5. The broad fetch now uses `--depth=50` (configurable via the `APM_BROAD_FETCH_DEPTH` env var) instead of an unbounded fetch. This brings in enough history to resolve most ancestor SHAs without downloading the entire repo.

try:
execute_transport_plan(
repo_url_base,
bare_path,
dep_ref=dep_ref,
clone_action=_fetch_action_broad,
)
_scrub_fetch_head()
if _rev_parse_present():
_log.debug("fetch_sha_into_bare: broad fetch succeeded, %s now present", sha[:12])
return True
except subprocess.CalledProcessError as exc:
stderr_text = ""
if exc.stderr:
stderr_text = exc.stderr.decode(errors="replace").strip()
_log.debug(
"fetch_sha_into_bare: broad fetch failed for %s in %s: %s",
sha[:12],
bare_path,
stderr_text,
)
except Exception:
_log.debug(
"fetch_sha_into_bare: broad fetch raised unexpected error for %s",
sha[:12],
)

_log.debug(
"fetch_sha_into_bare: all fetch attempts exhausted for %s in %s",
sha[:12],
bare_path,
)
return False


def materialize_from_bare(
bare_path: Path,
consumer_dir: Path,
@@ -389,8 +576,9 @@ def materialize_from_bare(
env=env,
check=False,
)
checkout_target = known_sha or "HEAD"
subprocess.run(
[git_exe, "-C", str(consumer_dir), "checkout", "HEAD"],
[git_exe, "-C", str(consumer_dir), "checkout", checkout_target],
capture_output=True,
text=True,
timeout=60,
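Taken together, the helpers in this file give download strategies a hydrate-or-fall-back contract around SHA pins. A minimal sketch of the intended call pattern, with illustrative wiring (the real call sites in `github_downloader.py` below route through thin delegate methods):

```python
from pathlib import Path

def resolve_sha_pin(downloader, dep_ref, bare_path: Path, consumer_dir: Path,
                    sha: str, git_env: dict[str, str]) -> None:
    """Sketch: ensure `sha` is present in the shared bare, then materialize it."""
    hydrated = fetch_sha_into_bare(
        downloader._execute_transport_plan,  # handles auth + protocol fallback
        dep_ref.repo_url,
        bare_path,
        sha,
        dep_ref=dep_ref,
    )
    if not hydrated:
        # All three hydration steps failed; a real caller would fall back to a
        # fresh bare clone (e.g. via bare_clone_with_fallback) at this point.
        raise RuntimeError(f"could not hydrate commit {sha[:12]}")
    # known_sha lets materialize_from_bare check out the pin directly, not HEAD.
    materialize_from_bare(bare_path, consumer_dir, ref=sha, env=git_env, known_sha=sha)
```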
32 changes: 31 additions & 1 deletion src/apm_cli/deps/github_downloader.py
@@ -41,6 +41,7 @@
from .bare_cache import (
bare_clone_with_fallback,
clone_with_fallback,
fetch_sha_into_bare,
materialize_from_bare,
)
from .download_strategies import DownloadDelegate
@@ -580,6 +581,22 @@ def _materialize_from_bare(
"""Thin delegate to :func:`bare_cache.materialize_from_bare` (kept on the class so test patches still work)."""
return materialize_from_bare(bare_path, consumer_dir, ref=ref, env=env, known_sha=known_sha)

def _fetch_sha_into_bare(
self,
bare_path: Path,
sha: str,
*,
dep_ref: "DependencyReference",
) -> bool:
"""Thin delegate to :func:`bare_cache.fetch_sha_into_bare` (kept on the class so test patches still work)."""
return fetch_sha_into_bare(
self._execute_transport_plan,
dep_ref.repo_url,
bare_path,
sha,
dep_ref=dep_ref,
)

@staticmethod
def _parse_ls_remote_output(output: str) -> list[RemoteRef]:
"""Backward-compat stub -- delegates to git_remote_ops."""
@@ -1093,9 +1110,22 @@ def _shared_bare_clone_fn(bare_target: Path) -> None:
is_commit_sha=bool(is_commit_sha),
)

def _shared_bare_fetch_fn(existing_bare: Path, ref_or_sha: str) -> bool:
# get_or_clone passes `ref` here; for SHA pins it is the SHA.
return self._fetch_sha_into_bare(
existing_bare,
ref_or_sha,
dep_ref=dep_ref,
)

try:
shared_bare_path = shared_cache.get_or_clone(
cache_host,
cache_owner,
cache_repo,
ref,
_shared_bare_clone_fn,
fetch_fn=_shared_bare_fetch_fn if is_commit_sha else None,
)
except Exception as e:
raise RuntimeError(f"Failed to clone repository: {e}") from e
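The `fetch_fn` hook keeps `get_or_clone` backward compatible: callers resolving branch or tag refs pass `None` and get the previous behavior. The `shared_cache` module is not part of this diff, so the dispatch implied by the new keyword argument is sketched below as an assumption; the cache layout and the re-clone-on-failure handling are hypothetical:

```python
import shutil
from collections.abc import Callable
from pathlib import Path

def get_or_clone(
    host: str,
    owner: str,
    repo: str,
    ref: str,
    clone_fn: Callable[[Path], None],
    fetch_fn: Callable[[Path, str], bool] | None = None,
) -> Path:
    """Assumed contract: reuse a cached bare, hydrating or re-cloning as needed."""
    bare = Path.home() / ".apm" / "cache" / host / owner / f"{repo}.git"  # hypothetical layout
    if not bare.exists():
        clone_fn(bare)  # first consumer pays for the network clone
    elif fetch_fn is not None and not fetch_fn(bare, ref):
        # Hydration failed (e.g. the server refuses fetch-by-SHA): discard the
        # stale bare and re-clone rather than return an unresolvable cache entry.
        shutil.rmtree(bare, ignore_errors=True)
        clone_fn(bare)
    return bare
```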