Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

- `apm install` now fetches missing SHA-pinned commits into existing shallow bare clones on demand, fixing monorepo transitive dependencies that reference sibling packages at a commit other than HEAD. (#1258)
- `apm marketplace add` accepts GitLab-class hosts (`gitlab.com` and self-managed instances configured via `GITLAB_HOST` / `APM_GITLAB_HOSTS`); unsupported generic hosts now show separate recovery hints for GHES (`GITHUB_HOST`) and self-managed GitLab instead of only `GITHUB_HOST`. (#1149)
- **GitLab monorepo marketplaces:** `apm install plugin@marketplace` now resolves plugins whose sources live in a subdirectory of the marketplace repository on GitLab-class hosts (`gitlab.com` and self-managed GitLab when classified as GitLab), matching explicit `git:` + `path:` semantics without requiring that hand-written object form. (#1149)
- `apm install` now rejects unsupported flat-format `dependencies` (e.g. `dependencies: [owner/repo]`) with a clear error and structured-format hint instead of silently ignoring them; the resolver also surfaces `ValueError` from malformed transitive manifests as warnings instead of swallowing them. (#1189)
Expand Down
163 changes: 162 additions & 1 deletion src/apm_cli/deps/bare_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,166 @@ def _bare_action(url: str, env: dict[str, str], target: Path) -> None:
)


def fetch_sha_into_bare(
    execute_transport_plan: Callable[..., None],
    repo_url_base: str,
    bare_path: Path,
    sha: str,
    *,
    dep_ref: DependencyReference,
) -> bool:
    """Attempt to fetch a specific SHA into an existing bare repo.

    Used to hydrate shallow bare clones that are missing a transitive
    SHA-pinned commit. Three-step strategy:

    1. **Check first** -- ``git rev-parse --verify <sha>^{commit}`` against
       the bare. If the SHA is already present, returns ``True`` immediately
       without any network I/O.
    2. **Shallow fetch by SHA** (full 40-char SHAs only) -- invokes
       ``execute_transport_plan`` with a fetch action that runs
       ``git fetch --depth=1 <url> <sha>``. Uses the authenticated URL
       supplied by the transport plan, NOT ``git fetch origin <sha>``,
       because ``remote.origin.url`` has been redacted to ``redacted://`` by
       :func:`_scrub_bare_remote_url`. After the fetch, verifies with
       ``rev-parse --verify``. Returns ``True`` on success.
    3. **Broaden shallow** -- invokes ``execute_transport_plan`` with a
       fetch action that runs ``git fetch --depth=<N> <url>``, deepening the
       shallow boundary by ``N`` commits (default 50, overridable via the
       ``APM_BROAD_FETCH_DEPTH`` env var; values <= 0 fall back to an
       unbounded fetch). A bounded deepen brings in enough history to
       resolve most ancestor SHAs without downloading the entire remote.
       After the fetch, verifies with ``rev-parse --verify``. Returns
       ``True`` on success.

    On any failure in steps 2 or 3, returns ``False`` so the caller can
    fall back to a fresh bare clone.

    Note: this function deliberately does NOT call ``git update-ref HEAD``
    after a successful fetch. The consumer's :func:`materialize_from_bare`
    handles SHA resolution independently via the ``known_sha`` parameter.

    Args:
        execute_transport_plan: Callable that orchestrates auth and protocol
            fallback (typically ``self._execute_transport_plan``).
        repo_url_base: Base repo URL (unauthenticated) passed to the
            transport plan so it can inject credentials.
        bare_path: Path to the existing bare repo on disk.
        sha: The Git commit SHA to fetch.
        dep_ref: Dependency reference used by the transport plan for
            auth context.

    Returns:
        ``True`` if the SHA is now present in the bare, ``False`` otherwise.
    """
    import os

    from ..utils.git_env import get_git_executable

    _log = logging.getLogger(__name__)
    git_exe = get_git_executable()

    def _rev_parse_present() -> bool:
        """Return True if sha is already reachable in the bare."""
        try:
            result = subprocess.run(
                [
                    git_exe,
                    "--git-dir",
                    str(bare_path),
                    "rev-parse",
                    "--verify",
                    f"{sha}^{{commit}}",
                ],
                capture_output=True,
                timeout=10,
            )
            return result.returncode == 0
        except Exception:
            return False

    def _broad_fetch_depth() -> int:
        """Return the depth for the step-3 fetch (<= 0 means unbounded)."""
        raw = os.environ.get("APM_BROAD_FETCH_DEPTH", "").strip()
        if not raw:
            return 50
        try:
            return int(raw)
        except ValueError:
            # Malformed override: keep the safe bounded default rather than
            # aborting the hydration attempt.
            return 50

    # Step 1: check first -- no network if SHA already present.
    _log.debug("fetch_sha_into_bare: checking if %s is present in %s", sha[:12], bare_path)
    if _rev_parse_present():
        _log.debug("fetch_sha_into_bare: SHA %s already present, skipping fetch", sha[:12])
        return True

    # Step 2: shallow fetch by full SHA (only for full 40-char SHAs).
    if len(sha) == 40:
        _log.debug(
            "fetch_sha_into_bare: attempting shallow fetch of %s into %s", sha[:12], bare_path
        )

        def _fetch_action_sha(url: str, env: dict[str, str], target: Path) -> None:
            subprocess.run(
                [git_exe, "-C", str(bare_path), "fetch", "--depth=1", url, sha],
                env=env,
                check=True,
                capture_output=True,
                timeout=300,
            )

        try:
            execute_transport_plan(
                repo_url_base,
                bare_path,
                dep_ref=dep_ref,
                clone_action=_fetch_action_sha,
            )
            if _rev_parse_present():
                _log.debug("fetch_sha_into_bare: shallow fetch of %s succeeded", sha[:12])
                return True
        except subprocess.CalledProcessError:
            _log.debug(
                "fetch_sha_into_bare: shallow fetch of %s failed, broadening shallow",
                sha[:12],
            )
        except Exception:
            _log.debug(
                "fetch_sha_into_bare: shallow fetch of %s raised unexpected error",
                sha[:12],
                exc_info=True,
            )

    # Step 3: broaden shallow with a bounded depth instead of an unbounded
    # fetch of all remote history (review fix: unbounded broad fetch could
    # download the entire repository for large remotes).
    _log.debug("fetch_sha_into_bare: broadening shallow in %s to find %s", bare_path, sha[:12])

    def _fetch_action_broad(url: str, env: dict[str, str], target: Path) -> None:
        cmd = [git_exe, "-C", str(bare_path), "fetch"]
        depth = _broad_fetch_depth()
        if depth > 0:
            cmd.append(f"--depth={depth}")
        cmd.append(url)
        subprocess.run(
            cmd,
            env=env,
            check=True,
            capture_output=True,
            timeout=300,
        )

    try:
        execute_transport_plan(
            repo_url_base,
            bare_path,
            dep_ref=dep_ref,
            clone_action=_fetch_action_broad,
        )
        if _rev_parse_present():
            _log.debug("fetch_sha_into_bare: broad fetch succeeded, %s now present", sha[:12])
            return True
    except subprocess.CalledProcessError:
        _log.debug(
            "fetch_sha_into_bare: broad fetch failed for %s in %s",
            sha[:12],
            bare_path,
        )
    except Exception:
        _log.debug(
            "fetch_sha_into_bare: broad fetch raised unexpected error for %s",
            sha[:12],
            exc_info=True,
        )

    _log.debug(
        "fetch_sha_into_bare: all fetch attempts exhausted for %s in %s; "
        "caller should fall back to a fresh clone",
        sha[:12],
        bare_path,
    )
    return False


def materialize_from_bare(
bare_path: Path,
consumer_dir: Path,
Expand Down Expand Up @@ -389,8 +549,9 @@ def materialize_from_bare(
env=env,
check=False,
)
checkout_target = known_sha or "HEAD"
subprocess.run(
[git_exe, "-C", str(consumer_dir), "checkout", "HEAD"],
[git_exe, "-C", str(consumer_dir), "checkout", checkout_target],
capture_output=True,
text=True,
timeout=60,
Expand Down
31 changes: 30 additions & 1 deletion src/apm_cli/deps/github_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from .bare_cache import (
bare_clone_with_fallback,
clone_with_fallback,
fetch_sha_into_bare,
materialize_from_bare,
)
from .download_strategies import DownloadDelegate
Expand Down Expand Up @@ -578,6 +579,22 @@ def _materialize_from_bare(
"""Thin delegate to :func:`bare_cache.materialize_from_bare` (kept on the class so test patches still work)."""
return materialize_from_bare(bare_path, consumer_dir, ref=ref, env=env, known_sha=known_sha)

def _fetch_sha_into_bare(
    self,
    bare_path: Path,
    sha: str,
    *,
    dep_ref: "DependencyReference",
) -> bool:
    """Forward to :func:`bare_cache.fetch_sha_into_bare`.

    Kept as an instance method (instead of callers importing the module
    function directly) so existing test patches on the class still
    intercept the call.

    Args:
        bare_path: Existing bare repo to hydrate.
        sha: Commit SHA to fetch into the bare.
        dep_ref: Dependency reference supplying auth context and the
            base repo URL.

    Returns:
        True if the SHA is present in the bare after the attempt.
    """
    base_url = dep_ref.repo_url
    transport = self._execute_transport_plan
    return fetch_sha_into_bare(
        transport,
        base_url,
        bare_path,
        sha,
        dep_ref=dep_ref,
    )

@staticmethod
def _parse_ls_remote_output(output: str) -> list[RemoteRef]:
"""Backward-compat stub -- delegates to git_remote_ops."""
Expand Down Expand Up @@ -1091,9 +1108,21 @@ def _shared_bare_clone_fn(bare_target: Path) -> None:
is_commit_sha=bool(is_commit_sha),
)

def _shared_bare_fetch_fn(existing_bare: Path, sha: str) -> bool:
    # Adapter handed to SharedCloneCache.get_or_clone as its ``fetch_fn``:
    # tries to hydrate an already-cloned bare with the pinned SHA before the
    # cache falls back to a fresh clone. Closes over ``self`` and ``dep_ref``
    # from the enclosing method.
    return self._fetch_sha_into_bare(
        existing_bare,
        sha,
        dep_ref=dep_ref,
    )

try:
shared_bare_path = shared_cache.get_or_clone(
cache_host, cache_owner, cache_repo, ref, _shared_bare_clone_fn
cache_host,
cache_owner,
cache_repo,
ref,
_shared_bare_clone_fn,
fetch_fn=_shared_bare_fetch_fn if is_commit_sha else None,
)
except Exception as e:
raise RuntimeError(f"Failed to clone repository: {e}") from e
Expand Down
60 changes: 60 additions & 0 deletions src/apm_cli/deps/shared_clone_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ def __init__(self, base_dir: Path | None = None) -> None:
# Maps cache_key -> _CacheEntry
self._entries: dict[tuple[str, str, str, str | None], _CacheEntry] = {}
self._temp_dirs: list[str] = []
# Maps (host, owner, repo) -> list of (ref, bare_path) tuples.
# Used to locate an existing bare for the same repo when a new ref
# (typically a SHA pin on a transitive dep) is requested.
self._repo_bares: dict[tuple[str, str, str], list[tuple[str | None, Path]]] = {}

def __enter__(self) -> "SharedCloneCache":
return self
Expand All @@ -65,6 +69,7 @@ def get_or_clone(
repo: str,
ref: str | None,
clone_fn: Callable[[Path], None],
fetch_fn: Callable[[Path, str], bool] | None = None,
) -> Path:
"""Return a path to a shared clone, cloning on first access.

Expand All @@ -76,6 +81,11 @@ def get_or_clone(
clone_fn: Callable that performs the clone into the given
directory. Called at most once per unique key. Must
raise on failure so the entry is not cached.
fetch_fn: Optional callable ``(bare_path, sha) -> bool`` that
tries to fetch a missing SHA into an already-cloned bare
for the same repo (any ref). When provided and a suitable
bare exists, it is tried before falling back to a fresh
clone. Must not raise -- return False to signal failure.

Returns:
Path to the cloned repo directory.
Expand All @@ -94,6 +104,28 @@ def get_or_clone(
# A previous attempt failed. Clear error to allow retry.
entry.error = None

# Tier-0: try fetching the SHA into an existing bare for the
# same repo (different ref). This avoids a fresh network clone
# when a transitive dep pins a SHA that is missing only because
# the initial shallow bare did not include that commit.
if ref and fetch_fn:
existing_bare = self._find_repo_bare(host, owner, repo)
if existing_bare is not None:
try:
if fetch_fn(existing_bare, ref):
entry.path = existing_bare
return existing_bare
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The API is safe-by-default: fetch_fn is only supplied by GitHubPackageDownloader._shared_bare_fetch_fn which is wired only for 40-char hex SHA refs (gated by is_commit_sha()). The get_or_clone parameter is intentionally named fetch_fn (not sha_fetch_fn) to keep the API general, but the only caller guards it. Added test test_fetch_fn_none_skips_tier0 to verify Tier-0 is bypassed when fetch_fn=None.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in a2b78f5. Added _bare_fetch_locks: dict[Path, threading.Lock] to SharedCloneCache. Before invoking fetch_fn, the code acquires a per-bare-path lock (created under self._lock), serialising concurrent fetches into the same bare while still allowing parallel fetches to different bares. Locks are cleared in cleanup().

except Exception:
_log.debug(
"Fetch into existing bare failed for %s/%s/%s ref=%s, "
"falling through to fresh clone",
host,
owner,
repo,
ref,
exc_info=True,
)

# First caller (or retry after failure): perform the clone.
temp_dir = tempfile.mkdtemp(
dir=str(self._base_dir) if self._base_dir else None,
Expand Down Expand Up @@ -121,11 +153,38 @@ def get_or_clone(
f".git/ present: {git_dir.exists()})"
)
entry.path = clone_path
with self._lock:
repo_key = (host, owner, repo)
if repo_key not in self._repo_bares:
self._repo_bares[repo_key] = []
self._repo_bares[repo_key].append((ref, clone_path))
return clone_path
except Exception as exc:
entry.error = exc
raise

def _find_repo_bare(self, host: str, owner: str, repo: str) -> Path | None:
"""Return an existing bare path for the same repo (any ref), or None.

Searches the reverse index populated after each successful clone.
Returns the path of the first registered bare for ``(host, owner,
repo)`` regardless of which ref it was originally cloned at.

Args:
host: Git host (e.g. "github.com").
owner: Repository owner.
repo: Repository name.

Returns:
A :class:`Path` to an existing bare, or ``None`` if none is
registered yet.
"""
with self._lock:
entries = self._repo_bares.get((host, owner, repo))
if entries:
return entries[0][1]
return None

def _get_or_create_entry(self, key: tuple) -> "_CacheEntry":
"""Retrieve or create a cache entry (thread-safe)."""
with self._lock:
Expand All @@ -139,6 +198,7 @@ def cleanup(self) -> None:
dirs_to_remove = list(self._temp_dirs)
self._temp_dirs.clear()
self._entries.clear()
self._repo_bares.clear()
for d in dirs_to_remove:
try:
shutil.rmtree(d, ignore_errors=True)
Expand Down
Loading
Loading