-
Notifications
You must be signed in to change notification settings - Fork 171
fix(deps): fetch missing SHA-pinned commits into shallow bare clones #1259
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -51,6 +51,10 @@ def __init__(self, base_dir: Path | None = None) -> None: | |
| # Maps cache_key -> _CacheEntry | ||
| self._entries: dict[tuple[str, str, str, str | None], _CacheEntry] = {} | ||
| self._temp_dirs: list[str] = [] | ||
| # Maps (host, owner, repo) -> list of (ref, bare_path) tuples. | ||
| # Used to locate an existing bare for the same repo when a new ref | ||
| # (typically a SHA pin on a transitive dep) is requested. | ||
| self._repo_bares: dict[tuple[str, str, str], list[tuple[str | None, Path]]] = {} | ||
|
|
||
| def __enter__(self) -> "SharedCloneCache": | ||
| return self | ||
|
|
@@ -65,6 +69,7 @@ def get_or_clone( | |
| repo: str, | ||
| ref: str | None, | ||
| clone_fn: Callable[[Path], None], | ||
| fetch_fn: Callable[[Path, str], bool] | None = None, | ||
| ) -> Path: | ||
| """Return a path to a shared clone, cloning on first access. | ||
|
|
||
|
|
@@ -76,6 +81,11 @@ def get_or_clone( | |
| clone_fn: Callable that performs the clone into the given | ||
| directory. Called at most once per unique key. Must | ||
| raise on failure so the entry is not cached. | ||
| fetch_fn: Optional callable ``(bare_path, sha) -> bool`` that | ||
| tries to fetch a missing SHA into an already-cloned bare | ||
| for the same repo (any ref). When provided and a suitable | ||
| bare exists, it is tried before falling back to a fresh | ||
| clone. Must not raise -- return False to signal failure. | ||
|
|
||
| Returns: | ||
| Path to the cloned repo directory. | ||
|
|
@@ -94,6 +104,28 @@ def get_or_clone( | |
| # A previous attempt failed. Clear error to allow retry. | ||
| entry.error = None | ||
|
|
||
| # Tier-0: try fetching the SHA into an existing bare for the | ||
| # same repo (different ref). This avoids a fresh network clone | ||
| # when a transitive dep pins a SHA that is missing only because | ||
| # the initial shallow bare did not include that commit. | ||
| if ref and fetch_fn: | ||
| existing_bare = self._find_repo_bare(host, owner, repo) | ||
| if existing_bare is not None: | ||
| try: | ||
| if fetch_fn(existing_bare, ref): | ||
| entry.path = existing_bare | ||
| return existing_bare | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The API is safe-by-default:
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fixed in a2b78f5. Added |
||
| except Exception: | ||
| _log.debug( | ||
| "Fetch into existing bare failed for %s/%s/%s ref=%s, " | ||
| "falling through to fresh clone", | ||
| host, | ||
| owner, | ||
| repo, | ||
| ref, | ||
| exc_info=True, | ||
| ) | ||
|
|
||
| # First caller (or retry after failure): perform the clone. | ||
| temp_dir = tempfile.mkdtemp( | ||
| dir=str(self._base_dir) if self._base_dir else None, | ||
|
|
@@ -121,11 +153,38 @@ def get_or_clone( | |
| f".git/ present: {git_dir.exists()})" | ||
| ) | ||
| entry.path = clone_path | ||
| with self._lock: | ||
| repo_key = (host, owner, repo) | ||
| if repo_key not in self._repo_bares: | ||
| self._repo_bares[repo_key] = [] | ||
| self._repo_bares[repo_key].append((ref, clone_path)) | ||
| return clone_path | ||
| except Exception as exc: | ||
| entry.error = exc | ||
| raise | ||
|
|
||
| def _find_repo_bare(self, host: str, owner: str, repo: str) -> Path | None: | ||
| """Return an existing bare path for the same repo (any ref), or None. | ||
|
|
||
| Searches the reverse index populated after each successful clone. | ||
| Returns the path of the first registered bare for ``(host, owner, | ||
| repo)`` regardless of which ref it was originally cloned at. | ||
|
|
||
| Args: | ||
| host: Git host (e.g. "github.com"). | ||
| owner: Repository owner. | ||
| repo: Repository name. | ||
|
|
||
| Returns: | ||
| A :class:`Path` to an existing bare, or ``None`` if none is | ||
| registered yet. | ||
| """ | ||
| with self._lock: | ||
| entries = self._repo_bares.get((host, owner, repo)) | ||
| if entries: | ||
| return entries[0][1] | ||
| return None | ||
|
|
||
| def _get_or_create_entry(self, key: tuple) -> "_CacheEntry": | ||
| """Retrieve or create a cache entry (thread-safe).""" | ||
| with self._lock: | ||
|
|
@@ -139,6 +198,7 @@ def cleanup(self) -> None: | |
| dirs_to_remove = list(self._temp_dirs) | ||
| self._temp_dirs.clear() | ||
| self._entries.clear() | ||
| self._repo_bares.clear() | ||
| for d in dirs_to_remove: | ||
| try: | ||
| shutil.rmtree(d, ignore_errors=True) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed in a2b78f5. The broad fetch now uses
`--depth=50` (configurable via the `APM_BROAD_FETCH_DEPTH` env var) instead of an unbounded fetch. This brings in enough history to resolve most ancestor SHAs without downloading the entire repo.