diff --git a/docs/cli-reference.md b/docs/cli-reference.md index 60a96e852..cb0abb97e 100644 --- a/docs/cli-reference.md +++ b/docs/cli-reference.md @@ -126,6 +126,7 @@ apm install [PACKAGES...] [OPTIONS] - `--verbose` - Show detailed installation information - `--trust-transitive-mcp` - Trust self-defined MCP servers from transitive packages (skip re-declaration requirement) + **Behavior:** - `apm install` (no args): Installs **all** packages from `apm.yml` - `apm install `: Installs **only** the specified package (adds to `apm.yml` if not present) diff --git a/src/apm_cli/deps/github_downloader.py b/src/apm_cli/deps/github_downloader.py index 0db08851c..72c5d3107 100644 --- a/src/apm_cli/deps/github_downloader.py +++ b/src/apm_cli/deps/github_downloader.py @@ -38,6 +38,18 @@ ) +_GIT_FETCH_PROGRESS_RE = re.compile( + r"(?:remote:\s*)?(Counting objects|Compressing objects|Receiving objects|Resolving deltas):\s+(\d+)%" +) + +_GIT_FETCH_STAGE_RANGES = { + 'Counting objects': (0, 5), + 'Compressing objects': (5, 15), + 'Receiving objects': (15, 90), + 'Resolving deltas': (90, 100), +} + + def normalize_collection_path(virtual_path: str) -> str: """Normalize a collection virtual path by stripping any existing extension. @@ -999,12 +1011,7 @@ def download_collection_package(self, dep_ref: DependencyReference, target_path: failed_items = [] total_items = len(manifest.items) - for idx, item in enumerate(manifest.items): - # Update progress for each item - if progress_obj and progress_task_id is not None: - progress_percent = 20 + int((idx / total_items) * 70) # 20% to 90% - progress_obj.update(progress_task_id, completed=progress_percent, total=100) - + for idx, item in enumerate(manifest.items, start=1): try: # Download the file item_content = self.download_raw_file(dep_ref, item.path, ref) @@ -1026,7 +1033,10 @@ def download_collection_package(self, dep_ref: DependencyReference, target_path: except RuntimeError as e: # Log the failure but continue with other items failed_items.append(f"{item.path} ({e})") - continue + finally: + if progress_obj and progress_task_id is not None: + progress_percent = 20 + int((idx / total_items) * 70) # 20% to 90% + progress_obj.update(progress_task_id, completed=progress_percent, total=100) # Check if we downloaded at least some items if downloaded_count == 0: @@ -1079,7 +1089,15 @@ def download_collection_package(self, dep_ref: DependencyReference, target_path: dependency_ref=dep_ref # Store for canonical dependency string ) - def _try_sparse_checkout(self, dep_ref: DependencyReference, temp_clone_path: Path, subdir_path: str, ref: str = None) -> bool: + def _try_sparse_checkout( + self, + dep_ref: DependencyReference, + temp_clone_path: Path, + subdir_path: str, + ref: str = None, + progress_task_id=None, + progress_obj=None, + ) -> bool: """Attempt sparse-checkout to download only a subdirectory (git 2.25+). Returns True on success. Falls back silently on failure. @@ -1089,24 +1107,41 @@ def _try_sparse_checkout(self, dep_ref: DependencyReference, temp_clone_path: Pa temp_clone_path.mkdir(parents=True, exist_ok=True) env = {**os.environ, **(self.git_env or {})} auth_url = self._build_repo_url(dep_ref.repo_url, use_ssh=False, dep_ref=dep_ref) - cmds = [ ['git', 'init'], ['git', 'remote', 'add', 'origin', auth_url], ['git', 'sparse-checkout', 'init', '--cone'], ['git', 'sparse-checkout', 'set', subdir_path], ] - fetch_cmd = ['git', 'fetch', 'origin'] + fetch_cmd = ['git', 'fetch', '--progress', 'origin'] fetch_cmd.append(ref or 'HEAD') fetch_cmd.append('--depth=1') cmds.append(fetch_cmd) cmds.append(['git', 'checkout', 'FETCH_HEAD']) - for cmd in cmds: - result = subprocess.run( - cmd, cwd=str(temp_clone_path), env=env, - capture_output=True, text=True, timeout=120, - ) + for idx, cmd in enumerate(cmds, start=1): + is_fetch_step = cmd[:3] == ['git', 'fetch', '--progress'] + if is_fetch_step: + fetch_start = self._get_sparse_checkout_progress_checkpoint(idx - 1, len(cmds)) + fetch_end = self._get_sparse_checkout_progress_checkpoint(idx, len(cmds)) + result = self._run_sparse_fetch_with_progress( + cmd, + temp_clone_path, + env, + fetch_start, + fetch_end, + progress_task_id=progress_task_id, + progress_obj=progress_obj, + ) + else: + if progress_obj and progress_task_id is not None: + progress_percent = self._get_sparse_checkout_progress_checkpoint(idx, len(cmds)) + progress_obj.update(progress_task_id, completed=progress_percent, total=100) + + result = subprocess.run( + cmd, cwd=str(temp_clone_path), env=env, + capture_output=True, text=True, timeout=120, + ) if result.returncode != 0: _debug(f"Sparse-checkout step failed ({' '.join(cmd)}): {result.stderr.strip()}") return False @@ -1116,6 +1151,105 @@ def _try_sparse_checkout(self, dep_ref: DependencyReference, temp_clone_path: Pa _debug(f"Sparse-checkout failed: {e}") return False + def _get_sparse_checkout_progress_checkpoint(self, step_index: int, total_steps: int) -> int: + """Map sparse-checkout step boundaries into the install progress range.""" + return 20 + int((step_index / total_steps) * 50) + + def _parse_git_fetch_progress(self, line: str) -> Optional[int]: + """Parse a git fetch progress line into a monotonic 0-100 fetch percentage.""" + match = _GIT_FETCH_PROGRESS_RE.search(line) + if not match: + return None + + stage, percent_text = match.groups() + stage_start, stage_end = _GIT_FETCH_STAGE_RANGES[stage] + stage_percent = max(0, min(int(percent_text), 100)) + return stage_start + int(((stage_end - stage_start) * stage_percent) / 100) + + def _run_sparse_fetch_with_progress( + self, + cmd: list[str], + temp_clone_path: Path, + env: Dict[str, str], + fetch_start: int, + fetch_end: int, + progress_task_id=None, + progress_obj=None, + ): + """Run `git fetch` and translate native progress output into Rich updates.""" + import subprocess + import threading + + stderr_lines: list[str] = [] + last_completed = fetch_start + + def handle_progress_line(line: str) -> None: + nonlocal last_completed + + stderr_lines.append(line) + + fetch_percent = self._parse_git_fetch_progress(line) + if fetch_percent is None or not progress_obj or progress_task_id is None: + return + + mapped_progress = fetch_start + int(((fetch_end - fetch_start) * fetch_percent) / 100) + if mapped_progress > last_completed: + progress_obj.update(progress_task_id, completed=mapped_progress, total=100) + last_completed = mapped_progress + + process = subprocess.Popen( + cmd, + cwd=str(temp_clone_path), + env=env, + stdout=subprocess.DEVNULL, + stderr=subprocess.PIPE, + text=True, + bufsize=1, + ) + + def consume_stderr() -> None: + buffer = '' + stderr_pipe = process.stderr + if stderr_pipe is None: + return + + while True: + chunk = stderr_pipe.read(1) + if not chunk: + break + + if chunk in {'\r', '\n'}: + line = buffer.strip() + if line: + handle_progress_line(line) + buffer = '' + continue + + buffer += chunk + + line = buffer.strip() + if line: + handle_progress_line(line) + + stderr_thread = threading.Thread(target=consume_stderr, daemon=True) + stderr_thread.start() + + try: + process.wait(timeout=120) + except subprocess.TimeoutExpired: + process.kill() + process.wait() + stderr_thread.join(timeout=1) + raise + + stderr_thread.join(timeout=1) + stderr_text = '\n'.join(stderr_lines) + + if process.returncode == 0 and progress_obj and progress_task_id is not None and last_completed < fetch_end: + progress_obj.update(progress_task_id, completed=fetch_end, total=100) + + return subprocess.CompletedProcess(cmd, process.returncode, stdout='', stderr=stderr_text) + def download_subdirectory_package(self, dep_ref: DependencyReference, target_path: Path, progress_task_id=None, progress_obj=None) -> PackageInfo: """Download a subdirectory from a repo as an APM package. @@ -1159,7 +1293,14 @@ def download_subdirectory_package(self, dep_ref: DependencyReference, target_pat progress_obj.update(progress_task_id, completed=20, total=100) # Phase 4 (#171): Try sparse-checkout first (git 2.25+), fall back to full clone - sparse_ok = self._try_sparse_checkout(dep_ref, temp_clone_path, subdir_path, ref) + sparse_ok = self._try_sparse_checkout( + dep_ref, + temp_clone_path, + subdir_path, + ref, + progress_task_id=progress_task_id, + progress_obj=progress_obj, + ) if not sparse_ok: # Full clone fallback diff --git a/tests/test_github_downloader.py b/tests/test_github_downloader.py index 67d634f07..708414fa8 100644 --- a/tests/test_github_downloader.py +++ b/tests/test_github_downloader.py @@ -1,11 +1,13 @@ """Tests for GitHub package downloader.""" +import io import os import pytest import tempfile import shutil from pathlib import Path -from unittest.mock import Mock, patch, MagicMock +from types import SimpleNamespace +from unittest.mock import Mock, call, patch, MagicMock from urllib.parse import urlparse import requests as requests_lib @@ -997,6 +999,189 @@ def setup_subdir(*args, **kwargs): with pytest.raises(RuntimeError, match="Failed to checkout commit"): downloader.download_subdirectory_package(dep_ref, target) + @patch('apm_cli.deps.github_downloader.Repo') + @patch('apm_cli.deps.github_downloader.validate_apm_package') + def test_subdirectory_download_passes_progress_context_to_sparse_checkout(self, mock_validate, mock_repo_class): + """Subdirectory downloads should pass the active progress context into sparse checkout.""" + dep_ref = self._make_dep_ref(ref='main') + + mock_repo = Mock() + mock_repo.head.commit.hexsha = 'abc123' + mock_repo_class.return_value = mock_repo + + mock_validation = ValidationResult() + mock_validation.is_valid = True + mock_validation.package = APMPackage(name='my-skill', version='1.0.0') + mock_validate.return_value = mock_validation + + with patch.dict(os.environ, {}, clear=True): + downloader = GitHubPackageDownloader() + + progress_obj = Mock() + + def sparse_success(dep_arg, clone_path, subdir_path, ref_arg, progress_task_id=None, progress_obj=None): + subdir = clone_path / 'packages' / 'my-skill' + subdir.mkdir(parents=True) + (subdir / 'apm.yml').write_text('name: my-skill\nversion: 1.0.0\n') + return True + + with patch.object(downloader, '_try_sparse_checkout', side_effect=sparse_success) as mock_sparse, \ + patch.object(downloader, '_clone_with_fallback') as mock_clone: + target = self.temp_dir / 'my-skill' + downloader.download_subdirectory_package(dep_ref, target, progress_task_id=17, progress_obj=progress_obj) + + assert mock_sparse.call_args.args[0] is dep_ref + assert mock_sparse.call_args.args[2] == 'packages/my-skill' + assert mock_sparse.call_args.args[3] == 'main' + assert mock_sparse.call_args.kwargs == { + 'progress_task_id': 17, + 'progress_obj': progress_obj, + } + mock_clone.assert_not_called() + + +class TestDownloadProgressReporting: + """Regression tests for install/update progress sequencing.""" + + def setup_method(self): + self.temp_dir = Path(tempfile.mkdtemp()) + + def teardown_method(self): + if self.temp_dir.exists(): + shutil.rmtree(self.temp_dir, ignore_errors=True) + + def _make_sparse_dep_ref(self, ref='main'): + return DependencyReference( + repo_url='owner/monorepo', + host='github.com', + reference=ref, + virtual_path='packages/my-skill', + is_virtual=True, + ) + + def test_parse_git_fetch_progress_handles_known_and_unknown_lines(self): + """Git fetch progress parsing should recognize known stages and ignore noise.""" + downloader = GitHubPackageDownloader() + + assert downloader._parse_git_fetch_progress('remote: Counting objects: 50% (5/10)') == 2 + assert downloader._parse_git_fetch_progress('Receiving objects: 50% (5/10)') == 52 + assert downloader._parse_git_fetch_progress('Resolving deltas: 100% (5/5), done.') == 100 + assert downloader._parse_git_fetch_progress('Total 10 (delta 0), reused 0 (delta 0)') is None + + @patch('subprocess.run') + def test_try_sparse_checkout_always_uses_native_fetch_progress(self, mock_run): + """Sparse checkout should always use native git fetch progress parsing.""" + mock_run.return_value = SimpleNamespace(returncode=0, stderr='') + + with patch.dict(os.environ, {}, clear=True): + downloader = GitHubPackageDownloader() + + with patch.object(downloader, '_build_repo_url', return_value='https://github.com/owner/monorepo.git'), \ + patch.object(downloader, '_run_sparse_fetch_with_progress', return_value=SimpleNamespace(returncode=0, stderr='')) as mock_native: + sparse_ok = downloader._try_sparse_checkout( + self._make_sparse_dep_ref(), + self.temp_dir / 'repo-default-native', + 'packages/my-skill', + 'main', + ) + + assert sparse_ok is True + mock_native.assert_called_once() + + @patch('subprocess.Popen') + @patch('subprocess.run') + def test_try_sparse_checkout_streams_fetch_progress_within_sparse_slice(self, mock_run, mock_popen): + """Sparse checkout should stream native git fetch progress inside the fetch step.""" + downloader = GitHubPackageDownloader() + dep_ref = self._make_sparse_dep_ref() + progress_obj = Mock() + temp_clone_path = self.temp_dir / 'repo' + + mock_run.return_value = SimpleNamespace(returncode=0, stderr='') + + fetch_output = ( + 'remote: Counting objects: 100% (10/10)\r' + 'Receiving objects: 50% (5/10)\r' + 'Resolving deltas: 100% (2/2), done.\n' + ) + mock_popen.return_value = SimpleNamespace( + stderr=io.StringIO(fetch_output), + returncode=0, + wait=Mock(return_value=0), + kill=Mock(), + ) + + with patch.object(downloader, '_build_repo_url', return_value='https://github.com/owner/monorepo.git'): + sparse_ok = downloader._try_sparse_checkout( + dep_ref, + temp_clone_path, + 'packages/my-skill', + 'main', + progress_task_id=9, + progress_obj=progress_obj, + ) + + assert sparse_ok is True + assert [call.args[0] for call in mock_run.call_args_list] == [ + ['git', 'init'], + ['git', 'remote', 'add', 'origin', 'https://github.com/owner/monorepo.git'], + ['git', 'sparse-checkout', 'init', '--cone'], + ['git', 'sparse-checkout', 'set', 'packages/my-skill'], + ['git', 'checkout', 'FETCH_HEAD'], + ] + assert mock_popen.call_args.args[0] == ['git', 'fetch', '--progress', 'origin', 'main', '--depth=1'] + + actual_progress = [call.kwargs['completed'] for call in progress_obj.update.call_args_list] + assert actual_progress == [28, 36, 45, 53, 57, 61, 70] + + def test_collection_download_updates_progress_after_each_item_completion(self): + """Collection progress should advance only after each item finishes downloading.""" + downloader = GitHubPackageDownloader() + dep_ref = DependencyReference( + repo_url='owner/collection-repo', + host='github.com', + reference='main', + virtual_path='collections/team-kit', + is_virtual=True, + ) + progress_obj = Mock() + target = self.temp_dir / 'team-kit' + events = [] + + manifest = SimpleNamespace( + description='Team kit', + tags=['team'], + item_count=2, + items=[ + SimpleNamespace(path='skills/first.skill.md', subdirectory='skills'), + SimpleNamespace(path='prompts/second.prompt.md', subdirectory='prompts'), + ], + ) + + def progress_update(task_id, completed, total): + events.append(('progress', completed)) + + def download_raw_file(dep_arg, path, ref_arg): + events.append(('download', path)) + if path.endswith('.collection.yml'): + return b'collection manifest' + return path.encode('utf-8') + + progress_obj.update.side_effect = progress_update + + with patch.object(downloader, 'download_raw_file', side_effect=download_raw_file), \ + patch('apm_cli.deps.collection_parser.parse_collection_yml', return_value=manifest): + downloader.download_collection_package(dep_ref, target, progress_task_id=3, progress_obj=progress_obj) + + assert events == [ + ('progress', 10), + ('download', 'collections/team-kit.collection.yml'), + ('download', 'skills/first.skill.md'), + ('progress', 55), + ('download', 'prompts/second.prompt.md'), + ('progress', 90), + ] + if __name__ == '__main__': - pytest.main([__file__]) \ No newline at end of file + pytest.main([__file__])