Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/cli-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -725,7 +725,7 @@ Use the `exclude` field to skip directories during compilation. This improves pe
- `projects/**/apm/**` - Complex nested matching with `**`

**Default exclusions** (always applied):
- `node_modules`, `__pycache__`, `.git`, `dist`, `build`
- `node_modules`, `__pycache__`, `.git`, `dist`, `build`, `apm_modules`
- Hidden directories (starting with `.`)

Command-line options always override `apm.yml` settings. Priority order:
Expand Down
1 change: 1 addition & 0 deletions docs/compilation.md
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ APM always excludes these directories (no configuration needed):
- `.git`
- `dist`
- `build`
- `apm_modules`
- Hidden directories (starting with `.`)

## Advanced Optimization Features
Expand Down
22 changes: 15 additions & 7 deletions src/apm_cli/compilation/context_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@
list = builtins.list
dict = builtins.dict

# Default directory names excluded from compilation scanning.
# Shared across _analyze_project_structure, _should_exclude_subdir, and _get_all_files.
DEFAULT_EXCLUDED_DIRNAMES = frozenset({
'node_modules', '__pycache__', '.git', 'dist', 'build', 'apm_modules',
})


@dataclass
class DirectoryAnalysis:
Expand Down Expand Up @@ -113,6 +119,7 @@ def __init__(self, base_dir: str = ".", exclude_patterns: Optional[List[str]] =

# Performance optimization caches
self._glob_cache: Dict[str, List[str]] = {}
self._glob_set_cache: Dict[str, Set[Path]] = {}
self._file_list_cache: Optional[List[Path]] = None
self._timing_enabled = False
self._phase_timings: Dict[str, float] = {}
Expand Down Expand Up @@ -162,8 +169,8 @@ def _get_all_files(self) -> List[Path]:
if self._file_list_cache is None:
self._file_list_cache = []
for root, dirs, files in os.walk(self.base_dir):
# Skip hidden directories for performance
dirs[:] = [d for d in dirs if not d.startswith('.')]
# Skip hidden and excluded directories for performance
dirs[:] = [d for d in dirs if not d.startswith('.') and d not in DEFAULT_EXCLUDED_DIRNAMES]
for file in files:
if not file.startswith('.'):
self._file_list_cache.append(Path(root) / file)
Expand Down Expand Up @@ -423,7 +430,7 @@ def _analyze_project_structure(self) -> None:
continue

# Default hardcoded exclusions for backwards compatibility
if any(ignore in str(current_path) for ignore in ['node_modules', '__pycache__', '.git', 'dist', 'build']):
if any(ignore in str(current_path) for ignore in DEFAULT_EXCLUDED_DIRNAMES):
continue
Comment thread
danielmeppiel marked this conversation as resolved.

# Apply configurable exclusion patterns
Expand Down Expand Up @@ -475,7 +482,7 @@ def _should_exclude_subdir(self, path: Path) -> bool:

# Also check if subdirectory is a default exclusion
dir_name = path.name
if dir_name in ['node_modules', '__pycache__', '.git', 'dist', 'build']:
if dir_name in DEFAULT_EXCLUDED_DIRNAMES:
return True
Comment thread
danielmeppiel marked this conversation as resolved.

# Skip hidden directories
Expand Down Expand Up @@ -786,9 +793,10 @@ def _file_matches_pattern(self, file_path: Path, pattern: str) -> bool:

# Use cached glob results instead of repeated glob calls
matches = self._cached_glob(expanded_pattern)
# Convert to Path objects for comparison
match_paths = {Path(match) for match in matches}
if rel_path in match_paths:
# Use cached Set[Path] to avoid recreating on every call
if expanded_pattern not in self._glob_set_cache:
self._glob_set_cache[expanded_pattern] = {Path(match) for match in matches}
if rel_path in self._glob_set_cache[expanded_pattern]:
return True
except (ValueError, OSError):
pass
Expand Down
116 changes: 116 additions & 0 deletions tests/unit/compilation/test_context_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -708,6 +708,122 @@ def test_single_item_brace_group(self):
result = optimizer._expand_glob_pattern("**/*.{ts}")
assert result == ["**/*.ts"]

def test_three_brace_groups(self):
"""Test expansion of three brace groups."""
optimizer = ContextOptimizer(base_dir="/tmp")
result = optimizer._expand_glob_pattern("{src,lib}/*.{test,spec}.{ts,js}")
expected = [
"src/*.test.ts", "src/*.test.js",
"src/*.spec.ts", "src/*.spec.js",
"lib/*.test.ts", "lib/*.test.js",
"lib/*.spec.ts", "lib/*.spec.js",
]
assert sorted(result) == sorted(expected)


class TestApmModulesExclusion:
"""Test that apm_modules/ is excluded from project scanning (Fixes #154)."""

@pytest.fixture
def project_with_apm_modules(self):
"""Create a project with a realistic apm_modules/ tree."""
with tempfile.TemporaryDirectory() as temp_dir:
base = Path(temp_dir)

# Real project directories
for d in ["src", "src/components", "tests", "docs"]:
(base / d).mkdir(parents=True, exist_ok=True)

# Real project files
for f in [
"src/index.ts", "src/components/App.tsx",
"tests/app.test.ts", "docs/README.md",
]:
(base / f).touch()

# Simulate apm_modules with many nested packages
for i in range(50):
pkg_dir = base / "apm_modules" / f"org/package-{i}" / "sub"
pkg_dir.mkdir(parents=True, exist_ok=True)
(pkg_dir / "SKILL.md").touch()
(pkg_dir / "instructions.md").touch()

yield base

def test_apm_modules_excluded_from_directory_cache(self, project_with_apm_modules):
"""apm_modules directories must not appear in _directory_cache."""
optimizer = ContextOptimizer(base_dir=str(project_with_apm_modules))
optimizer._analyze_project_structure()

cached_paths = set(optimizer._directory_cache.keys())
# Resolve to handle macOS /var -> /private/var symlink
resolved_base = project_with_apm_modules.resolve()

# Real dirs are cached
assert resolved_base / "src" in cached_paths
assert resolved_base / "tests" in cached_paths

# No apm_modules path is cached
apm_paths = [p for p in cached_paths if "apm_modules" in str(p)]
assert apm_paths == [], f"apm_modules leaked into cache: {apm_paths}"

def test_cache_size_unaffected_by_apm_modules(self, project_with_apm_modules):
"""Directory cache size should reflect only project dirs, not apm_modules."""
optimizer = ContextOptimizer(base_dir=str(project_with_apm_modules))
optimizer._analyze_project_structure()

# 50 packages × sub-directory each = 150+ dirs if not excluded.
# Real project has at most ~5 dirs (root, src, src/components, tests, docs).
assert len(optimizer._directory_cache) <= 10, (
f"Cache has {len(optimizer._directory_cache)} dirs — "
f"apm_modules likely not excluded"
)

def test_os_walk_prunes_apm_modules(self, project_with_apm_modules):
"""os.walk must not descend into apm_modules subdirectories."""
optimizer = ContextOptimizer(base_dir=str(project_with_apm_modules))
optimizer._analyze_project_structure()

# _should_exclude_subdir must flag apm_modules
apm_modules_path = project_with_apm_modules / "apm_modules"
assert optimizer._should_exclude_subdir(apm_modules_path) is True

def test_find_matching_dirs_ignores_apm_modules(self, project_with_apm_modules):
"""Pattern matching must not return directories inside apm_modules."""
optimizer = ContextOptimizer(base_dir=str(project_with_apm_modules))
optimizer._analyze_project_structure()

matching = optimizer._find_matching_directories("**/*.md")
apm_matches = [p for p in matching if "apm_modules" in str(p)]
assert apm_matches == [], f"apm_modules dirs matched: {apm_matches}"


class TestGlobCacheReuse:
"""Test that glob results are cached and Set[Path] conversion is not repeated."""

def test_set_path_cached_across_calls(self):
"""_file_matches_pattern must cache the Set[Path] conversion."""
with tempfile.TemporaryDirectory() as temp_dir:
base = Path(temp_dir)
(base / "src").mkdir()
(base / "src" / "a.ts").touch()
(base / "src" / "b.ts").touch()

optimizer = ContextOptimizer(base_dir=str(base))
optimizer._analyze_project_structure()

file_a = base / "src" / "a.ts"
file_b = base / "src" / "b.ts"

# First call populates cache
optimizer._file_matches_pattern(file_a, "**/*.ts")
assert "**/*.ts" in optimizer._glob_set_cache

# Second call should reuse the cached set (not recreate it)
cached_set_id = id(optimizer._glob_set_cache["**/*.ts"])
optimizer._file_matches_pattern(file_b, "**/*.ts")
assert id(optimizer._glob_set_cache["**/*.ts"]) == cached_set_id


if __name__ == "__main__":
pytest.main([__file__])