Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions src/apm_cli/compilation/context_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,8 @@ def _get_all_files(self) -> List[Path]:
if self._file_list_cache is None:
self._file_list_cache = []
for root, dirs, files in os.walk(self.base_dir):
# Skip hidden directories for performance
dirs[:] = [d for d in dirs if not d.startswith('.')]
# Skip hidden and excluded directories for performance
dirs[:] = [d for d in dirs if not d.startswith('.') and d not in ('node_modules', '__pycache__', 'dist', 'build', 'apm_modules')]
Comment thread
danielmeppiel marked this conversation as resolved.
Outdated
for file in files:
if not file.startswith('.'):
self._file_list_cache.append(Path(root) / file)
Expand Down Expand Up @@ -423,7 +423,7 @@ def _analyze_project_structure(self) -> None:
continue

# Default hardcoded exclusions for backwards compatibility
if any(ignore in str(current_path) for ignore in ['node_modules', '__pycache__', '.git', 'dist', 'build']):
if any(ignore in str(current_path) for ignore in ['node_modules', '__pycache__', '.git', 'dist', 'build', 'apm_modules']):
continue
Comment thread
danielmeppiel marked this conversation as resolved.

# Apply configurable exclusion patterns
Expand Down Expand Up @@ -475,7 +475,7 @@ def _should_exclude_subdir(self, path: Path) -> bool:

# Also check if subdirectory is a default exclusion
dir_name = path.name
if dir_name in ['node_modules', '__pycache__', '.git', 'dist', 'build']:
if dir_name in ['node_modules', '__pycache__', '.git', 'dist', 'build', 'apm_modules']:
return True
Comment thread
danielmeppiel marked this conversation as resolved.

# Skip hidden directories
Expand Down Expand Up @@ -786,9 +786,11 @@ def _file_matches_pattern(self, file_path: Path, pattern: str) -> bool:

# Use cached glob results instead of repeated glob calls
matches = self._cached_glob(expanded_pattern)
# Convert to Path objects for comparison
match_paths = {Path(match) for match in matches}
if rel_path in match_paths:
# Use cached Set[Path] to avoid recreating on every call
cache_key = f"_set_{expanded_pattern}"
if cache_key not in self._glob_cache:
self._glob_cache[cache_key] = {Path(match) for match in matches}
if rel_path in self._glob_cache[cache_key]:
Comment thread
danielmeppiel marked this conversation as resolved.
Outdated
return True
except (ValueError, OSError):
pass
Expand Down
116 changes: 116 additions & 0 deletions tests/unit/compilation/test_context_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -708,6 +708,122 @@ def test_single_item_brace_group(self):
result = optimizer._expand_glob_pattern("**/*.{ts}")
assert result == ["**/*.ts"]

def test_three_brace_groups(self):
"""Test expansion of three brace groups."""
optimizer = ContextOptimizer(base_dir="/tmp")
result = optimizer._expand_glob_pattern("{src,lib}/*.{test,spec}.{ts,js}")
expected = [
"src/*.test.ts", "src/*.test.js",
"src/*.spec.ts", "src/*.spec.js",
"lib/*.test.ts", "lib/*.test.js",
"lib/*.spec.ts", "lib/*.spec.js",
]
assert sorted(result) == sorted(expected)


class TestApmModulesExclusion:
"""Test that apm_modules/ is excluded from project scanning (Fixes #154)."""

@pytest.fixture
def project_with_apm_modules(self):
"""Create a project with a realistic apm_modules/ tree."""
with tempfile.TemporaryDirectory() as temp_dir:
base = Path(temp_dir)

# Real project directories
for d in ["src", "src/components", "tests", "docs"]:
(base / d).mkdir(parents=True, exist_ok=True)

# Real project files
for f in [
"src/index.ts", "src/components/App.tsx",
"tests/app.test.ts", "docs/README.md",
]:
(base / f).touch()

# Simulate apm_modules with many nested packages
for i in range(50):
pkg_dir = base / "apm_modules" / f"org/package-{i}" / "sub"
pkg_dir.mkdir(parents=True, exist_ok=True)
(pkg_dir / "SKILL.md").touch()
(pkg_dir / "instructions.md").touch()

yield base

def test_apm_modules_excluded_from_directory_cache(self, project_with_apm_modules):
"""apm_modules directories must not appear in _directory_cache."""
optimizer = ContextOptimizer(base_dir=str(project_with_apm_modules))
optimizer._analyze_project_structure()

cached_paths = set(optimizer._directory_cache.keys())
# Resolve to handle macOS /var -> /private/var symlink
resolved_base = project_with_apm_modules.resolve()

# Real dirs are cached
assert resolved_base / "src" in cached_paths
assert resolved_base / "tests" in cached_paths

# No apm_modules path is cached
apm_paths = [p for p in cached_paths if "apm_modules" in str(p)]
assert apm_paths == [], f"apm_modules leaked into cache: {apm_paths}"

def test_cache_size_unaffected_by_apm_modules(self, project_with_apm_modules):
"""Directory cache size should reflect only project dirs, not apm_modules."""
optimizer = ContextOptimizer(base_dir=str(project_with_apm_modules))
optimizer._analyze_project_structure()

# 50 packages × sub-directory each = 150+ dirs if not excluded.
# Real project has at most ~5 dirs (root, src, src/components, tests, docs).
assert len(optimizer._directory_cache) <= 10, (
f"Cache has {len(optimizer._directory_cache)} dirs — "
f"apm_modules likely not excluded"
)

def test_os_walk_prunes_apm_modules(self, project_with_apm_modules):
"""os.walk must not descend into apm_modules subdirectories."""
optimizer = ContextOptimizer(base_dir=str(project_with_apm_modules))
optimizer._analyze_project_structure()

# _should_exclude_subdir must flag apm_modules
apm_modules_path = project_with_apm_modules / "apm_modules"
assert optimizer._should_exclude_subdir(apm_modules_path) is True

def test_find_matching_dirs_ignores_apm_modules(self, project_with_apm_modules):
"""Pattern matching must not return directories inside apm_modules."""
optimizer = ContextOptimizer(base_dir=str(project_with_apm_modules))
optimizer._analyze_project_structure()

matching = optimizer._find_matching_directories("**/*.md")
apm_matches = [p for p in matching if "apm_modules" in str(p)]
assert apm_matches == [], f"apm_modules dirs matched: {apm_matches}"


class TestGlobCacheReuse:
"""Test that glob results are cached and Set[Path] conversion is not repeated."""

def test_set_path_cached_across_calls(self):
"""_file_matches_pattern must cache the Set[Path] conversion."""
with tempfile.TemporaryDirectory() as temp_dir:
base = Path(temp_dir)
(base / "src").mkdir()
(base / "src" / "a.ts").touch()
(base / "src" / "b.ts").touch()

optimizer = ContextOptimizer(base_dir=str(base))
optimizer._analyze_project_structure()

file_a = base / "src" / "a.ts"
file_b = base / "src" / "b.ts"

# First call populates cache
optimizer._file_matches_pattern(file_a, "**/*.ts")
assert "_set_**/*.ts" in optimizer._glob_cache

# Second call should reuse the cached set (not recreate it)
cached_set_id = id(optimizer._glob_cache["_set_**/*.ts"])
optimizer._file_matches_pattern(file_b, "**/*.ts")
assert id(optimizer._glob_cache["_set_**/*.ts"]) == cached_set_id


if __name__ == "__main__":
pytest.main([__file__])