Skip to content

Commit f7eeb12

Browse files
committed
Bump version to 0.3.0 and add pre-commit configuration; refactor RepoConverter and enhance ignore logic
1 parent 7260e57 commit f7eeb12

File tree

12 files changed

+389
-78
lines changed

12 files changed

+389
-78
lines changed

.pre-commit-config.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
repos:
2+
- repo: https://github.com/pre-commit/pre-commit-hooks
3+
rev: v4.4.0
4+
hooks:
5+
- id: end-of-file-fixer
6+
- repo: https://github.com/astral-sh/ruff-pre-commit
7+
# Ruff version.
8+
rev: v0.6.2
9+
hooks:
10+
# Run the linter.
11+
- id: ruff
12+
args: [ --fix ]
13+
# Run the formatter.
14+
- id: ruff-format

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "repo-context"
3-
version = "0.2.0"
3+
version = "0.3.0"
44
description = "Convert Git repositories into LLM-friendly context format"
55
authors = [{ name = "Mathias Nielsen", email = "[email protected]" }]
66
maintainers = [{ name = "Mathias Nielsen", email = "[email protected]" }]
@@ -26,4 +26,4 @@ repo-context = "repo_context.cli:main"
2626

2727
[build-system]
2828
requires = ["hatchling"]
29-
build-backend = "hatchling.build"
29+
build-backend = "hatchling.build"

repo_context/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
handlers=[RichHandler(console=console, rich_tracebacks=True)],
1515
)
1616

17-
from repo_context.repo_converter import RepoConverter # noqa: E402
17+
from repo_context.converter import RepoConverter # noqa: E402
18+
from repo_context.structure import RepoStructure # noqa: E402
1819

19-
__all__ = ["RepoConverter"]
20+
__all__ = ["RepoConverter", "RepoStructure"]

repo_context/cli.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from pathlib import Path
44
from urllib.parse import urlparse
55

6-
from repo_context.repo_converter import RepoConverter
6+
from repo_context.converter import RepoConverter
77

88
logger = logging.getLogger("repo_context.cli")
99

@@ -60,18 +60,27 @@ def main():
6060
converter = RepoConverter(ignore_patterns=ignore_patterns)
6161

6262
try:
63+
# Clone or use local repository
6364
if urlparse(args.source).scheme:
6465
logger.info(f"Cloning repository from {args.source}")
6566
repo_path, _ = converter.clone_repo(args.source)
6667
else:
6768
repo_path = Path(args.source)
6869

70+
# Convert repository to context
6971
context = converter.convert(repo_path, max_file_lines=args.max_file_lines)
72+
fname = repo_path.stem
7073

71-
for i, c in enumerate(context):
72-
output_path = Path(f"{args.output}/context_{i}.md")
73-
output_path.write_text(c)
74+
# Write context to files
75+
if len(context) == 1:
76+
output_path = Path(f"{args.output}/{fname}.md")
77+
output_path.write_text(context[0])
7478
logger.info(f"Context written to {output_path}")
79+
else:
80+
for i, c in enumerate(context):
81+
output_path = Path(f"{args.output}/{fname}_{i}.md")
82+
output_path.write_text(c)
83+
logger.info(f"Context written to {output_path}")
7584

7685
except Exception as e:
7786
logger.error(f"Error: {e}")

repo_context/repo_converter.py renamed to repo_context/converter.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from repo_context.ignore import EXTENSIONS, FILES, PATTERNS
1111
from repo_context.utils import should_ignore
12+
from repo_context.structure import RepoStructure
1213

1314
logger = logging.getLogger("repo_context.repo_converter")
1415

@@ -20,11 +21,25 @@ def __init__(
2021
max_file_size: int = 1_000_000,
2122
max_workers: int | None = None,
2223
) -> None:
24+
"""
25+
Initialize the converter with specified parameters.
26+
27+
Args:
28+
ignore_patterns (list[str] | None, optional): A list of patterns to ignore. Defaults to None.
29+
max_file_size (int, optional): The maximum file size to process in bytes. Defaults to 1,000,000.
30+
max_workers (int | None, optional): The maximum number of worker threads to use. Defaults to the number of CPU cores.
31+
32+
Attributes:
33+
ignore_patterns (list[str]): The list of patterns to ignore.
34+
max_file_size (int): The maximum file size to process in bytes.
35+
max_workers (int): The maximum number of worker threads to use.
36+
structure (RepoStructure): The repository structure initialized with the ignore patterns.
37+
"""
2338
self.ignore_patterns = ignore_patterns or []
2439
self.max_file_size = max_file_size
2540
self.max_workers = max_workers or cpu_count()
26-
2741
self.ignore_patterns += FILES + EXTENSIONS + PATTERNS
42+
self.structure = RepoStructure(ignore_patterns=self.ignore_patterns)
2843

2944
def clone_repo(self, url: str) -> Path:
3045
"""Clone a repository from URL to temporary directory.
@@ -98,14 +113,22 @@ def convert(self, repo_path: Path, max_file_lines: int | None = None) -> list[st
98113
if not repo_path.exists():
99114
raise FileNotFoundError(f"Repository path {repo_path} does not exist")
100115

116+
context = []
117+
118+
# Get structure of the repository
119+
tree_structure = self.structure.create_tree_structure(repo_path)
120+
if tree_structure:
121+
context.append(tree_structure)
122+
123+
# Get all files in the repository
101124
with logging_redirect_tqdm():
102125
file_paths = [
103126
(str(p), str(repo_path))
104127
for p in tqdm(repo_path.rglob("*"), ncols=120)
105128
if self._is_valid_file(p)
106129
]
107130

108-
context = []
131+
# Process files in parallel
109132
with Pool(self.max_workers) as pool:
110133
with logging_redirect_tqdm():
111134
with tqdm(

repo_context/ignore.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
"uv.lock",
99
"poetry.lock",
1010
".dockerignore",
11+
".coverage",
12+
".pre-commit-config.yaml",
1113
]
1214

1315
EXTENSIONS = [
@@ -27,6 +29,11 @@
2729
"*.pyo",
2830
"*.pyd",
2931
".DS_Store",
32+
"*.zip",
33+
"*.far",
34+
"*.fst",
35+
"*.tsv",
36+
"*.csv",
3037
]
3138

3239
PATTERNS = [
@@ -44,4 +51,5 @@
4451
"publish",
4552
"tests",
4653
"test",
54+
".ruff_cache",
4755
]

repo_context/structure.py

Lines changed: 18 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import logging
22
from pathlib import Path
33

4-
from repo_context.ignore import EXTENSIONS, FILES, PATTERNS
54
from repo_context.utils import should_ignore
65

76
logger = logging.getLogger("repo_context.structure")
@@ -10,30 +9,25 @@
109
class RepoStructure:
1110
def __init__(self, ignore_patterns: list[str] | None = None) -> None:
1211
self.ignore_patterns = ignore_patterns or []
13-
self.ignore_patterns += FILES + EXTENSIONS + PATTERNS
1412

1513
def generate_tree(
1614
self,
1715
directory: Path,
1816
prefix: str = "",
1917
is_last: bool = True,
20-
ignore_patterns: list[str] | None = None,
2118
) -> list[str]:
2219
"""
2320
Recursively generate a tree structure of the directory.
2421
2522
Args:
26-
directory: Path object pointing to the directory
27-
prefix: Prefix for the current line (used for recursion)
28-
is_last: Boolean indicating if this is the last item in current directory
29-
ignore_patterns: List of patterns to ignore
23+
directory (Path): Path object pointing to the directory
24+
prefix (str): Prefix for the current line (used for recursion). default: ""
25+
is_last (bool): Boolean indicating if this is the last item in current directory. default: True
26+
Note: ignore patterns are no longer an argument; entries are filtered using self.ignore_patterns.
3027
3128
Returns:
32-
List[str]: Lines of the tree structure
29+
list[str]: Lines of the tree structure
3330
"""
34-
if ignore_patterns is None:
35-
ignore_patterns = []
36-
3731
if not directory.is_dir():
3832
logger.error(f"'{directory}' is not a valid directory")
3933
return []
@@ -42,65 +36,47 @@ def generate_tree(
4236
items = [
4337
item
4438
for item in sorted(directory.iterdir())
45-
if not should_ignore(item.name, ignore_patterns)
39+
if not should_ignore(item.name, self.ignore_patterns)
4640
]
4741

4842
for i, item in enumerate(items):
4943
is_last_item = i == len(items) - 1
50-
connector = "??? " if is_last_item else "??? "
44+
connector = "└── " if is_last_item else "├── "
5145

5246
tree_lines.append(f"{prefix}{connector}{item.name}")
5347

5448
if item.is_dir():
55-
extension = " " if is_last_item else "? "
49+
extension = "    " if is_last_item else "│   "
5650
tree_lines.extend(
5751
self.generate_tree(
5852
item,
5953
prefix + extension,
6054
is_last_item,
61-
ignore_patterns,
6255
)
6356
)
6457

6558
return tree_lines
6659

67-
def create_tree_structure(
68-
self,
69-
path: str,
70-
output_file: str | None = None,
71-
ignore_patterns: list[str] | None = None,
72-
) -> None:
60+
def create_tree_structure(self, path: str) -> str:
7361
"""
7462
Create and return a tree structure of the specified directory.
7563
7664
Args:
7765
path: Path to the directory
78-
output_file: Optional file path to save the tree structure
79-
ignore_patterns: List of patterns to ignore
66+
67+
Returns:
68+
str: The tree structure
8069
"""
8170
directory = Path(path)
8271
if not directory.exists():
83-
logger.error(f"Directory '{path}' does not exist")
84-
return
72+
raise FileNotFoundError(f"Directory '{path}' does not exist")
8573

8674
logger.info(f"Generating tree structure for: {directory.absolute()}")
8775

88-
tree_lines = ["Directory Structure:", directory.name]
89-
tree_lines.extend(
90-
self.generate_tree(directory, ignore_patterns=ignore_patterns or [])
91-
)
76+
tree_lines = ["# Directory Structure", directory.name]
77+
tree_lines.extend(self.generate_tree(directory))
9278

9379
# Join lines with newlines
94-
tree_structure = "\n".join(tree_lines)
95-
96-
# Print to console
97-
logger.info(tree_structure)
98-
99-
# Save to file if specified
100-
if output_file:
101-
output_path = Path(output_file)
102-
try:
103-
output_path.write_text(tree_structure)
104-
logger.info(f"Tree structure saved to: {output_path.absolute()}")
105-
except Exception as e:
106-
logger.error(f"Failed to save tree structure: {e}")
80+
tree_structure = "\n".join(tree_lines) + "\n"
81+
82+
return tree_structure

repo_context/utils.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ def should_ignore(path: Path, ignore_patterns: list[str]) -> bool:
3131
Returns:
3232
True if path should be ignored
3333
"""
34+
if not isinstance(path, Path):
35+
path = Path(path)
36+
3437
fname = path.name
3538
path_str = str(path)
3639
relative_path = get_relative_path(path)

tests/test_repo_converter.py renamed to tests/test_converter.py

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -46,29 +46,6 @@ def test_clone_repo_invalid_url(converter):
4646
converter.clone_repo("invalid_url")
4747

4848

49-
def test_should_ignore():
50-
converter = RepoConverter()
51-
52-
assert converter.should_ignore(Path(".gitignore"))
53-
assert converter.should_ignore(Path("some/path/.gitignore"))
54-
55-
assert converter.should_ignore(Path("image.png"))
56-
assert converter.should_ignore(Path("deep/path/image.png"))
57-
58-
assert converter.should_ignore(Path(".git/config"))
59-
assert converter.should_ignore(Path("some/path/.git/config"))
60-
61-
assert not converter.should_ignore(Path("regular.txt"))
62-
assert not converter.should_ignore(Path("src/main.py"))
63-
64-
65-
def test_should_ignore_with_ignore_patterns():
66-
converter = RepoConverter(ignore_patterns=["*.pyc", "test/*"])
67-
assert converter.should_ignore(Path("file.pyc"))
68-
assert converter.should_ignore(Path("test/file.py"))
69-
assert not converter.should_ignore(Path("src/file.py"))
70-
71-
7249
def test_is_valid_file(converter, temp_repo):
7350
assert converter._is_valid_file(temp_repo / "file.txt")
7451
assert not converter._is_valid_file(temp_repo / "large.txt")
@@ -88,8 +65,6 @@ def test_convert(converter, temp_repo):
8865
result = converter.convert(temp_repo)[0]
8966
assert "file.txt" in result
9067
assert "test content" in result
91-
assert "empty.txt" not in result
92-
assert "large.txt" not in result
9368
assert "test.ignored" not in result
9469

9570

0 commit comments

Comments
 (0)