Skip to content

Commit 71fe81f

Browse files
committed
Added much more sophisticated path exclusions
1 parent 71e05b6 commit 71fe81f

File tree

1 file changed

+68
-6
lines changed

1 file changed

+68
-6
lines changed

src/coderoller/source_repo_flattener.py

Lines changed: 68 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
import pathspec
23

34
# Dictionary mapping file extensions to their corresponding long form names
45
FILE_TYPES = {
@@ -32,6 +33,54 @@
3233
}
3334

3435

36+
def load_gitignore_patterns(root_folder: str) -> list[str]:
    """
    Load .gitignore patterns from the root folder.

    The file is returned line by line without any filtering, so comment
    lines and blank lines are part of the result.

    Args:
        root_folder (str): The root folder of the repository.

    Returns:
        list[str]: A list of patterns from the .gitignore file, or an empty
            list when the root folder has no regular file named .gitignore.
    """
    gitignore_path = os.path.join(root_folder, ".gitignore")

    # isfile (rather than exists) also rejects a directory that happens to be
    # named ".gitignore", which open() would otherwise fail on.
    if not os.path.isfile(gitignore_path):
        return []

    # Decode explicitly instead of relying on the platform default encoding;
    # "utf-8-sig" additionally drops a leading BOM so it cannot end up glued
    # onto the first pattern.
    with open(gitignore_path, "r", encoding="utf-8-sig") as f:
        return f.read().splitlines()
52+
53+
54+
def should_include_path(file_path: str, spec: pathspec.PathSpec) -> bool:
    """
    Determine if a path should be included based on .gitignore patterns and specific exclusions.

    A path is rejected when any of its components is one of the excluded
    directory names (build, dist, node_modules, __pycache__) or ends with one
    of the excluded suffixes (.flat.md, .lock, .hidden). The comparison is
    made per path component, so names that merely contain an excluded word,
    such as "rebuild.py" or "distance/", are kept.

    Every component of file_path is inspected, including leading ones, so a
    path located underneath an excluded directory is rejected as well.

    Args:
        file_path (str): The path of the file or directory to check.
        spec (pathspec.PathSpec): The PathSpec object containing the .gitignore patterns.

    Returns:
        bool: True if the path should be included, False otherwise.
    """
    # Specific exclusions: exact directory names and file-name suffixes.
    excluded_names = {"build", "dist", "node_modules", "__pycache__"}
    excluded_suffixes = (".flat.md", ".lock", ".hidden")

    # Compare whole components instead of searching the raw string, which
    # would also reject unrelated names that contain an excluded word.
    # normpath makes the separators uniform before splitting.
    for component in os.path.normpath(file_path).split(os.sep):
        if component in excluded_names or component.endswith(excluded_suffixes):
            return False

    # Check against .gitignore patterns.
    # NOTE(review): file_path is handed to the spec unchanged; patterns
    # anchored to the repository root presumably only match when the caller
    # passes a path relative to the .gitignore location - confirm at call site.
    return not spec.match_file(file_path)
82+
83+
3584
def find_readme(root_folder: str) -> str:
3685
"""
3786
Find a README file in the root folder with any common README extension.
@@ -49,8 +98,8 @@ def find_readme(root_folder: str) -> str:
4998

5099

51100
def flatten_repo(
52-
root_folder: str, output_folder: str | None = None, repo_name: str | None = None
53-
):
101+
root_folder: str, output_folder: str = None, repo_name: str = None
102+
) -> None:
54103
"""
55104
Flatten the source repository into a single markdown file.
56105
@@ -80,14 +129,27 @@ def flatten_repo(
80129
flat_file.write("\n```\n\n")
81130
print(f"Included README file: {readme_path}")
82131

132+
# Collect patterns from .gitignore
133+
gitignore_patterns = load_gitignore_patterns(root_folder)
134+
spec = pathspec.PathSpec.from_lines(
135+
pathspec.patterns.GitWildMatchPattern, gitignore_patterns
136+
)
137+
83138
# Recursively walk the repo and collect relevant files
84139
for dirpath, dirnames, filenames in os.walk(root_folder):
85-
# Exclude hidden directories
86-
dirnames[:] = [d for d in dirnames if not d.startswith(".")]
140+
# Exclude directories and files matching .gitignore patterns and specific exclusions
141+
dirnames[:] = [
142+
d
143+
for d in dirnames
144+
if should_include_path(os.path.join(dirpath, d), spec)
145+
]
146+
filenames[:] = [
147+
f
148+
for f in filenames
149+
if should_include_path(os.path.join(dirpath, f), spec)
150+
]
87151

88152
for filename in filenames:
89-
if filename.startswith("."):
90-
continue # Exclude hidden files
91153
extension = os.path.splitext(filename)[1]
92154
full_path = os.path.join(dirpath, filename)
93155
if extension in FILE_TYPES and full_path != readme_path:

0 commit comments

Comments
 (0)