From d052dd121f92dad56b3c7123b596fb56c42673fc Mon Sep 17 00:00:00 2001 From: ravi_kumar_pilla Date: Tue, 11 Feb 2025 22:00:37 -0600 Subject: [PATCH] ignore files which are hidden and in gitignore Signed-off-by: ravi_kumar_pilla --- package/kedro_viz/autoreload_file_filter.py | 13 +++------- .../integrations/kedro/lite_parser.py | 10 ++++++- package/kedro_viz/utils.py | 26 ++++++++++++++++++- 3 files changed, 37 insertions(+), 12 deletions(-) diff --git a/package/kedro_viz/autoreload_file_filter.py b/package/kedro_viz/autoreload_file_filter.py index f8b13c6237..628fb931b7 100644 --- a/package/kedro_viz/autoreload_file_filter.py +++ b/package/kedro_viz/autoreload_file_filter.py @@ -7,6 +7,7 @@ from pathlib import Path from typing import Optional, Set +from kedro_viz.utils import load_gitignore_patterns from pathspec import GitIgnoreSpec from watchfiles import Change, DefaultFilter @@ -35,16 +36,8 @@ def __init__(self, base_path: Optional[Path] = None): super().__init__() # Load .gitignore patterns - gitignore_path = self.cwd / ".gitignore" - try: - with open(gitignore_path, "r", encoding="utf-8") as gitignore_file: - ignore_patterns = gitignore_file.read().splitlines() - self.gitignore_spec: Optional[GitIgnoreSpec] = GitIgnoreSpec.from_lines( - "gitwildmatch", ignore_patterns - ) - except FileNotFoundError: - self.gitignore_spec = None - + self.gitignore_spec = load_gitignore_patterns(self.cwd) + def __call__(self, change: Change, path: str) -> bool: """ Determine whether a file change should be processed. 
diff --git a/package/kedro_viz/integrations/kedro/lite_parser.py b/package/kedro_viz/integrations/kedro/lite_parser.py index f6a7dbeb8d..a1299dda68 100755 --- a/package/kedro_viz/integrations/kedro/lite_parser.py +++ b/package/kedro_viz/integrations/kedro/lite_parser.py @@ -8,6 +8,7 @@ from unittest.mock import MagicMock from kedro_viz.integrations.utils import Spinner +from kedro_viz.utils import is_file_ignored, load_gitignore_patterns logger = logging.getLogger(__name__) @@ -243,7 +244,14 @@ def parse(self, target_path: Path) -> Union[Dict[str, Set[str]], None]: unresolved_imports: Dict[str, Set[str]] = {} + # Load .gitignore patterns + gitignore_spec = load_gitignore_patterns(target_path) + if target_path.is_file(): + + if is_file_ignored(target_path): + return unresolved_imports + try: missing_dependencies = self._get_unresolved_imports(target_path) if len(missing_dependencies) > 0: @@ -258,7 +266,7 @@ def parse(self, target_path: Path) -> Union[Dict[str, Set[str]], None]: return unresolved_imports # handling directories - _project_file_paths = set(target_path.rglob("*.py")) + _project_file_paths = set(file_path for file_path in target_path.rglob("*.py") if not is_file_ignored(file_path, target_path, gitignore_spec)) for file_path in _project_file_paths: try: diff --git a/package/kedro_viz/utils.py b/package/kedro_viz/utils.py index a0a4a5abce..4033886031 100644 --- a/package/kedro_viz/utils.py +++ b/package/kedro_viz/utils.py @@ -1,7 +1,10 @@ """Transcoding related utility functions.""" import hashlib -from typing import Tuple +from pathlib import Path +from typing import Optional, Tuple + +from pathspec import GitIgnoreSpec TRANSCODING_SEPARATOR = "@" @@ -57,3 +60,24 @@ def _strip_transcoding(element: str) -> str: def is_dataset_param(dataset_name: str) -> bool: """Return whether a dataset is a parameter""" return dataset_name.lower().startswith("params:") or dataset_name == "parameters" + +def load_gitignore_patterns(project_path: Path) -> 
def load_gitignore_patterns(project_path: Path) -> Optional["GitIgnoreSpec"]:
    """Load the ``.gitignore`` patterns found directly under ``project_path``.

    Args:
        project_path: Directory expected to contain a ``.gitignore`` file.

    Returns:
        A ``GitIgnoreSpec`` built from the lines of ``.gitignore``, or
        ``None`` when ``project_path`` has no ``.gitignore`` file.
    """
    gitignore_path = project_path / ".gitignore"

    # ``is_file`` (rather than ``exists``) also rejects a directory that
    # happens to be named ``.gitignore``, which ``open`` could not read.
    if not gitignore_path.is_file():
        return None

    with open(gitignore_path, "r", encoding="utf-8") as gitignore_file:
        ignore_patterns = gitignore_file.read().splitlines()

    return GitIgnoreSpec.from_lines("gitwildmatch", ignore_patterns)


def is_file_ignored(
    file_path: Path,
    project_path: Optional[Path] = None,
    gitignore_spec: Optional["GitIgnoreSpec"] = None,
) -> bool:
    """Return True if ``file_path`` should be ignored.

    A path is ignored when its own name starts with a dot, when it sits
    inside a dot-prefixed folder below ``project_path``, or when its path
    relative to ``project_path`` matches ``gitignore_spec``.

    Args:
        file_path: Path of the file to test.
        project_path: Root that ``file_path`` is made relative to before the
            folder and gitignore checks. When omitted, only the hidden-name
            check is applied, so the function can be called with a single
            argument.
        gitignore_spec: Compiled gitignore patterns, or ``None`` to skip
            gitignore matching.

    Returns:
        True if the file is hidden or gitignored, False otherwise.
    """
    if file_path.name.startswith("."):
        return True

    if project_path is None:
        return False

    try:
        relative_path = file_path.relative_to(project_path)
    except ValueError:
        # The file does not live under project_path, so neither the hidden
        # folder check nor the root-relative gitignore patterns apply.
        return False

    # Only components below project_path are inspected, so a project that
    # itself lives under a dot-prefixed parent directory is not excluded.
    if any(part.startswith(".") for part in relative_path.parts):
        return True

    if gitignore_spec is None:
        return False
    return bool(gitignore_spec.match_file(str(relative_path)))