55from pathlib import Path
66from typing import Dict , List , Optional , Set , Tuple , Union
77
8- from markitdown import (FileConversionException , MarkItDown ,
9- UnsupportedFormatException )
8+ from markitdown import FileConversionException , MarkItDown , UnsupportedFormatException
109
11- from .config import (DEFAULT_EXCLUDE_DIRS , DEFAULT_EXCLUDE_FILES ,
12- DEFAULT_INCLUDE_EXTENSIONS , MARKITDOWN_EXTENSIONS , ReadConfig )
10+ from .config import (
11+ DEFAULT_EXCLUDE_DIRS ,
12+ DEFAULT_EXCLUDE_FILES ,
13+ DEFAULT_INCLUDE_EXTENSIONS ,
14+ MARKITDOWN_EXTENSIONS ,
15+ ReadConfig ,
16+ )
1317
1418
1519def is_git_url (url : str ) -> bool :
@@ -99,7 +103,9 @@ def should_process_file(self, file_path: Union) -> bool:
99103 parts = file_path .parts
100104 for excluded_dir in self .config .exclude_dirs :
101105 if excluded_dir in parts :
102- self .log_debug (f"Excluding { file_path } due to being in excluded directory { excluded_dir } " )
106+ self .log_debug (
107+ f"Excluding { file_path } due to being in excluded directory { excluded_dir } "
108+ )
103109 return False
104110
105111 # Check exclude patterns - handle macOS @ suffix
@@ -120,24 +126,24 @@ def should_process_file(self, file_path: Union) -> bool:
120126 except FileNotFoundError :
121127 return False
122128
123- if self . config . use_markitdown :
124- # If markitdown is active and extensions were specified, use only those
125- if self .config .markitdown_extensions :
126- if file_ext in self .config .markitdown_extensions :
127- self . log_debug ( f"Including { file_path } for markitdown processing" )
128- return True
129- self . log_debug (
130- f"Extension { file_ext } not in markitdown extensions: { self . config . markitdown_extensions } "
131- )
129+ should_use_markitdown = (
130+ self . config . use_markitdown
131+ and self .config .markitdown_extensions
132+ and file_ext in self .config .markitdown_extensions
133+ )
134+
135+ if should_use_markitdown :
136+ self . log_debug ( f"Including { file_path } for markitdown processing" )
137+ return True
132138
133- # If markitdown is not used or the file is not compatible with markitdown,
134- # check if it is in the included extensions
139+ # If not using markitdown or file isn't compatible with markitdown,
140+ # check if it's in the included extensions
135141 if file_ext not in self .config .include_extensions :
136142 self .log_debug (f"Extension { file_ext } not in supported extensions" )
137143 return False
138144
139145 # Check if binary only for non-markitdown files
140- if not ( self . config . use_markitdown and self . config . markitdown_extensions and file_ext in self . config . markitdown_extensions ) :
146+ if not should_use_markitdown :
141147 is_bin = self .is_binary (file_path )
142148 if is_bin :
143149 self .log_debug (f"Excluding { file_path } because it's binary" )
0 commit comments