@@ -69,7 +69,7 @@ def write_from_db(file_path: str, grid_file_id: str) -> None:
6969 return True
7070
@staticmethod
def store_binary_files(binary_executable_path: str, state: State) -> bool:
    """
    Stores the files in the folder of the given path in the database.

    Every file below the folder that contains the executable is streamed into GridFS,
    tagged with the browser name, type and index of the given state. If storing any
    of the files fails, the cached files matching the state's type and index are
    removed again so that no partial binary is left behind.

    :param binary_executable_path: Path to the binary executable; the folder containing it is cached.
    :param state: The state of which the binary files are cached.
    :return: True if all files were stored. False if caching is disabled, if no room
        could be made in the cache, or if storing at least one file failed.
    """
    if MongoDB().binary_cache_limit <= 0:
        return False

    while BinaryCache.__count_cached_binaries() >= MongoDB().binary_cache_limit:
        if BinaryCache.__count_cached_binaries(state_type='revision') <= 0:
            # There are only version binaries in the cache, which will never be removed
            return False
        BinaryCache.__remove_least_used_revision_binary_files()

    logger.debug(f"Caching binary files for {state}...")
    fs = MongoDB().gridfs

    binary_folder_path = os.path.dirname(binary_executable_path)
    # A single timestamp is shared by all files belonging to this binary.
    last_access_ts = datetime.datetime.now()
    # Max chunk size is 16 MB (meta-data included)
    chunk_size = 1024 * 1024 * 15

    def store_file(file_path: str) -> None:
        # Stream the file chunk by chunk instead of loading it into memory at once.
        # The context managers make sure both handles are released when a read or
        # write fails; the GridFS file is only finalized when no exception occurred.
        with open(file_path, 'rb') as local_file, fs.new_file(
            file_type='binary',
            browser_name=state.browser_name,
            state_type=state.type,
            state_index=state.index,
            relative_file_path=os.path.relpath(file_path, binary_folder_path),
            access_count=0,
            last_access_ts=last_access_ts,
            chunk_size=chunk_size
        ) as grid_file:
            while chunk := local_file.read(chunk_size):
                grid_file.write(chunk)

    start_time = time.time()
    futures = []
    # Leaving the `with` block waits until all submitted tasks have finished.
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        for root, _, file_names in os.walk(binary_folder_path):
            for file_name in file_names:
                futures.append(executor.submit(store_file, os.path.join(root, file_name)))
        logger.debug(f"Number of files to cache: {len(futures)}")

    futures_with_exception = [future for future in futures if future.exception() is not None]
    if futures_with_exception:
        logger.error(
            (
                f"Something went wrong caching binary files for {state}, "
                "Removing possibly incomplete binary files from cache."
            ),
            exc_info=futures_with_exception[0].exception()
        )
        BinaryCache.__remove_revision_binary_files(state.type, state.index)
        logger.debug(f"Removed possibly incomplete cached binary files for {state}.")
        return False

    elapsed_time = time.time() - start_time
    logger.debug(f'Stored binary in {elapsed_time:.2f}s')
    return True
113138 @staticmethod
114139 def __count_cached_binaries (state_type : Optional [str ] = None ) -> int :
@@ -130,7 +155,6 @@ def __remove_least_used_revision_binary_files() -> None:
130155 """
131156 Removes the least used revision binary files from the database.
132157 """
133- fs = MongoDB ().gridfs
134158 files_collection = MongoDB ().get_collection ('fs.files' )
135159
136160 grid_cursor = files_collection .find (
@@ -139,6 +163,16 @@ def __remove_least_used_revision_binary_files() -> None:
139163 )
140164 for state_doc in grid_cursor :
141165 state_index = state_doc ['state_index' ]
142- for grid_doc in files_collection .find ({'state_index' : state_index , 'state_type' : 'revision' }):
143- fs .delete (grid_doc ['_id' ])
166+ BinaryCache .__remove_revision_binary_files ('revision' , state_index )
144167 break
168+
@staticmethod
def __remove_revision_binary_files(state_type: str, state_index: int) -> None:
    """
    Deletes all GridFS files whose `state_type` and `state_index` fields match the parameters.

    :param state_type: Value the `state_type` field of a file has to equal.
    :param state_index: Value the `state_index` field of a file has to equal.
    """
    grid_fs = MongoDB().gridfs
    query = {'state_index': state_index, 'state_type': state_type}
    matching_docs = MongoDB().get_collection('fs.files').find(query)

    # Delete through GridFS, addressing each file by the `_id` of its `fs.files` document.
    for matching_doc in matching_docs:
        grid_fs.delete(matching_doc['_id'])
0 commit comments