From 4f7a3c60ce7d08cedc979f4988ff9c3d5b931bfe Mon Sep 17 00:00:00 2001
From: Matthieu Muffato
Date: Sat, 7 Sep 2024 12:27:46 +0100
Subject: [PATCH 1/5] Inverted the data movements: downloads happen in the
 output directory and are then copied to the cache if needed

This allows getting rid of the confusing `output_path` variable.
---
 nf_core/pipelines/download.py | 35 ++++++++++++++++-------------------
 1 file changed, 16 insertions(+), 19 deletions(-)

diff --git a/nf_core/pipelines/download.py b/nf_core/pipelines/download.py
index 97453b127e..fae4eb28b6 100644
--- a/nf_core/pipelines/download.py
+++ b/nf_core/pipelines/download.py
@@ -1309,8 +1309,7 @@ def singularity_download_image(
         log.debug(f"Downloading Singularity image: '{container}'")
 
         # Set output path to save file to
-        output_path = cache_path or out_path
-        output_path_tmp = f"{output_path}.partial"
+        output_path_tmp = f"{out_path}.partial"
         log.debug(f"Downloading to: '{output_path_tmp}'")
 
         # Set up progress bar
@@ -1340,16 +1339,16 @@
                         fh.write(data)
 
             # Rename partial filename to final filename
-            os.rename(output_path_tmp, output_path)
+            os.rename(output_path_tmp, out_path)
 
-            # Copy cached download if we are using the cache
+            # Copy download to cache if one is defined
             if cache_path:
-                log.debug(f"Copying {container} from cache: '{os.path.basename(out_path)}'")
-                progress.update(task, description="Copying from cache to target directory")
-                shutil.copyfile(cache_path, out_path)
+                log.debug(f"Copying {container} to cache: '{os.path.basename(out_path)}'")
+                progress.update(task, description="Copying from target directory to cache")
+                shutil.copyfile(out_path, cache_path)
 
             # Create symlinks to ensure that the images are found even with different registries being used.
-            self.symlink_singularity_images(output_path)
+            self.symlink_singularity_images(out_path)
 
             progress.remove_task(task)
 
@@ -1361,8 +1360,8 @@
             log.debug(f"Deleting incompleted singularity image download:\n'{output_path_tmp}'")
             if output_path_tmp and os.path.exists(output_path_tmp):
                 os.remove(output_path_tmp)
-            if output_path and os.path.exists(output_path):
-                os.remove(output_path)
+            if out_path and os.path.exists(out_path):
+                os.remove(out_path)
             # Re-raise the caught exception
             raise
         finally:
@@ -1383,8 +1382,6 @@ def singularity_pull_image(
         Raises:
             Various exceptions possible from `subprocess` execution of Singularity.
         """
-        output_path = cache_path or out_path
-
         # where the output of 'singularity pull' is first generated before being copied to the NXF_SINGULARITY_CACHDIR.
         # if not defined by the Singularity administrators, then use the temporary directory to avoid storing the images in the work directory.
         if os.environ.get("SINGULARITY_CACHEDIR") is None:
@@ -1406,11 +1403,11 @@
                 "singularity",
                 "pull",
                 "--name",
-                output_path,
+                out_path,
                 address,
             ]
         elif shutil.which("apptainer"):
-            singularity_command = ["apptainer", "pull", "--name", output_path, address]
+            singularity_command = ["apptainer", "pull", "--name", out_path, address]
         else:
             raise OSError("Singularity/Apptainer is needed to pull images, but it is not installed or not in $PATH")
         log.debug(f"Building singularity image: {address}")
@@ -1453,14 +1450,14 @@
                         error_msg=lines,
                     )
 
-        # Copy cached download if we are using the cache
+        # Copy download to cache if one is defined
         if cache_path:
-            log.debug(f"Copying {container} from cache: '{os.path.basename(out_path)}'")
-            progress.update(task, current_log="Copying from cache to target directory")
-            shutil.copyfile(cache_path, out_path)
+            log.debug(f"Copying {container} to cache: '{os.path.basename(out_path)}'")
+            progress.update(task, current_log="Copying from target directory to cache")
+            shutil.copyfile(out_path, cache_path)
 
         # Create symlinks to ensure that the images are found even with different registries being used.
-        self.symlink_singularity_images(output_path)
+        self.symlink_singularity_images(out_path)
 
         progress.remove_task(task)
 
From 78650af6d406834671f0f754e91ee8767dbf776c Mon Sep 17 00:00:00 2001
From: Matthieu Muffato
Date: Sat, 7 Sep 2024 12:57:58 +0100
Subject: [PATCH 2/5] Create symlinks in the cache *and* output directory

---
 nf_core/pipelines/download.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/nf_core/pipelines/download.py b/nf_core/pipelines/download.py
index fae4eb28b6..284afb68c9 100644
--- a/nf_core/pipelines/download.py
+++ b/nf_core/pipelines/download.py
@@ -1290,6 +1290,7 @@ def singularity_copy_cache_image(self, container: str, out_path: str, cache_path
             log.debug(f"Copying {container} from cache: '{os.path.basename(out_path)}'")
             shutil.copyfile(cache_path, out_path)
         # Create symlinks to ensure that the images are found even with different registries being used.
+        self.symlink_singularity_images(cache_path)
         self.symlink_singularity_images(out_path)
 
     def singularity_download_image(
@@ -1346,6 +1347,8 @@
                 log.debug(f"Copying {container} to cache: '{os.path.basename(out_path)}'")
                 progress.update(task, description="Copying from target directory to cache")
                 shutil.copyfile(out_path, cache_path)
+                # Create symlinks to ensure that the images are found even with different registries being used.
+                self.symlink_singularity_images(cache_path)
 
             # Create symlinks to ensure that the images are found even with different registries being used.
             self.symlink_singularity_images(out_path)
@@ -1455,6 +1458,8 @@ def singularity_pull_image(
             log.debug(f"Copying {container} to cache: '{os.path.basename(out_path)}'")
             progress.update(task, current_log="Copying from target directory to cache")
             shutil.copyfile(out_path, cache_path)
+            # Create symlinks to ensure that the images are found even with different registries being used.
+            self.symlink_singularity_images(cache_path)
 
         # Create symlinks to ensure that the images are found even with different registries being used.
         self.symlink_singularity_images(out_path)

From 113430dc3f895742f6272f87d8a5a88e7de65c6f Mon Sep 17 00:00:00 2001
From: Matthieu Muffato
Date: Mon, 9 Sep 2024 16:08:56 +0100
Subject: [PATCH 3/5] cache_path is actually always defined

---
 nf_core/pipelines/download.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/nf_core/pipelines/download.py b/nf_core/pipelines/download.py
index 284afb68c9..9fe3830bf4 100644
--- a/nf_core/pipelines/download.py
+++ b/nf_core/pipelines/download.py
@@ -1086,7 +1086,7 @@ def get_singularity_images(self, current_revision: str = "") -> None:
 
         # Organise containers based on what we need to do with them
         containers_exist: List[str] = []
-        containers_cache: List[Tuple[str, str, Optional[str]]] = []
+        containers_cache: List[Tuple[str, str, str]] = []
         containers_download: List[Tuple[str, str, Optional[str]]] = []
         containers_pull: List[Tuple[str, str, Optional[str]]] = []
         for container in self.containers:
@@ -1283,15 +1283,13 @@ def singularity_image_filenames(self, container: str) -> Tuple[str, Optional[str
 
         return (out_path, cache_path)
 
-    def singularity_copy_cache_image(self, container: str, out_path: str, cache_path: Optional[str]) -> None:
+    def singularity_copy_cache_image(self, container: str, out_path: str, cache_path: str) -> None:
         """Copy Singularity image from NXF_SINGULARITY_CACHEDIR to target folder."""
-        # Copy to destination folder if we have a cached version
-        if cache_path and os.path.exists(cache_path):
-            log.debug(f"Copying {container} from cache: '{os.path.basename(out_path)}'")
-            shutil.copyfile(cache_path, out_path)
-        # Create symlinks to ensure that the images are found even with different registries being used.
-        self.symlink_singularity_images(cache_path)
-        self.symlink_singularity_images(out_path)
+        log.debug(f"Copying {container} from cache: '{os.path.basename(out_path)}'")
+        shutil.copyfile(cache_path, out_path)
+        # Create symlinks to ensure that the images are found even with different registries being used.
+        self.symlink_singularity_images(cache_path)
+        self.symlink_singularity_images(out_path)
 
     def singularity_download_image(
         self, container: str, out_path: str, cache_path: Optional[str], progress: DownloadProgress

From ac771d8ba9c0c057ffe667722ef55bac6830c30e Mon Sep 17 00:00:00 2001
From: Matthieu Muffato
Date: Mon, 9 Sep 2024 16:11:08 +0100
Subject: [PATCH 4/5] Factored out a function to copy a container

---
 nf_core/pipelines/download.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/nf_core/pipelines/download.py b/nf_core/pipelines/download.py
index 9fe3830bf4..9fa842c519 100644
--- a/nf_core/pipelines/download.py
+++ b/nf_core/pipelines/download.py
@@ -1285,11 +1285,17 @@ def singularity_image_filenames(self, container: str) -> Tuple[str, Optional[str
 
     def singularity_copy_cache_image(self, container: str, out_path: str, cache_path: str) -> None:
         """Copy Singularity image from NXF_SINGULARITY_CACHEDIR to target folder."""
-        log.debug(f"Copying {container} from cache: '{os.path.basename(out_path)}'")
-        shutil.copyfile(cache_path, out_path)
+        self.singularity_copy_image(container, cache_path, out_path)
         # Create symlinks to ensure that the images are found even with different registries being used.
         self.symlink_singularity_images(cache_path)
-        self.symlink_singularity_images(out_path)
+
+    def singularity_copy_image(self, container: str, from_path: str, to_path: str) -> None:
+        """Copy Singularity image between folders. This function is used seamlessly
This function is used seamlessly + across the target directory, NXF_SINGULARITY_CACHEDIR, and NXF_SINGULARITY_LIBRARYDIR.""" + log.debug(f"Copying {container} to cache: '{os.path.basename(from_path)}'") + shutil.copyfile(from_path, to_path) + # Create symlinks to ensure that the images are found even with different registries being used. + self.symlink_singularity_images(to_path) def singularity_download_image( self, container: str, out_path: str, cache_path: Optional[str], progress: DownloadProgress @@ -1342,11 +1348,8 @@ def singularity_download_image( # Copy download to cache if one is defined if cache_path: - log.debug(f"Copying {container} to cache: '{os.path.basename(out_path)}'") progress.update(task, description="Copying from target directory to cache") - shutil.copyfile(out_path, cache_path) - # Create symlinks to ensure that the images are found even with different registries being used. - self.symlink_singularity_images(cache_path) + self.singularity_copy_image(container, out_path, cache_path) # Create symlinks to ensure that the images are found even with different registries being used. self.symlink_singularity_images(out_path) @@ -1453,11 +1456,8 @@ def singularity_pull_image( # Copy download to cache if one is defined if cache_path: - log.debug(f"Copying {container} to cache: '{os.path.basename(out_path)}'") progress.update(task, current_log="Copying from target directory to cache") - shutil.copyfile(out_path, cache_path) - # Create symlinks to ensure that the images are found even with different registries being used. - self.symlink_singularity_images(cache_path) + self.singularity_copy_image(container, out_path, cache_path) # Create symlinks to ensure that the images are found even with different registries being used. self.symlink_singularity_images(out_path) From 6293dd0a2116b3145944011936fe354f18be90b2 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Tue, 10 Sep 2024 14:43:01 +0100 Subject: [PATCH 5/5] Copy from the library directory if set --- nf_core/pipelines/download.py | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/nf_core/pipelines/download.py b/nf_core/pipelines/download.py index 9fa842c519..8d9f435e23 100644 --- a/nf_core/pipelines/download.py +++ b/nf_core/pipelines/download.py @@ -1087,11 +1087,12 @@ def get_singularity_images(self, current_revision: str = "") -> None: # Organise containers based on what we need to do with them containers_exist: List[str] = [] containers_cache: List[Tuple[str, str, str]] = [] + containers_library: List[Tuple[str, str, str, Optional[str]]] = [] containers_download: List[Tuple[str, str, Optional[str]]] = [] containers_pull: List[Tuple[str, str, Optional[str]]] = [] for container in self.containers: # Fetch the output and cached filenames for this container - out_path, cache_path = self.singularity_image_filenames(container) + out_path, cache_path, library_path = self.singularity_image_filenames(container) # Check that the directories exist out_path_dir = os.path.dirname(out_path) @@ -1109,11 +1110,16 @@ def get_singularity_images(self, current_revision: str = "") -> None: containers_exist.append(container) continue - # We have a copy of this in the NXF_SINGULARITY_CACHE dir + # We have a copy of this in NXF_SINGULARITY_CACHEDIR if cache_path and os.path.exists(cache_path): containers_cache.append((container, out_path, cache_path)) continue + # We have a copy of this in NXF_SINGULARITY_LIBRARYDIR + if library_path and os.path.exists(library_path): + 
+                containers_library.append((container, library_path, out_path, cache_path))
+                continue
+
             # Direct download within Python
             if container.startswith("http"):
                 containers_download.append((container, out_path, cache_path))
@@ -1145,6 +1151,12 @@ def get_singularity_images(self, current_revision: str = "") -> None:
                 self.singularity_copy_cache_image(*container)
                 progress.update(task, advance=1)
 
+        if containers_library:
+            for container in containers_library:
+                progress.update(task, description="Copying singularity images from library")
+                self.singularity_copy_library_image(*container)
+                progress.update(task, advance=1)
+
         if containers_download or containers_pull:
             # if clause gives slightly better UX, because Download is no longer displayed if nothing is left to be downloaded.
             with concurrent.futures.ThreadPoolExecutor(max_workers=self.parallel_downloads) as pool:
@@ -1226,7 +1238,7 @@ def get_singularity_images(self, current_revision: str = "") -> None:
             # Task should advance in any case. Failure to pull will not kill the download process.
             progress.update(task, advance=1)
 
-    def singularity_image_filenames(self, container: str) -> Tuple[str, Optional[str]]:
+    def singularity_image_filenames(self, container: str) -> Tuple[str, Optional[str], Optional[str]]:
         """Check Singularity cache for image, copy to destination folder if found.
 
         Args:
             container (str): A pipeline's container name. Can be direct download URL
                 or a Docker Hub repository ID.
 
         Returns:
-            tuple (str, str): Returns a tuple of (out_path, cache_path).
+            (str, str, str): Returns a tuple of (out_path, cache_path, library_path).
             out_path is the final target output path. it may point to the NXF_SINGULARITY_CACHEDIR, if cache utilisation was set to 'amend'.
             If cache utilisation was set to 'copy', it will point to the target folder, a subdirectory of the output directory.
             In the latter case, cache_path may either be None (image is not yet cached locally) or point to the image in the NXF_SINGULARITY_CACHEDIR,
             so it will not be downloaded from the web again, but directly copied from there. See get_singularity_images() for implementation.
+            library_path points to the container in NXF_SINGULARITY_LIBRARYDIR, if the latter is defined.
         """
 
         # Generate file paths
@@ -1281,7 +1294,11 @@ def singularity_image_filenames(self, container: str) -> Tuple[str, Optional[str
         elif self.container_cache_utilisation in ["amend", "copy"]:
             raise FileNotFoundError("Singularity cache is required but no '$NXF_SINGULARITY_CACHEDIR' set!")
 
-        return (out_path, cache_path)
+        library_path = None
+        if os.environ.get("NXF_SINGULARITY_LIBRARYDIR"):
+            library_path = os.path.join(os.environ["NXF_SINGULARITY_LIBRARYDIR"], out_name)
+
+        return (out_path, cache_path, library_path)
 
     def singularity_copy_cache_image(self, container: str, out_path: str, cache_path: str) -> None:
         """Copy Singularity image from NXF_SINGULARITY_CACHEDIR to target folder."""
         self.singularity_copy_image(container, cache_path, out_path)
         # Create symlinks to ensure that the images are found even with different registries being used.
         self.symlink_singularity_images(cache_path)
 
+    def singularity_copy_library_image(
+        self, container: str, library_path: str, out_path: str, cache_path: Optional[str]
+    ) -> None:
+        """Copy Singularity image from NXF_SINGULARITY_LIBRARYDIR to target folder, and possibly NXF_SINGULARITY_CACHEDIR."""
+        self.singularity_copy_image(container, library_path, out_path)
+        if cache_path:
+            self.singularity_copy_image(container, library_path, cache_path)
+
     def singularity_copy_image(self, container: str, from_path: str, to_path: str) -> None:
         """Copy Singularity image between folders. This function is used seamlessly
         across the target directory, NXF_SINGULARITY_CACHEDIR, and NXF_SINGULARITY_LIBRARYDIR."""
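
Taken together, the five patches give get_singularity_images() a fixed resolution order for every container: an image already in the output directory is left alone, then NXF_SINGULARITY_CACHEDIR is tried, then NXF_SINGULARITY_LIBRARYDIR, and only after that is the image downloaded or pulled straight into the output directory and copied back into the cache. The Python sketch below only illustrates that order under simplified assumptions: fetch_singularity_image and _download_or_pull are hypothetical names rather than nf-core functions, and symlink creation, registry handling and progress reporting are omitted.

import os
import shutil
from typing import Optional


def _download_or_pull(container: str, out_path: str) -> None:
    """Hypothetical placeholder for the real download / 'singularity pull' step."""
    raise NotImplementedError(f"fetch {container} into {out_path}")


def fetch_singularity_image(
    container: str, out_path: str, cache_path: Optional[str], library_path: Optional[str]
) -> None:
    """Illustrate the resolution order implemented by this patch series."""
    if os.path.exists(out_path):
        # Image already present in the output directory: nothing to do.
        return

    if cache_path and os.path.exists(cache_path):
        # Reuse NXF_SINGULARITY_CACHEDIR instead of fetching again.
        shutil.copyfile(cache_path, out_path)
    elif library_path and os.path.exists(library_path):
        # NXF_SINGULARITY_LIBRARYDIR is a read-only source: copy from it into
        # the output directory and, if a cache is configured, into the cache too.
        shutil.copyfile(library_path, out_path)
        if cache_path:
            shutil.copyfile(library_path, cache_path)
    else:
        # Fetch straight into the output directory, then copy the fresh
        # download into the cache so later runs can reuse it.
        _download_or_pull(container, out_path)
        if cache_path:
            shutil.copyfile(out_path, cache_path)

The key design point the sketch captures is the inversion from patch 1: the output directory is always the primary destination, and the cache is only ever a secondary copy, which is why the output_path alias could be dropped.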