DPDK: Fix source for tarball #3505

Open
wants to merge 5 commits into main
34 changes: 29 additions & 5 deletions lisa/base_tools/wget.py
@@ -1,5 +1,5 @@
import re
-from typing import TYPE_CHECKING, Optional, Tuple, Type
+from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Type
from urllib.parse import urlparse

from retry import retry
@@ -24,6 +24,10 @@ class Wget(Tool):
def command(self) -> str:
return "wget"

def _initialize(self, *args: Any, **kwargs: Any) -> None:
self._url_file_cache: Dict[str, str] = dict()
return super()._initialize(*args, **kwargs)
Member: It doesn't need the return, because the function signature's return type is None, so there is nothing meaningful to return.
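A minimal sketch of the shape the reviewer is suggesting, assuming the usual LISA convention that Tool._initialize returns None (the ordering of the two statements is illustrative):

def _initialize(self, *args: Any, **kwargs: Any) -> None:
    # Run the superclass hook, then set up the cache; nothing is
    # returned because the declared return type is None.
    super()._initialize(*args, **kwargs)
    self._url_file_cache: Dict[str, str] = {}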


@property
def can_install(self) -> bool:
return True
@@ -45,8 +49,19 @@ def get(
force_run: bool = False,
timeout: int = 600,
) -> str:
cached_filename = self._url_file_cache.get(url, None)
if cached_filename:
if force_run:
del self._url_file_cache[url]
else:
return cached_filename

is_valid_url(url)

if not filename:
filename = urlparse(url).path.split("/")[-1]
self._log.debug(f"filename is not provided, use {filename} from url.")

file_path, download_path = self._ensure_download_path(file_path, filename)

# remove existing file and dir to download again.
@@ -84,25 +99,27 @@ def get(
f" stdout: {command_result.stdout}"
f" templog: {temp_log}"
)
self.node.tools[Rm].remove_file(log_file, sudo=sudo)
else:
download_file_path = download_path

if command_result.is_timeout:
raise LisaTimeoutException(
f"wget command is timed out after {timeout} seconds."
)
-actual_file_path = self.node.execute(
+ls_result = self.node.execute(
f"ls {download_file_path}",
shell=True,
sudo=sudo,
expected_exit_code=0,
expected_exit_code_failure_message="File path does not exist, "
f"{download_file_path}",
)
actual_file_path = ls_result.stdout.strip()
self._url_file_cache[url] = actual_file_path
if executable:
self.node.execute(f"chmod +x {actual_file_path}", sudo=sudo)
self.node.tools[Rm].remove_file(log_file, sudo=sudo)
-return actual_file_path.stdout
+return actual_file_path

def verify_internet_access(self) -> bool:
try:
@@ -155,6 +172,13 @@ def get(
force_run: bool = False,
timeout: int = 600,
) -> str:
cached_filename = self._url_file_cache.get(url, None)
if cached_filename:
if force_run:
del self._url_file_cache[url]
else:
return cached_filename

ls = self.node.tools[Ls]

if not filename:
@@ -182,5 +206,5 @@ def get(
force_run=force_run,
timeout=timeout,
)

self._url_file_cache[url] = download_path
return download_path
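Both get() implementations above use the same memoization pattern: a per-tool dict maps each URL to the path it resolved to, and force_run=True evicts the entry so the download happens again. A standalone sketch of that pattern, independent of LISA's Tool machinery (Downloader and _download are illustrative names, not LISA APIs):

from typing import Dict

class Downloader:
    def __init__(self) -> None:
        self._url_file_cache: Dict[str, str] = {}

    def get(self, url: str, force_run: bool = False) -> str:
        # Serve from the cache unless the caller forces a re-download.
        cached_filename = self._url_file_cache.get(url, None)
        if cached_filename:
            if force_run:
                del self._url_file_cache[url]
            else:
                return cached_filename
        file_path = self._download(url)
        self._url_file_cache[url] = file_path
        return file_path

    def _download(self, url: str) -> str:
        # Placeholder for the real wget invocation.
        return "/tmp/" + url.rsplit("/", 1)[-1]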
17 changes: 17 additions & 0 deletions lisa/tools/tar.py
@@ -35,6 +35,7 @@ def extract(
gzip: bool = False,
sudo: bool = False,
raise_error: bool = True,
skip_existing_files: bool = False,
) -> None:
# create folder when it doesn't exist
assert_that(strip_components).described_as(
@@ -48,6 +49,21 @@
if strip_components:
# optionally strip N top level components from a tar file
tar_cmd += f" --strip-components={strip_components}"

if skip_existing_files:
# NOTE:
# This option is for when you are using
# Wget.get(..., force_run=False)
#
# Do not use this option if:
# - You may need to extract multiple versions of a
# given tarball on a node
# - You have provided a default output filename to Wget.get
# to fetch the tarball
#
# This skip-old-files option could silently skip extracting
# the second version of the tarball.
tar_cmd += " --skip-old-files"
result = self.run(tar_cmd, shell=True, force_run=True, sudo=sudo)
if raise_error:
result.assert_exit_code(
@@ -127,6 +143,7 @@ def extract(
gzip: bool = False,
sudo: bool = False,
raise_error: bool = True,
skip_existing_files: bool = False,
) -> None:
mkdir = self.node.tools[Mkdir]
mkdir.create_directory(dest_dir)
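GNU tar's --skip-old-files flag leaves any file that already exists on disk untouched, which is why the NOTE above warns against reusing a destination across different versions of a tarball. A rough Python equivalent of those semantics, using the standard tarfile module:

import os
import tarfile

def extract_skip_existing(archive: str, dest_dir: str) -> None:
    # Approximates tar --skip-old-files: members whose target path
    # already exists are skipped silently rather than overwritten.
    with tarfile.open(archive) as tar:
        for member in tar.getmembers():
            target = os.path.join(dest_dir, member.name)
            if os.path.exists(target):
                continue  # the silent skip the NOTE warns about
            tar.extract(member, dest_dir)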
25 changes: 17 additions & 8 deletions microsoft/testsuites/dpdk/common.py
@@ -3,6 +3,7 @@

from pathlib import PurePath
from typing import Any, Callable, Dict, List, Optional, Sequence, Type, Union
from urllib.parse import urlparse

from assertpy import assert_that
from semver import VersionInfo
@@ -126,7 +127,6 @@ def download(self) -> PurePath:
for suffix in [".tar.gz", ".tar.bz2", ".tar"]:
if self._tar_url.endswith(suffix):
is_tarball = True
-tarfile_suffix = suffix
break
assert_that(is_tarball).described_as(
(
@@ -136,9 +136,7 @@
).is_true()
if self._is_remote_tarball:
tarfile = node.tools[Wget].get(
-self._tar_url,
-file_path=str(work_path),
-overwrite=False,
+self._tar_url, overwrite=False, file_path=str(node.get_working_path())
)
remote_path = node.get_pure_path(tarfile)
self.tar_filename = remote_path.name
@@ -149,16 +147,18 @@
local_path=PurePath(self._tar_url),
node_path=remote_path,
)
tar_root_folder = node.tools[Tar].get_root_folder(str(remote_path))
# create tarfile dest dir
-self.asset_path = work_path.joinpath(
-self.tar_filename[: -(len(tarfile_suffix))]
-)
+self.asset_path = work_path.joinpath(tar_root_folder)
# unpack into the dest dir
# force name as tarfile name
# add option to skip files which already exist on disk
# in the event we have already extracted this specific tar
node.tools[Tar].extract(
file=str(remote_path),
dest_dir=str(work_path),
gzip=True,
skip_existing_files=True,
)
return self.asset_path
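Tar.get_root_folder itself is not part of this diff; the point of the change is to derive asset_path from the archive's actual top-level directory instead of from the filename minus its suffix, since the two often differ (a tarball named v1.2.3.tar.gz may unpack to project-1.2.3/). A plausible standalone equivalent using the tarfile module:

import tarfile
from pathlib import PurePath

def get_root_folder(archive: str) -> str:
    # First path component of the first member, e.g. "dpdk-23.11"
    # when every entry lives under dpdk-23.11/.
    with tarfile.open(archive) as tar:
        first_member = tar.getmembers()[0].name
    return PurePath(first_member).parts[0]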

@@ -350,7 +350,16 @@ def check_dpdk_support(node: Node) -> None:


def is_url_for_tarball(url: str) -> bool:
return ".tar" in PurePath(url).suffixes
# fetch the resource from the url
# ex. get example/thing.tar from www.github.com/example/thing.tar.gz
url_path = urlparse(url).path
if not url_path:
return False
suffixes = PurePath(url_path).suffixes
if not suffixes:
return False
# check if '.tar' in [ '.tar', '.gz' ]
return ".tar" in suffixes


def is_url_for_git_repo(url: str) -> bool:
Expand Down
2 changes: 1 addition & 1 deletion microsoft/testsuites/dpdk/dpdkutil.py
@@ -135,7 +135,7 @@ def get_rdma_core_installer(
if is_url_for_git_repo(rdma_source):
# else, if we have a user provided rdma-core source, use it
downloader: Downloader = GitDownloader(node, rdma_source, rdma_branch)
-elif is_url_for_tarball(rdma_branch):
+elif is_url_for_tarball(rdma_source):
downloader = TarDownloader(node, rdma_source)
else:
# throw on unrecognized rdma core source type
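The one-line fix above matters because the old code tested the branch name rather than the source URL, so a tarball source was never recognized and fell through to the unrecognized-source error. A hypothetical regression check, reusing is_url_for_tarball as sketched earlier (the URL and branch name are illustrative):

rdma_source = "https://example.com/rdma-core-49.1.tar.gz"
rdma_branch = "stable-v49"

# After the fix, the source URL selects the TarDownloader path.
assert is_url_for_tarball(rdma_source)
# The branch name never looks like a tarball, so the old check always failed.
assert not is_url_for_tarball(rdma_branch)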