DPDK: Fix source for tarball #3505

Open
wants to merge 5 commits into main
34 changes: 29 additions & 5 deletions lisa/base_tools/wget.py
@@ -1,5 +1,5 @@
import re
-from typing import TYPE_CHECKING, Optional, Tuple, Type
+from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Type
from urllib.parse import urlparse

from retry import retry
@@ -24,6 +24,10 @@ class Wget(Tool):
def command(self) -> str:
return "wget"

def _initialize(self, *args: Any, **kwargs: Any) -> None:
self._url_file_cache: Dict[str, str] = dict()
return super()._initialize(*args, **kwargs)
Member: It doesn't need the return, because the function signature's return type is None, so there is nothing meaningful to return.
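A minimal sketch of the shape the reviewer is suggesting, assuming the usual LISA convention that Tool._initialize returns None (the ordering of the two statements is illustrative):

def _initialize(self, *args: Any, **kwargs: Any) -> None:
    # Run the superclass hook, then set up the cache; nothing is
    # returned because the declared return type is None.
    super()._initialize(*args, **kwargs)
    self._url_file_cache: Dict[str, str] = {}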


@property
def can_install(self) -> bool:
return True
@@ -45,8 +49,19 @@ def get(
force_run: bool = False,
timeout: int = 600,
) -> str:
cached_filename = self._url_file_cache.get(url, None)
if cached_filename:
if force_run:
del self._url_file_cache[url]
else:
return cached_filename

is_valid_url(url)

if not filename:
filename = urlparse(url).path.split("/")[-1]
self._log.debug(f"filename is not provided, use {filename} from url.")

file_path, download_path = self._ensure_download_path(file_path, filename)

# remove existing file and dir to download again.
@@ -84,25 +99,27 @@ def get(
f" stdout: {command_result.stdout}"
f" templog: {temp_log}"
)
self.node.tools[Rm].remove_file(log_file, sudo=sudo)
else:
download_file_path = download_path

if command_result.is_timeout:
raise LisaTimeoutException(
f"wget command is timed out after {timeout} seconds."
)
-actual_file_path = self.node.execute(
+ls_result = self.node.execute(
f"ls {download_file_path}",
shell=True,
sudo=sudo,
expected_exit_code=0,
expected_exit_code_failure_message="File path does not exist, "
f"{download_file_path}",
)
actual_file_path = ls_result.stdout.strip()
self._url_file_cache[url] = actual_file_path
if executable:
self.node.execute(f"chmod +x {actual_file_path}", sudo=sudo)
self.node.tools[Rm].remove_file(log_file, sudo=sudo)
-return actual_file_path.stdout
+return actual_file_path

def verify_internet_access(self) -> bool:
try:
@@ -155,6 +172,13 @@ def get(
force_run: bool = False,
timeout: int = 600,
) -> str:
cached_filename = self._url_file_cache.get(url, None)
if cached_filename:
if force_run:
del self._url_file_cache[url]
else:
return cached_filename

ls = self.node.tools[Ls]

if not filename:
@@ -182,5 +206,5 @@ def get(
force_run=force_run,
timeout=timeout,
)

self._url_file_cache[url] = download_path
return download_path
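Both get() implementations above use the same memoization pattern: a per-tool dict maps each URL to the path it resolved to, and force_run=True evicts the entry so the download happens again. A standalone sketch of that pattern, independent of LISA's Tool machinery (Downloader and _download are illustrative names, not LISA APIs):

from typing import Dict

class Downloader:
    def __init__(self) -> None:
        self._url_file_cache: Dict[str, str] = {}

    def get(self, url: str, force_run: bool = False) -> str:
        # Serve from the cache unless the caller forces a re-download.
        cached_filename = self._url_file_cache.get(url, None)
        if cached_filename:
            if force_run:
                del self._url_file_cache[url]
            else:
                return cached_filename
        file_path = self._download(url)
        self._url_file_cache[url] = file_path
        return file_path

    def _download(self, url: str) -> str:
        # Placeholder for the real wget invocation.
        return "/tmp/" + url.rsplit("/", 1)[-1]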
17 changes: 17 additions & 0 deletions lisa/tools/tar.py
@@ -35,6 +35,7 @@ def extract(
gzip: bool = False,
sudo: bool = False,
raise_error: bool = True,
skip_existing_files: bool = False,
) -> None:
# create folder when it doesn't exist
assert_that(strip_components).described_as(
@@ -48,6 +49,21 @@
if strip_components:
# optionally strip N top level components from a tar file
tar_cmd += f" --strip-components={strip_components}"

if skip_existing_files:
# NOTE:
# This option is for when you are using
# Wget.get(..., force_run=False)
#
# Do not use this option if:
# - You may need to extract multiple versions of a
# given tarball on a node
# - You have provided a default output filename to Wget.get
# to fetch the tarball
#
# This skip-old-files option could silently skip extracting
# the second version of the tarball.
tar_cmd += " --skip-old-files"
result = self.run(tar_cmd, shell=True, force_run=True, sudo=sudo)
if raise_error:
result.assert_exit_code(
@@ -127,6 +143,7 @@ def extract(
gzip: bool = False,
sudo: bool = False,
raise_error: bool = True,
skip_existing_files: bool = False,
) -> None:
mkdir = self.node.tools[Mkdir]
mkdir.create_directory(dest_dir)
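GNU tar's --skip-old-files flag leaves any file that already exists on disk untouched, which is why the NOTE above warns against reusing a destination across different versions of a tarball. A rough Python equivalent of those semantics, using the standard tarfile module:

import os
import tarfile

def extract_skip_existing(archive: str, dest_dir: str) -> None:
    # Approximates tar --skip-old-files: members whose target path
    # already exists are skipped silently rather than overwritten.
    with tarfile.open(archive) as tar:
        for member in tar.getmembers():
            target = os.path.join(dest_dir, member.name)
            if os.path.exists(target):
                continue  # the silent skip the NOTE warns about
            tar.extract(member, dest_dir)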
25 changes: 17 additions & 8 deletions microsoft/testsuites/dpdk/common.py
@@ -3,6 +3,7 @@

from pathlib import PurePath
from typing import Any, Callable, Dict, List, Optional, Sequence, Type, Union
from urllib.parse import urlparse

from assertpy import assert_that
from semver import VersionInfo
@@ -126,7 +127,6 @@ def download(self) -> PurePath:
for suffix in [".tar.gz", ".tar.bz2", ".tar"]:
if self._tar_url.endswith(suffix):
is_tarball = True
-tarfile_suffix = suffix
break
assert_that(is_tarball).described_as(
(
@@ -136,9 +136,7 @@
).is_true()
if self._is_remote_tarball:
tarfile = node.tools[Wget].get(
-self._tar_url,
-file_path=str(work_path),
-overwrite=False,
+self._tar_url, overwrite=False, file_path=str(node.get_working_path())
)
remote_path = node.get_pure_path(tarfile)
self.tar_filename = remote_path.name
@@ -149,16 +147,18 @@
local_path=PurePath(self._tar_url),
node_path=remote_path,
)
tar_root_folder = node.tools[Tar].get_root_folder(str(remote_path))
# create tarfile dest dir
-self.asset_path = work_path.joinpath(
-self.tar_filename[: -(len(tarfile_suffix))]
-)
+self.asset_path = work_path.joinpath(tar_root_folder)
# unpack into the dest dir
# force name as tarfile name
# add option to skip files which already exist on disk
# in the event we have already extracted this specific tar
node.tools[Tar].extract(
file=str(remote_path),
dest_dir=str(work_path),
gzip=True,
skip_existing_files=True,
)
return self.asset_path
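Tar.get_root_folder itself is not part of this diff; the point of the change is to derive asset_path from the archive's actual top-level directory instead of from the filename minus its suffix, since the two often differ (a tarball named v1.2.3.tar.gz may unpack to project-1.2.3/). A plausible standalone equivalent using the tarfile module:

import tarfile
from pathlib import PurePath

def get_root_folder(archive: str) -> str:
    # First path component of the first member, e.g. "dpdk-23.11"
    # when every entry lives under dpdk-23.11/.
    with tarfile.open(archive) as tar:
        first_member = tar.getmembers()[0].name
    return PurePath(first_member).parts[0]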

@@ -350,7 +350,16 @@ def check_dpdk_support(node: Node) -> None:


def is_url_for_tarball(url: str) -> bool:
return ".tar" in PurePath(url).suffixes
# fetch the resource from the url
# ex. get example/thing.tar from www.github.com/example/thing.tar.gz
url_path = urlparse(url).path
if not url_path:
return False
suffixes = PurePath(url_path).suffixes
if not suffixes:
return False
# check if '.tar' in [ '.tar', '.gz' ]
return ".tar" in suffixes


def is_url_for_git_repo(url: str) -> bool:
Expand Down
2 changes: 1 addition & 1 deletion microsoft/testsuites/dpdk/dpdkutil.py
@@ -135,7 +135,7 @@ def get_rdma_core_installer(
if is_url_for_git_repo(rdma_source):
# else, if we have a user provided rdma-core source, use it
downloader: Downloader = GitDownloader(node, rdma_source, rdma_branch)
-elif is_url_for_tarball(rdma_branch):
+elif is_url_for_tarball(rdma_source):
downloader = TarDownloader(node, rdma_source)
else:
# throw on unrecognized rdma core source type
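The one-line fix above matters because the old code tested the branch name rather than the source URL, so a tarball source was never recognized and fell through to the unrecognized-source error. A hypothetical regression check, reusing is_url_for_tarball as sketched earlier (the URL and branch name are illustrative):

rdma_source = "https://example.com/rdma-core-49.1.tar.gz"
rdma_branch = "stable-v49"

# After the fix, the source URL selects the TarDownloader path.
assert is_url_for_tarball(rdma_source)
# The branch name never looks like a tarball, so the old check always failed.
assert not is_url_for_tarball(rdma_branch)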