Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test/scripts: add scripts for convenient download of image build CI cache (HMS-5356) #1155

Merged
merged 4 commits into from
Jan 22, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
186 changes: 186 additions & 0 deletions test/scripts/dl-image-build-cache
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
#!/usr/bin/env python3

"""
Download the image build CI cache files for the current branch from AWS S3.

This script downloads the image build cache files for the current branch from AWS S3.
The script generates the current manifests to determine the image build cache files to download.
"""

import argparse
import os
import sys
import tempfile
from fnmatch import fnmatch
from typing import Dict, List, Optional

import imgtestlib as testlib


def get_argparser():
    """
    Build the command line parser of the image build cache download script.

    The filter options (--distro, --arch, --image-type, --skip-image-type, --config) take one or more
    values per occurrence and may be repeated; the values of all occurrences end up in a single list.
    """

    class ExtendAction(argparse.Action):
        """
        argparse action which accumulates the values of every occurrence of a list option,
        so that a later occurrence does not overwrite the values of an earlier one.

        This may be removed when Python 3.8 is the minimum supported version (in osbuild).
        """
        def __call__(self, parser, namespace, values, option_string=None):
            collected = getattr(namespace, self.dest)
            if not collected:
                # first occurrence of the option (or an empty default): start a fresh list
                collected = []
            collected.extend(values)
            setattr(namespace, self.dest, collected)

    parser = argparse.ArgumentParser(description=__doc__)
    parser.register('action', 'extend', ExtendAction)

    # (flag, metavar, help text) of every repeatable multi-value option, in the order shown by --help
    list_options = [
        (
            "--distro", "DISTRO",
            "Distro for which the image build cache is downloaded. Can be specified multiple times. "
            "If not provided, all distros are downloaded.",
        ),
        (
            "--arch", "ARCH",
            "Architecture for which the image build cache is downloaded. Can be specified multiple times. "
            "If not provided, all architectures are downloaded.",
        ),
        (
            "--image-type", "TYPE",
            "Image type for which the image build cache is downloaded. Can be specified multiple times. "
            "If not provided, all image types are downloaded. "
            "The option is mutually exclusive with --skip-image-type.",
        ),
        (
            "--skip-image-type", "TYPE_GLOB",
            "Image types to skip when downloading the image build cache. Can be specified multiple times. "
            "The option is mutually exclusive with --image-type.",
        ),
        (
            "--config", "NAME_GLOB",
            "Config name globs used to filter which image build cache files are downloaded. "
            "Can be specified multiple times. If not provided, all configs are downloaded.",
        ),
    ]
    for flag, metavar, help_text in list_options:
        parser.add_argument(flag, action="extend", metavar=metavar, nargs="+", help=help_text)

    parser.add_argument(
        "--output", type=str, metavar="DIR",
        help=(
            "Directory to download the image build cache to. "
            "If not provided, `./s3cache_osbuild-<ref>_runner-<runner-distro>` is used."
        ),
    )
    parser.add_argument(
        "--dl-image", action="store_true", default=False,
        help=(
            "Download the built image files from the cache. "
            "These are not downloaded by default because of their size."
        ),
    )

    return parser


def gen_manifest_data_to_build_cache_info(
        manifest_gen_data: Dict, config_names: Optional[List[str]] = None,
        skip_img_types: Optional[List[str]] = None) -> List[Dict[str, str]]:
    """
    Convert the manifest generation data as returned by `read_manifests()` into the list of records
    used to download the current image build cache files.

    Every record holds the "distro", "arch", "image-type", "config" and "manifest-id" of one manifest.

    If a list of config name globs is passed, only manifests whose config name matches at least one of
    them are kept. If a list of image type globs to skip is passed, manifests whose image type matches
    any of them are dropped.
    """
    def to_record(entry):
        request = entry["data"]["build-request"]
        return {
            "distro": request["distro"],
            "arch": request["arch"],
            "image-type": request["image-type"],
            "config": request["config"]["name"],
            "manifest-id": entry["id"],
        }

    def matches_any(value, globs):
        return any(fnmatch(value, pattern) for pattern in globs)

    def is_selected(record):
        # the config filter is checked first; the skip list is only consulted for configs that passed
        config_ok = not config_names or matches_any(record["config"], config_names)
        return config_ok and not (skip_img_types and matches_any(record["image-type"], skip_img_types))

    # lazily build one record at a time, so each manifest is fully read before it is filtered
    records = (to_record(entry) for entry in manifest_gen_data.values())
    return [record for record in records if is_selected(record)]


def main():
    """
    Download the image build cache files for the manifests of the current branch.

    The current manifests are generated into a temporary directory to learn their IDs, filtered by the
    command line options, and the cache of each remaining configuration is downloaded into its own
    subdirectory of the output directory.

    Exits with status 1 if no configuration matches or if any download fails.
    """
    parser = get_argparser()
    args = parser.parse_args()

    if args.image_type and args.skip_image_type:
        parser.error("--image-type and --skip-image-type are mutually exclusive")

    runner_distro = testlib.get_common_ci_runner_distro()
    osbuild_ref = testlib.get_osbuild_commit(runner_distro)
    if osbuild_ref is None:
        raise RuntimeError(f"Failed to determine osbuild commit for {runner_distro} from the Schutzfile")

    if args.output is not None:
        output_dir = args.output
    else:
        output_dir = f"./s3cache_osbuild-{osbuild_ref}_runner-{runner_distro}"

    # the generated manifests are only needed to read their IDs, so they live in a temporary directory
    with tempfile.TemporaryDirectory() as tmpdir:
        print("📜 Generating current manifests to determine their IDs")
        _ = testlib.gen_manifests(tmpdir, arches=args.arch, distros=args.distro, images=args.image_type)
        manifest_gen_data = testlib.read_manifests(tmpdir)

    build_cache_infos = gen_manifest_data_to_build_cache_info(manifest_gen_data, args.config, args.skip_image_type)

    if not build_cache_infos:
        print("⚠️ No image build cache files found for the specified configurations", file=sys.stderr)
        sys.exit(1)

    print(f"📥 Downloading the image build cache files for osbuild-ref:{osbuild_ref} and ci-runner:{runner_distro}")
    print(f"📥 Will download files for {len(build_cache_infos)} configurations into {output_dir}")

    # without --dl-image, restrict the download to the JSON and bib-* files
    s3_include_only = None if args.dl_image else ["*.json", "bib-*"]

    failed = []
    for info in build_cache_infos:
        build_name = testlib.gen_build_name(info["distro"], info["arch"], info["image-type"], info["config"])
        target_dir = os.path.join(output_dir, build_name)

        out, dl_ok = testlib.dl_build_cache(
            target_dir, info["distro"], info["arch"], osbuild_ref, runner_distro, info["manifest-id"],
            s3_include_only)
        if dl_ok:
            print(out)
        else:
            # keep going, all failures are reported together at the end
            failed.append(info)

    if failed:
        print(
            f"❌ Failed to download the image build cache for {len(failed)} configurations:",
            file=sys.stderr
        )
        for info in failed:
            build_id = f"{info['distro']}/{info['arch']}/{info['image-type']}/{info['config']}"
            print(f" {build_id} with manifest ID {info['manifest-id']}", file=sys.stderr)
        sys.exit(1)

    print(f"✅ Successfully downloaded the image build cache for {len(build_cache_infos)} configurations")


if __name__ == "__main__":
    # Script entry point: an interruption by the user (Ctrl-C) is reported with a
    # short message on stderr and exit status 1.
    try:
        main()
    except KeyboardInterrupt:
        print("Interrupted by user", file=sys.stderr)
        sys.exit(1)
76 changes: 76 additions & 0 deletions test/scripts/dl-one-image-build-cache
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/usr/bin/env python3

"""
Download the image build CI cache files for a specific image from AWS S3.

This script downloads the image build cache files for a specific image from AWS S3.
The script reads the build info JSON file to determine the image build cache files to download.
"""

import argparse
import os
import sys

import imgtestlib as testlib


def get_argparser():
    """
    Build the command line parser of the single image build cache download script.

    The parser takes the optional --build-info option and the positional BUILD_DIR argument,
    which is converted to an absolute path.
    """
    build_info_help = (
        "Path to the build info JSON file containing the image build cache information. "
        "If not provided, the script will try to read '<build-dir>/info.json.'"
    )
    build_dir_help = (
        "Directory where the image build cache files are downloaded to. "
        "It may already contain the build cache files from a previous run."
    )

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--build-info", type=str, metavar="JSON_FILE", help=build_info_help)
    parser.add_argument("build_dir", type=os.path.abspath, metavar="BUILD_DIR", help=build_dir_help)
    return parser


def main():
    """
    Download the image build cache files of one specific image.

    The build info ('info.json') is read from the directory of the file passed with --build-info, or
    from BUILD_DIR if the option is not used. Its "distro", "arch", "osbuild-commit" and
    "manifest-checksum" values select the cache to download into BUILD_DIR. If the build info has no
    "runner-distro", the CI runner distro of the current branch is used instead.

    Exits with status 1 if the download fails.
    """
    parser = get_argparser()
    args = parser.parse_args()

    build_dir = args.build_dir
    if args.build_info:
        # Resolve the path before taking its directory: for a bare file name such as 'info.json',
        # os.path.dirname() returns an empty string, which made the message below print no location.
        build_info_path = os.path.abspath(args.build_info)
        build_info_dir = os.path.dirname(build_info_path)
        build_info_name = os.path.basename(build_info_path)
        if build_info_name != "info.json":
            # Only the directory of --build-info is used, so a differently named file would be
            # ignored silently. Tell the user which file is going to be read instead.
            print(f"⚠️ --build-info points to '{build_info_name}', " +
                  f"but the build info is read from 'info.json' in {build_info_dir}", file=sys.stderr)
    else:
        build_info_dir = build_dir

    print(f"📜 Reading 'info.json' from {build_info_dir}")
    build_info = testlib.read_build_info(build_info_dir)

    distro = build_info["distro"]
    arch = build_info["arch"]
    osbuild_ref = build_info["osbuild-commit"]
    manifest_id = build_info["manifest-checksum"]
    runner_distro = build_info.get("runner-distro")

    if runner_distro is None:
        # fall back to the runner distro of the current branch when the build info does not record one
        runner_distro = testlib.get_common_ci_runner_distro()
        print("⚠️ Runner distro not found in the build info. " +
              f"Using the CI runner distro from the current branch: {runner_distro}", file=sys.stderr)

    print("📥 Downloading the image build cache files for:")
    print(f" distro: {distro}")
    print(f" arch: {arch}")
    print(f" manifest-id: {manifest_id}")
    print(f" osbuild-ref: {osbuild_ref}")
    print(f" runner-distro: {runner_distro}")

    out, dl_ok = testlib.dl_build_cache(build_dir, distro, arch, osbuild_ref, runner_distro, manifest_id)
    # the captured output is printed even on failure
    print(out)
    if not dl_ok:
        print("❌ Failed to download the image build cache", file=sys.stderr)
        sys.exit(1)

    print("✅ Successfully downloaded the image build cache")


if __name__ == "__main__":
    # Script entry point: an interruption by the user (Ctrl-C) is reported with a
    # short message on stderr and exit status 1.
    try:
        main()
    except KeyboardInterrupt:
        print("Interrupted by user", file=sys.stderr)
        sys.exit(1)
57 changes: 43 additions & 14 deletions test/scripts/imgtestlib.py
Original file line number Diff line number Diff line change
@@ -5,7 +5,7 @@
import subprocess as sp
import sys
from glob import glob
from typing import Dict
from typing import Dict, List, Optional

TEST_CACHE_ROOT = ".cache/osbuild-images"
CONFIGS_PATH = "./test/configs"
@@ -108,21 +108,31 @@ def list_images(distros=None, arches=None, images=None):
return json.loads(out)


def dl_build_info(destination, distro=None, arch=None, osbuild_ref=None, runner_distro=None):
# pylint: disable=too-many-arguments,too-many-positional-arguments
def dl_build_cache(
destination, distro: Optional[str]=None, arch: Optional[str]=None, osbuild_ref: Optional[str]=None,
runner_distro: Optional[str]=None, manifest_id: Optional[str]=None, include_only: Optional[List[str]]=None):
"""
Downloads all the configs from the s3 bucket.
Downloads image build cache files from the s3 bucket.

If 'include_only' is not specified, all files are downloaded. Otherwise, all files will be excluded and the items
in the 'include_only' list will be passed as '--include' arguments to the 'aws s3 sync' command.
"""
s3url = gen_build_info_s3_dir_path(distro, arch, osbuild_ref=osbuild_ref, runner_distro=runner_distro)
print(f"⬇️ Downloading configs from {s3url}")
# only download info.json (exclude everything, then include) files, otherwise we get manifests and whole images
job = sp.run(["aws", "s3", "sync",
"--no-progress", # wont show progress but will print file list
"--exclude=*",
"--include=*/info.json",
"--include=*/bib-*",
s3url, destination],
capture_output=True,
check=False)
s3url = gen_build_info_s3_dir_path(distro, arch, manifest_id, osbuild_ref, runner_distro)
dl_what = "all files" if include_only is None else "only " + ', '.join(f"'{i}'" for i in include_only)
print(f"⬇️ Downloading {dl_what} from {s3url}")

cmd = [
"aws", "s3", "sync",
"--no-progress", # wont show progress but will print file list
]
if include_only:
cmd.extend(["--exclude=*"])
for i in include_only:
cmd.extend([f"--include={i}"])
cmd.extend([s3url, destination])

job = sp.run(cmd, capture_output=True, check=False)
ok = job.returncode == 0
if not ok:
print(f"⚠️ Failed to sync contents of {s3url}:")
@@ -131,6 +141,15 @@ def dl_build_info(destination, distro=None, arch=None, osbuild_ref=None, runner_
return job.stdout.decode(), ok


def dl_build_info(destination, distro=None, arch=None, osbuild_ref=None, runner_distro=None):
    """
    Downloads the build info files from the s3 bucket.

    The download is limited to the '*/info.json' and '*/bib-*' files, otherwise we would get manifests
    and whole images as well. The result of dl_build_cache() is returned unchanged.
    """
    return dl_build_cache(
        destination, distro, arch, osbuild_ref, runner_distro,
        include_only=["*/info.json", "*/bib-*"],
    )


def get_manifest_id(manifest_data):
md = json.dumps(manifest_data).encode()
out, _ = runcmd(["osbuild", "--inspect", "-"], stdin=md)
@@ -532,6 +551,16 @@ def get_common_ci_runner():
return runner


def get_common_ci_runner_distro():
    """
    Distro of the CI runner used for common tasks.

    The runner name (the value of the common.gitlab-ci-runner key in the Schutzfile) is split on "/"
    and its second component is returned, e.g. "fedora-999" for the runner "aws/fedora-999".
    """
    runner_parts = get_common_ci_runner().split("/")
    return runner_parts[1]


def find_image_file(build_path: str) -> str:
"""
Find the path to the image by reading the manifest to get the name of the last pipeline and searching for the file