From 7b666a2a3500f7dc7d30d447e27c3d4ac37597dc Mon Sep 17 00:00:00 2001 From: Jen Sims Date: Tue, 20 Aug 2024 09:58:14 -0400 Subject: [PATCH 1/5] fold back in docker tarball changes --- .../infoextractors/docker_tarball_file.py | 87 +++++++++++++++++++ surfactant/plugin/manager.py | 2 + 2 files changed, 89 insertions(+) create mode 100644 surfactant/infoextractors/docker_tarball_file.py diff --git a/surfactant/infoextractors/docker_tarball_file.py b/surfactant/infoextractors/docker_tarball_file.py new file mode 100644 index 00000000..a63805f2 --- /dev/null +++ b/surfactant/infoextractors/docker_tarball_file.py @@ -0,0 +1,87 @@ +# Copyright 2024 Lawrence Livermore National Security, LLC +# see: ${repository}/LICENSE +# +# SPDX-License-Identifier: MIT + +import tarfile +from pathlib import PurePosixPath +import json +from typing import IO, Any, Union + +import surfactant.plugin +from surfactant.sbomtypes import SBOM, Software + + +def get_manifest_file_from_tarball(tarball: tarfile.TarFile) -> IO[bytes] | None: + return tarball.extractfile( + {tarinfo.name: tarinfo for tarinfo in tarball.getmembers()}["manifest.json"] + ) + + +def get_config_file_from_tarball( + tarball: tarfile.TarFile, path: str +) -> Union[IO[bytes], None]: + return tarball.extractfile( + {tarinfo.name: tarinfo for tarinfo in tarball.getmembers()}[path] + ) + + +def get_config_path_from_manifest(manifest: list[dict[str, Any]]) -> list[str]: + path = "Config" + return [entry[path] for entry in manifest] + + +def get_repo_tags_from_manifest(manifest: list[dict[str, Any]]) -> list[str]: + path = "RepoTags" + return [entry[path] for entry in manifest] + + +def portable_path_list(*paths: str): + """Convert paths to a portable format acknowledged by""" + return tuple(str(PurePosixPath(path_str)) for path_str in paths) + + +def supports_file(filename: str, filetype: str) -> bool: + EXPECTED_FILETYPE = "TAR" + + expected_members = portable_path_list( + "index.json", + "manifest.json", + "oci-layout", + "repositories", + "blobs/sha256", + ) + + if filetype != EXPECTED_FILETYPE: + return False + + with tarfile.open(filename) as this_tarfile: + found_members = portable_path_list( + *[member.name for member in this_tarfile.getmembers()] + ) + + return all(expected_member in found_members for expected_member in expected_members) + + +@surfactant.plugin.hookimpl +def extract_file_info( + sbom: SBOM, software: Software, filename: str, filetype: str +) -> object: + if not supports_file(filename, filetype): + return None + return extract_image_info(filename) + + +def extract_image_info(filename: str): + """Return image configuration objects mapped by their paths.""" + root_key = "dockerImageConfigs" + image_info: dict[str, list[dict[str, Any]]] = {root_key: []} + with tarfile.open(filename) as tarball: + # we know the manifest file is present or we wouldn't be this far + assert (manifest_file := get_manifest_file_from_tarball(tarball)) + manifest = json.load(manifest_file) + for config_path in manifest.get_config_path_from_manifest(manifest): + assert (config_file := get_config_file_from_tarball(tarball, config_path)) + config = json.load(config_file) + image_info[root_key].append(config) + return image_info diff --git a/surfactant/plugin/manager.py b/surfactant/plugin/manager.py index 71d861f8..de492b63 100644 --- a/surfactant/plugin/manager.py +++ b/surfactant/plugin/manager.py @@ -18,6 +18,7 @@ def _register_plugins(pm: pluggy.PluginManager) -> None: a_out_file, coff_file, docker_image, + docker_tarball_file, elf_file, java_file, js_file, @@ -45,6 +46,7 @@ def _register_plugins(pm: pluggy.PluginManager) -> None: a_out_file, coff_file, docker_image, + docker_tarball_file, elf_file, java_file, js_file, From 0f72be2640dfc25cc50a58752c9e9a23e63dfe54 Mon Sep 17 00:00:00 2001 From: Jen Sims Date: Tue, 20 Aug 2024 10:26:23 -0400 Subject: [PATCH 2/5] used new docker type id --- surfactant/infoextractors/docker_tarball_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/surfactant/infoextractors/docker_tarball_file.py b/surfactant/infoextractors/docker_tarball_file.py index a63805f2..3ce5efe5 100644 --- a/surfactant/infoextractors/docker_tarball_file.py +++ b/surfactant/infoextractors/docker_tarball_file.py @@ -42,7 +42,7 @@ def portable_path_list(*paths: str): def supports_file(filename: str, filetype: str) -> bool: - EXPECTED_FILETYPE = "TAR" + EXPECTED_FILETYPE = "DOCKER_TAR" expected_members = portable_path_list( "index.json", From ec7e7aaeb48fd72cc5437c06eb99293eb37b6704 Mon Sep 17 00:00:00 2001 From: Jen Sims Date: Tue, 27 Aug 2024 13:44:14 -0400 Subject: [PATCH 3/5] updated docker file for image extraction details --- surfactant/infoextractors/docker_image.py | 112 +++++++++++++++++----- surfactant/plugin/manager.py | 2 - 2 files changed, 89 insertions(+), 25 deletions(-) diff --git a/surfactant/infoextractors/docker_image.py b/surfactant/infoextractors/docker_image.py index d62e8682..dbaae2a7 100644 --- a/surfactant/infoextractors/docker_image.py +++ b/surfactant/infoextractors/docker_image.py @@ -6,9 +6,15 @@ import json import subprocess import tempfile +import tarfile +from typing import IO, Any, Union from loguru import logger +### =============================== +### Utility Predicates +### =============================== + import surfactant.plugin from surfactant.sbomtypes import SBOM, Software @@ -24,34 +30,35 @@ def is_docker_scout_installed(): except FileNotFoundError: return False +def is_oci_archive(filename: str) -> bool: + """Return True if given file is a tarball + roughly matching the OCI specification""" -# Check if Docker Scout is installed when this Python module gets loaded -disable_docker_scout = not is_docker_scout_installed() - + with tarfile.open(filename) as this_tarfile: # oci-layout only path ensured + return "oci-layout" in this_tarfile.getmembers() def supports_file(filetype: str) -> bool: return filetype in ("DOCKER_TAR", "DOCKER_GZIP") +### =============================== +### Archive Utilities +### =============================== +def gunzip_tarball(filename: str) -> object: + """ Unzip a gzipped tarball to a temporary file + and return the name of the corresponding file. """ -@surfactant.plugin.hookimpl -def extract_file_info(sbom: SBOM, software: Software, filename: str, filetype: str) -> object: - if disable_docker_scout or not supports_file(filetype): - return None - return extract_docker_info(filetype, filename) - - -def extract_docker_info(filetype: str, filename: str) -> object: - if filetype == "DOCKER_GZIP": - with open(filename, "rb") as gzip_in: - gzip_data = gzip_in.read() - with tempfile.NamedTemporaryFile() as gzip_out: - gzip_out.write(gzip.decompress(gzip_data)) - return run_docker_scout(gzip_out.name) - return run_docker_scout(filename) - - -# Function that extract_docker_info delegates to to actually run Docker scout -def run_docker_scout(filename: str) -> object: + with open(filename, "rb") as gzip_in: + gzip_data = gzip_in.read() + with tempfile.NamedTemporaryFile() as gzip_out: + gzip_out.write(gzip.decompress(gzip_data)) + return gzip_out.name + +### =============================== +### Extraction Procedures +### =============================== +def extract_info_via_docker_scout(filename: str) -> object: + """ Dispatch to `docker-scout` subprocess, + returning captured SPDX output""" result = subprocess.run( ["docker", "scout", "sbom", "--format", "spdx", f"fs://{filename}"], capture_output=True, @@ -61,4 +68,63 @@ def run_docker_scout(filename: str) -> object: logger.warning(f"Running Docker Scout on {filename} failed") return {} spdx_out = json.loads(result.stdout) - return {"dockerSPDX": spdx_out} + return spdx_out + +def extract_configs(filename: str): + """Return image configuration objects mapped by their paths.""" + def get_manifest_file_from_tarball(tarball: tarfile.TarFile) -> IO[bytes] | None: + return tarball.extractfile( + {tarinfo.name: tarinfo for tarinfo in tarball.getmembers()}["manifest.json"] + ) + + def get_config_file_from_tarball( + tarball: tarfile.TarFile, path: str + ) -> Union[IO[bytes], None]: + return tarball.extractfile( + {tarinfo.name: tarinfo for tarinfo in tarball.getmembers()}[path] + ) + + def get_config_path_from_manifest(manifest: list[dict[str, Any]]) -> list[str]: + path = "Config" + return [entry[path] for entry in manifest] + + # currently unused + def get_repo_tags_from_manifest(manifest: list[dict[str, Any]]) -> list[str]: + path = "RepoTags" + return [entry[path] for entry in manifest] + + image_configs = [] + with tarfile.open(filename) as tarball: + # we know the manifest file is present or we wouldn't be this far + assert (manifest_file := get_manifest_file_from_tarball(tarball)) + manifest = json.load(manifest_file) + for config_path in get_config_path_from_manifest(manifest): + assert (config_file := get_config_file_from_tarball(tarball, config_path)) + config = json.load(config_file) + image_configs.append(config) + return image_configs + +### ================================= +### Hook Implementation +### ================================= + +@surfactant.plugin.hookimpl +def extract_file_info(sbom: SBOM, software: Software, filename: str, filetype: str) -> object: + if not supports_file(filetype): + return None + + ## Conditionally extract tarball if gzipped + filename = gunzip_tarball(filename) if filetype == "DOCKER_GZIP" else filename + + ## Establish metadata object + metadata = {} + + ## Extract config files + metadata["dockerImageConfigs"] = extract_configs(filename) + + ## Use docker-scout if available + if is_docker_scout_installed(): + metadata["dockerSPDX"] = extract_info_via_docker_scout(filename) + + ## Return final metadata object + return metadata diff --git a/surfactant/plugin/manager.py b/surfactant/plugin/manager.py index de492b63..71d861f8 100644 --- a/surfactant/plugin/manager.py +++ b/surfactant/plugin/manager.py @@ -18,7 +18,6 @@ def _register_plugins(pm: pluggy.PluginManager) -> None: a_out_file, coff_file, docker_image, - docker_tarball_file, elf_file, java_file, js_file, @@ -46,7 +45,6 @@ def _register_plugins(pm: pluggy.PluginManager) -> None: a_out_file, coff_file, docker_image, - docker_tarball_file, elf_file, java_file, js_file, From a7886e94bd1decf67302243bc9aa962bed353ff3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 27 Aug 2024 17:47:21 +0000 Subject: [PATCH 4/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- surfactant/infoextractors/docker_image.py | 39 +++++++++++-------- .../infoextractors/docker_tarball_file.py | 18 +++------ 2 files changed, 27 insertions(+), 30 deletions(-) diff --git a/surfactant/infoextractors/docker_image.py b/surfactant/infoextractors/docker_image.py index dbaae2a7..a8bc5cfb 100644 --- a/surfactant/infoextractors/docker_image.py +++ b/surfactant/infoextractors/docker_image.py @@ -5,8 +5,8 @@ import gzip import json import subprocess -import tempfile import tarfile +import tempfile from typing import IO, Any, Union from loguru import logger @@ -14,7 +14,6 @@ ### =============================== ### Utility Predicates ### =============================== - import surfactant.plugin from surfactant.sbomtypes import SBOM, Software @@ -30,35 +29,39 @@ def is_docker_scout_installed(): except FileNotFoundError: return False + def is_oci_archive(filename: str) -> bool: - """Return True if given file is a tarball + """Return True if given file is a tarball roughly matching the OCI specification""" - with tarfile.open(filename) as this_tarfile: # oci-layout only path ensured - return "oci-layout" in this_tarfile.getmembers() + with tarfile.open(filename) as this_tarfile: # oci-layout only path ensured + return "oci-layout" in this_tarfile.getmembers() + def supports_file(filetype: str) -> bool: return filetype in ("DOCKER_TAR", "DOCKER_GZIP") + ### =============================== ### Archive Utilities ### =============================== def gunzip_tarball(filename: str) -> object: - """ Unzip a gzipped tarball to a temporary file - and return the name of the corresponding file. """ + """Unzip a gzipped tarball to a temporary file + and return the name of the corresponding file.""" with open(filename, "rb") as gzip_in: gzip_data = gzip_in.read() with tempfile.NamedTemporaryFile() as gzip_out: gzip_out.write(gzip.decompress(gzip_data)) return gzip_out.name - + + ### =============================== ### Extraction Procedures ### =============================== def extract_info_via_docker_scout(filename: str) -> object: - """ Dispatch to `docker-scout` subprocess, - returning captured SPDX output""" + """Dispatch to `docker-scout` subprocess, + returning captured SPDX output""" result = subprocess.run( ["docker", "scout", "sbom", "--format", "spdx", f"fs://{filename}"], capture_output=True, @@ -70,16 +73,16 @@ def extract_info_via_docker_scout(filename: str) -> object: spdx_out = json.loads(result.stdout) return spdx_out + def extract_configs(filename: str): """Return image configuration objects mapped by their paths.""" + def get_manifest_file_from_tarball(tarball: tarfile.TarFile) -> IO[bytes] | None: return tarball.extractfile( {tarinfo.name: tarinfo for tarinfo in tarball.getmembers()}["manifest.json"] ) - def get_config_file_from_tarball( - tarball: tarfile.TarFile, path: str - ) -> Union[IO[bytes], None]: + def get_config_file_from_tarball(tarball: tarfile.TarFile, path: str) -> Union[IO[bytes], None]: return tarball.extractfile( {tarinfo.name: tarinfo for tarinfo in tarball.getmembers()}[path] ) @@ -92,7 +95,7 @@ def get_config_path_from_manifest(manifest: list[dict[str, Any]]) -> list[str]: def get_repo_tags_from_manifest(manifest: list[dict[str, Any]]) -> list[str]: path = "RepoTags" return [entry[path] for entry in manifest] - + image_configs = [] with tarfile.open(filename) as tarball: # we know the manifest file is present or we wouldn't be this far @@ -104,19 +107,21 @@ def get_repo_tags_from_manifest(manifest: list[dict[str, Any]]) -> list[str]: image_configs.append(config) return image_configs + ### ================================= ### Hook Implementation ### ================================= + @surfactant.plugin.hookimpl def extract_file_info(sbom: SBOM, software: Software, filename: str, filetype: str) -> object: if not supports_file(filetype): return None - + ## Conditionally extract tarball if gzipped filename = gunzip_tarball(filename) if filetype == "DOCKER_GZIP" else filename - - ## Establish metadata object + + ## Establish metadata object metadata = {} ## Extract config files diff --git a/surfactant/infoextractors/docker_tarball_file.py b/surfactant/infoextractors/docker_tarball_file.py index 3ce5efe5..2ffa504f 100644 --- a/surfactant/infoextractors/docker_tarball_file.py +++ b/surfactant/infoextractors/docker_tarball_file.py @@ -3,9 +3,9 @@ # # SPDX-License-Identifier: MIT +import json import tarfile from pathlib import PurePosixPath -import json from typing import IO, Any, Union import surfactant.plugin @@ -18,12 +18,8 @@ def get_manifest_file_from_tarball(tarball: tarfile.TarFile) -> IO[bytes] | None ) -def get_config_file_from_tarball( - tarball: tarfile.TarFile, path: str -) -> Union[IO[bytes], None]: - return tarball.extractfile( - {tarinfo.name: tarinfo for tarinfo in tarball.getmembers()}[path] - ) +def get_config_file_from_tarball(tarball: tarfile.TarFile, path: str) -> Union[IO[bytes], None]: + return tarball.extractfile({tarinfo.name: tarinfo for tarinfo in tarball.getmembers()}[path]) def get_config_path_from_manifest(manifest: list[dict[str, Any]]) -> list[str]: @@ -56,17 +52,13 @@ def supports_file(filename: str, filetype: str) -> bool: return False with tarfile.open(filename) as this_tarfile: - found_members = portable_path_list( - *[member.name for member in this_tarfile.getmembers()] - ) + found_members = portable_path_list(*[member.name for member in this_tarfile.getmembers()]) return all(expected_member in found_members for expected_member in expected_members) @surfactant.plugin.hookimpl -def extract_file_info( - sbom: SBOM, software: Software, filename: str, filetype: str -) -> object: +def extract_file_info(sbom: SBOM, software: Software, filename: str, filetype: str) -> object: if not supports_file(filename, filetype): return None return extract_image_info(filename) From b51f688a1ac62824f618a75c07108f8f9ee605aa Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 13 Mar 2025 15:23:55 +0000 Subject: [PATCH 5/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- surfactant/infoextractors/docker_image.py | 1 - 1 file changed, 1 deletion(-) diff --git a/surfactant/infoextractors/docker_image.py b/surfactant/infoextractors/docker_image.py index 180ade68..4b677be7 100644 --- a/surfactant/infoextractors/docker_image.py +++ b/surfactant/infoextractors/docker_image.py @@ -15,7 +15,6 @@ from surfactant.configmanager import ConfigManager from surfactant.sbomtypes import SBOM, Software - ### =============================== ### Utility Predicates ### ===============================