diff --git a/sbom-utility-scripts/scripts/base-images-sbom-script/app/base_images_sbom_script.py b/sbom-utility-scripts/scripts/base-images-sbom-script/app/base_images_sbom_script.py index 0b75d978..da747f93 100644 --- a/sbom-utility-scripts/scripts/base-images-sbom-script/app/base_images_sbom_script.py +++ b/sbom-utility-scripts/scripts/base-images-sbom-script/app/base_images_sbom_script.py @@ -89,6 +89,14 @@ def get_base_images_sbom_components(base_images_digests, is_last_from_scratch): return components +def detect_sbom_format(sbom): + if sbom.get("bomFormat") == "CycloneDX": + return "cyclonedx" + elif sbom.get("spdxVersion"): + return "spdx" + else: + raise ValueError("Unknown SBOM format") + def parse_args(): parser = argparse.ArgumentParser( description="Updates the sbom file with base images data based on the provided files" @@ -160,7 +168,7 @@ def main(): sbom = json.load(f) base_images_sbom_components = get_base_images_sbom_components(base_images_digests, is_last_from_scratch) - if args.sbom_type == "cyclonedx": + if detect_sbom_format(sbom) == "cyclonedx": if "formulation" in sbom: sbom["formulation"].append({"components": base_images_sbom_components}) else: @@ -171,24 +179,25 @@ def main(): packages = [] relationships = [] - # Try to calculate middle element represeting the container image or directory, which was + # Try to calculate root package represeting the container image or directory, which was # used to build the SBOM, based on the relationships maps. # SPDX has relationsship ROOT-ID DESCRIBES MIDDLE-ID which express the fact the SBOM documents # describes container image or directory represented by MIDDLE-ID package. - middle_element1 = None + root_package1 = None for r, contains in map1.items(): - # middle element is the one which contains another elements and is in relationship with - # the root element where it stand as relatedSpdxElement + # root package is the one which contains another elements and is in relationship with + # the document element where it stand as relatedSpdxElement if contains and inverse_map1.get(r) == root_element1: - middle_element1 = r - # If not middle element is found then create one with ID "Uknown" as source for the SBOM + root_package1 = r + # If not root package is found then create one with ID "Uknown" as source for the SBOM # is not known. - if not middle_element1: - middle_element1 = "SPDXRef-DocumentRoot-Unknown-" + if not root_package1: + root_package1 = "SPDXRef-DocumentRoot-Unknown-" packages.append( { "SPDXID": "SPDXRef-DocumentRoot-Unknown-", "name": "", + "downloadLocation": "NOASSERTION", } ) relationships.append( @@ -210,6 +219,7 @@ def main(): { "SPDXID": SPDXID, "name": component["name"], + "downloadLocation": "NOASSERTION", # See more info about external refs here: # https://spdx.github.io/spdx-spec/v2.3/package-information/#7211-description "externalRefs": [ @@ -223,7 +233,7 @@ def main(): # as json string "annotations": [ { - "annotator": "konflux:jsonencoded", + "annotator": "Tool:konflux:jsonencoded", "annotationDate": annotation_date, "annotationType": "OTHER", "comment": json.dumps( @@ -240,7 +250,7 @@ def main(): relationships.append( { "spdxElementId": SPDXID, - "relatedSpdxElement": middle_element1, + "relatedSpdxElement": root_package1, "relationshipType": "BUILD_TOOL_OF", } ) diff --git a/sbom-utility-scripts/scripts/base-images-sbom-script/app/test_base_images_sbom_script.py b/sbom-utility-scripts/scripts/base-images-sbom-script/app/test_base_images_sbom_script.py index c3c6807e..8a22f2b5 100644 --- a/sbom-utility-scripts/scripts/base-images-sbom-script/app/test_base_images_sbom_script.py +++ b/sbom-utility-scripts/scripts/base-images-sbom-script/app/test_base_images_sbom_script.py @@ -450,6 +450,7 @@ def test_main_input_sbom_does_not_contain_formulation(tmp_path, mocker): # minimal input sbom file sbom_file.write_text( """{ + "bomFormat": "CycloneDX", "project_name": "MyProject", "version": "1.0", "components": [] @@ -532,8 +533,9 @@ def test_main_input_sbom_spdx_minimal(tmp_path, mocker, isodate): sbom_file.write_text( """{ "SPDXID": "SPDXRef-Document", - "project_name": "MyProject", - "version": "1.0", + "spdxVersion": "SPDX-2.3", + "name": "MyProject", + "documentNamespace": "http://example.com/uid-1234", "packages": [], "relationships": [] }""" @@ -562,12 +564,14 @@ def test_main_input_sbom_spdx_minimal(tmp_path, mocker, isodate): "packages": [ { "SPDXID": "SPDXRef-DocumentRoot-Unknown-", + "downloadLocation": "NOASSERTION", "name": "", }, { "SPDXID": "SPDXRef-container-quay.io/mkosiarc_rhtap/single-container-app-" "9520a72cbb69edfca5cac88ea2a9e0e09142ec934952b9420d686e77765f002c", "name": "quay.io/mkosiarc_rhtap/single-container-app", + "downloadLocation": "NOASSERTION", "externalRefs": [ { "referenceType": "purl", @@ -579,7 +583,7 @@ def test_main_input_sbom_spdx_minimal(tmp_path, mocker, isodate): ], "annotations": [ { - "annotator": "konflux:jsonencoded", + "annotator": "Tool:konflux:jsonencoded", "annotationDate": "2021-07-01T00:00:00Z", "annotationType": "OTHER", "comment": '{"name":"konflux:container:is_builder_image:for_stage","value":"0"}', @@ -590,6 +594,7 @@ def test_main_input_sbom_spdx_minimal(tmp_path, mocker, isodate): "name": "registry.access.redhat.com/ubi8/ubi", "SPDXID": "SPDXRef-container-registry.access.redhat.com/ubi8/ubi-" "0f22256f634f8205fbd9c438c387ccf2d4859250e04104571c93fdb89a62bae1", + "downloadLocation": "NOASSERTION", "externalRefs": [ { "referenceCategory": "PACKAGE-MANAGER", @@ -601,7 +606,7 @@ def test_main_input_sbom_spdx_minimal(tmp_path, mocker, isodate): ], "annotations": [ { - "annotator": "konflux:jsonencoded", + "annotator": "Tool:konflux:jsonencoded", "annotationDate": "2021-07-01T00:00:00Z", "annotationType": "OTHER", "comment": '{"name":"konflux:container:is_base_image","value":"true"}', @@ -646,6 +651,7 @@ def test_main_input_sbom_does_not_contain_formulation_and_base_image_from_scratc sbom_file.write_text( """{ "project_name": "MyProject", + "bomFormat": "CycloneDX", "version": "1.0", "components": [] }""" @@ -720,6 +726,7 @@ def test_main_input_sbom_contains_formulation(tmp_path, mocker): sbom_file.write_text( """ { + "bomFormat": "CycloneDX", "project_name": "MyProject", "version": "1.0", "components": [], diff --git a/sbom-utility-scripts/scripts/merge-cachi2-sboms-script/merge_cachi2_sboms.py b/sbom-utility-scripts/scripts/merge-cachi2-sboms-script/merge_cachi2_sboms.py index b1e2885d..f81f62e1 100644 --- a/sbom-utility-scripts/scripts/merge-cachi2-sboms-script/merge_cachi2_sboms.py +++ b/sbom-utility-scripts/scripts/merge-cachi2-sboms-script/merge_cachi2_sboms.py @@ -6,6 +6,15 @@ from packageurl import PackageURL +def detect_sbom_format(sbom): + if sbom.get("bomFormat") == "CycloneDX": + return "cyclonedx" + elif sbom.get("spdxVersion"): + return "spdx" + else: + raise ValueError("Unknown SBOM format") + + def _is_syft_local_golang_component(component: dict) -> bool: """ Check if a Syft Golang reported component is a local replacement. @@ -401,15 +410,16 @@ def map_relationships(relationships): break return parent_element, relations_map, relations_inverse_map - def calculate_middle_element(root_element, map, inverse_map): - """Calculate middle element of the relationship. - Middle element is considered as element which is related to root element and is not root element. + def calculate_root_package(root_element, map, inverse_map): + """Calculate root package from relationship map. + Root package is considered as package which contains other packages and + is described by the document itself. """ - middle_element = None + root_package = None for r, contains in map.items(): if contains and inverse_map.get(r) == root_element: - middle_element = r - return middle_element + root_package = r + return root_package relationships = [] @@ -417,15 +427,15 @@ def calculate_middle_element(root_element, map, inverse_map): root_element2, map2, inverse_map2 = map_relationships(relationships2) package_ids = [package["SPDXID"] for package in packages] - middle_element1 = calculate_middle_element(root_element1, map1, inverse_map1) - middle_element2 = calculate_middle_element(root_element2, map2, inverse_map2) + root_package1 = calculate_root_package(root_element1, map1, inverse_map1) + root_package2 = calculate_root_package(root_element2, map2, inverse_map2) for relation in relationships2: _relation = relation.copy() # If relations is Root decribes middle element, skip it if ( - _relation["relatedSpdxElement"] == middle_element2 + _relation["relatedSpdxElement"] == root_package2 and _relation["spdxElementId"] == root_element2 and _relation["relationshipType"] == "DESCRIBES" ): @@ -436,10 +446,10 @@ def calculate_middle_element(root_element, map, inverse_map): _relation["spdxElementId"] = root_element1 elif relation["relatedSpdxElement"] == root_element2: _relation["relatedSpdxElement"] = root_element1 - if _relation["spdxElementId"] == middle_element2: - _relation["spdxElementId"] = middle_element1 - if _relation["relatedSpdxElement"] == middle_element2: - _relation["relatedSpdxElement"] = middle_element1 + if _relation["spdxElementId"] == root_package2: + _relation["spdxElementId"] = root_package1 + if _relation["relatedSpdxElement"] == root_package2: + _relation["relatedSpdxElement"] = root_package1 # include only relations to packages which exists in merged packages. if _relation["relatedSpdxElement"] in package_ids: @@ -482,7 +492,7 @@ def get_package_key(pkg): return filtered_packages + cachi2_sbom["packages"] -def merge_sboms(cachi2_sbom_path: str, syft_sbom_path: str, format: str = "cyclonedx") -> str: +def merge_sboms(cachi2_sbom_path: str, syft_sbom_path: str) -> str: """Merge Cachi2 components into the Syft SBOM while removing duplicates.""" with open(cachi2_sbom_path) as file: cachi2_sbom = json.load(file) @@ -490,7 +500,12 @@ def merge_sboms(cachi2_sbom_path: str, syft_sbom_path: str, format: str = "cyclo with open(syft_sbom_path) as file: syft_sbom = json.load(file) - if format == "cyclonedx": + format1 = detect_sbom_format(cachi2_sbom) + format2 = detect_sbom_format(syft_sbom) + if format1 != format2: + raise ValueError("SBOMs are in different formats") + + if format1 == "cyclonedx": syft_sbom["components"] = merge_components(syft_sbom, cachi2_sbom) _merge_tools_metadata(syft_sbom, cachi2_sbom) else: @@ -527,6 +542,6 @@ def merge_sboms(cachi2_sbom_path: str, syft_sbom_path: str, format: str = "cyclo args = parser.parse_args() - merged_sbom = merge_sboms(args.cachi2_sbom_path, args.syft_sbom_path, format=args.sbom_format) + merged_sbom = merge_sboms(args.cachi2_sbom_path, args.syft_sbom_path) print(merged_sbom) diff --git a/sbom-utility-scripts/scripts/merge-cachi2-sboms-script/test_merge_cachi2_sboms.py b/sbom-utility-scripts/scripts/merge-cachi2-sboms-script/test_merge_cachi2_sboms.py index 88139855..3de069c7 100644 --- a/sbom-utility-scripts/scripts/merge-cachi2-sboms-script/test_merge_cachi2_sboms.py +++ b/sbom-utility-scripts/scripts/merge-cachi2-sboms-script/test_merge_cachi2_sboms.py @@ -54,7 +54,7 @@ def isodate() -> Generator: def test_merge_sboms_spdx(data_dir: Path, isodate: Generator) -> None: - result = merge_sboms(f"{data_dir}/cachi2.bom.spdx.json", f"{data_dir}/syft.bom.spdx.json", format="spdx") + result = merge_sboms(f"{data_dir}/cachi2.bom.spdx.json", f"{data_dir}/syft.bom.spdx.json") with open(f"{data_dir}/merged.bom.spdx.json") as file: expected_sbom = json.load(file) @@ -66,9 +66,7 @@ def test_merge_both_formats_equal(data_dir: Path, isodate: Generator) -> None: """Test that the merge result is the same for both formats.""" result_cdx = json.loads(merge_sboms(f"{data_dir}/cachi2.bom.json", f"{data_dir}/syft.bom.json")) - result_spdx = json.loads( - merge_sboms(f"{data_dir}/cachi2.bom.spdx.json", f"{data_dir}/syft.bom.spdx.json", format="spdx") - ) + result_spdx = json.loads(merge_sboms(f"{data_dir}/cachi2.bom.spdx.json", f"{data_dir}/syft.bom.spdx.json")) cdx_components = [] for component in result_cdx["components"]: cdx_components.append( diff --git a/sbom-utility-scripts/scripts/merge_syft_sboms_spdx.py b/sbom-utility-scripts/scripts/merge_syft_sboms_spdx.py deleted file mode 100644 index 31ada1e3..00000000 --- a/sbom-utility-scripts/scripts/merge_syft_sboms_spdx.py +++ /dev/null @@ -1,166 +0,0 @@ -import json - -class _ANY: - def __eq__(self, other): - return True - - def __hash__(self): - return hash("Any") - - -ANY = _ANY() - - -def merge_annotations(annotations1, annotations2): - annotation_tuples = [] - for annotation in annotations1: - annotation_tuples.append( - ( - annotation["annotator"], - annotation["comment"], - annotation["annotationDate"], - annotation["annotationType"], - ) - ) - for annotation in annotations2: - annotation_tuples.append( - ( - annotation["annotator"], - annotation["comment"], - annotation["annotationDate"], - annotation["annotationType"], - ) - ) - annotations = set(annotation_tuples) - return [ - { - "annotator": annotation[0], - "comment": annotation[1], - "annotationDate": annotation[2], - "annotationType": annotation[3], - } - for annotation in annotations - ] - -def merge_relationships(relationships1, relationships2, packages): - def map_relationships(relationships): - relations_map = {} - relations_inverse_map = {} - - for relation in relationships: - relations_map.setdefault(relation["spdxElementId"], []).append(relation["relatedSpdxElement"]) - relations_inverse_map[relation["relatedSpdxElement"]] = relation["spdxElementId"] - - for parent_element in relations_map.keys(): - if parent_element not in relations_inverse_map: - break - return parent_element, relations_map, relations_inverse_map - - relationships = [] - - root_element1, map1, inverse_map1 = map_relationships(relationships1) - root_element2, map2, inverse_map2 = map_relationships(relationships2) - package_ids = [package["SPDXID"] for package in packages] - for r, contains in map2.items(): - if contains and inverse_map2.get(r) == root_element2: - middle_element2 = r - for r, contains in map1.items(): - if contains and inverse_map1.get(r) == root_element1: - middle_element1 = r - - for relation in relationships2: - _relation = { - "spdxElementId": relation["spdxElementId"], - "relatedSpdxElement": relation["relatedSpdxElement"], - "relationshipType": relation["relationshipType"], - } - if _relation["spdxElementId"] == root_element2: - _relation["spdxElementId"] = root_element1 - elif relation["relatedSpdxElement"] == root_element2: - _relation["relatedSpdxElement"] = root_element1 - - if _relation["relatedSpdxElement"] in package_ids: - relationships.append(_relation) - elif _relation["spdxElementId"] in package_ids: - relationships.append(_relation) - - for relation in relationships1: - _relation = { - "spdxElementId": relation["spdxElementId"], - "relatedSpdxElement": relation["relatedSpdxElement"], - "relationshipType": relation["relationshipType"], - } - if _relation["relatedSpdxElement"] == middle_element1: - continue - if _relation["spdxElementId"] == middle_element1: - _relation["spdxElementId"] = middle_element2 - if relation["relatedSpdxElement"] in package_ids: - relationships.append(_relation) - return relationships - - -def merge_packages(sbom1: dict, sbom2: dict) -> dict: - """Merge SBOM packages from two SBOMs.""" - - package_map1 = {(p["name"], p.get("versionInfo", ANY)): p for p in cachi2_sbom["packages"]} - - packages2 = [] - for p in sbom2.get("packages", []): - if (p["name"], p.get("versionInfo", ANY)) in list(package_map1.keys()): - try: - package1 = package_map1[(p["name"], p.get("versionInfo"))] - except KeyError: - package1 = package_map1[(p["name"], ANY)] - package1["externalRefs"] = sorted( - merge_external_refs(package1.get("externalRefs", []), p.get("externalRefs", [])), - key=lambda x: ( - x["referenceCategory"], - x["referenceType"], - x["referenceLocator"], - ), - ) - package1["annotations"] = merge_annotations(package1.get("annotations", []), p.get("annotations", [])) - else: - packages2.append(p) - - return packages2 + sbom1['packages'] - -def merge_metadata(sbom1: dict[Any, Any], sbom2: dict[Any, Any]) -> None: - """Merge the content of tools in the metadata section of the SBOM. - """ - creators = sbom2["creationInfo"]["creators"] - - for creator in creators: - sbom1["creationInfo"]["creators"].append(creator) - - -# load SBOMs -with open("./sbom-image.json") as f: - image_sbom = json.load(f) - -with open("./sbom-source.json") as f: - source_sbom = json.load(f) - -packages = merge_packages(image_sbom, source_sbom) -relationships = merge_relationships(image_sbom.get("relationships", []), - source_sbom.get("relationships", []), - packages) - -packages_in_relationships = [] -for relation in relationships: - packages_in_relationships.append(relation["spdxElementId"]) - packages_in_relationships.append(relation["relatedSpdxElement"]) -filtered_packages = [] - -# Remove packages which don't have any relationships -for package in packages: - if package["SPDXID"] in packages_in_relationships: - filtered_packages.append(package) - -merge_metadata(image_sbom, source_sbom) -image_sbom["packages"] = filtered_packages -image_sbom["relationships"] = relationships - -# write the CycloneDX unified SBOM -with open("./sbom-spdx.json", "w") as f: - json.dump(image_sbom, f, indent=4)