Skip to content

Commit e2e1b4c

Browse files
committed
- Added :jsonencoded suffix to converted spdx properties
- Formatted test spdx files - Improved merging spdx files - Added test to compare merged cdx and spdx outputs
1 parent b43582e commit e2e1b4c

File tree

10 files changed

+1423
-911
lines changed

10 files changed

+1423
-911
lines changed

sbom-utility-scripts/scripts/base-images-sbom-script/app/base_images_sbom_script.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -171,13 +171,18 @@ def main():
171171
packages = []
172172
relationships = []
173173

174-
# Try to calculate middle element based on the relationships maps.
175-
# SPDX has usually root element which contains a wrapper element which then contains
176-
# all of the other elements
174+
# Try to calculate middle element representing the container image or directory, which was
175+
# used to build the SBOM, based on the relationships maps.
176+
# SPDX has a relationship ROOT-ID DESCRIBES MIDDLE-ID which expresses the fact that the SBOM document
177+
# describes the container image or directory represented by the MIDDLE-ID package.
177178
middle_element1 = None
178179
for r, contains in map1.items():
180+
# middle element is the one which contains other elements and is in a relationship with
181+
# the root element where it stands as relatedSpdxElement
179182
if contains and inverse_map1.get(r) == root_element1:
180183
middle_element1 = r
184+
# If no middle element is found then create one with an "Unknown" ID, as the source for the SBOM
185+
# is not known.
181186
if not middle_element1:
182187
middle_element1 = "SPDXRef-DocumentRoot-Unknown-"
183188
packages.append(
@@ -218,7 +223,7 @@ def main():
218223
# as json string
219224
"annotations": [
220225
{
221-
"annotator": "konflux",
226+
"annotator": "konflux:jsonencoded",
222227
"annotationDate": annotation_date,
223228
"annotationType": "OTHER",
224229
"comment": json.dumps(
@@ -239,7 +244,9 @@ def main():
239244
"relationshipType": "BUILD_TOOL_OF",
240245
}
241246
)
247+
# merge newly created packages for build tools with existing packages
242248
sbom["packages"] = sbom.get("packages", []) + packages
249+
# merge newly created relationships of the build tools with existing relationships
243250
sbom["relationships"] = sbom.get("relationships", []) + relationships
244251

245252
with args.sbom.open("w") as f:

sbom-utility-scripts/scripts/base-images-sbom-script/app/test_base_images_sbom_script.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -579,7 +579,7 @@ def test_main_input_sbom_spdx_minimal(tmp_path, mocker, isodate):
579579
],
580580
"annotations": [
581581
{
582-
"annotator": "konflux",
582+
"annotator": "konflux:jsonencoded",
583583
"annotationDate": "2021-07-01T00:00:00Z",
584584
"annotationType": "OTHER",
585585
"comment": '{"name":"konflux:container:is_builder_image:for_stage","value":"0"}',
@@ -601,7 +601,7 @@ def test_main_input_sbom_spdx_minimal(tmp_path, mocker, isodate):
601601
],
602602
"annotations": [
603603
{
604-
"annotator": "konflux",
604+
"annotator": "konflux:jsonencoded",
605605
"annotationDate": "2021-07-01T00:00:00Z",
606606
"annotationType": "OTHER",
607607
"comment": '{"name":"konflux:container:is_base_image","value":"true"}',

sbom-utility-scripts/scripts/base-images-sbom-script/app/tox.ini

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ env_list = flake8,black,test
44
[testenv:test]
55
deps = -r requirements-test.txt
66
-r requirements.txt
7-
commands = pytest test_base_images_sbom_script.py
7+
commands = pytest -vv test_base_images_sbom_script.py
88

99
[testenv:flake8]
1010
deps = flake8

sbom-utility-scripts/scripts/merge-cachi2-sboms-script/merge_cachi2_sboms.py

+84-51
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,7 @@
33
from argparse import ArgumentParser
44
from typing import Any, Callable
55
from urllib.parse import quote_plus, urlsplit
6-
7-
8-
class _ANY:
9-
def __eq__(self, other):
10-
return True
11-
12-
def __hash__(self):
13-
return hash("Any")
14-
15-
16-
ANY = _ANY()
6+
from packageurl import PackageURL
177

188

199
def _is_syft_local_golang_component(component: dict) -> bool:
@@ -109,8 +99,14 @@ def _unique_key_cachi2_spdx(package: dict) -> list[str]:
10999
keys = []
110100
for ref in package.get("externalRefs", []):
111101
if ref["referenceType"] == "purl":
112-
url = urlsplit(ref["referenceLocator"])
113-
keys.append(url.scheme + ":" + url.path)
102+
parsed_purl = PackageURL.from_string(ref["referenceLocator"])
103+
name = parsed_purl.type + "/" + (parsed_purl.namespace or "") + "/" + parsed_purl.name
104+
version = parsed_purl.version or ""
105+
if parsed_purl.type == "pypi":
106+
name = name.lower()
107+
if parsed_purl.type == "golang":
108+
version = quote_plus(version)
109+
keys.append(name + "@" + version)
114110
return keys
115111

116112

@@ -144,33 +140,38 @@ def _unique_key_syft(component: dict) -> str:
144140

145141
def _unique_keys_syft_spdx(package: dict) -> str:
146142
"""
147-
Create a unique key for Syft reported components.
143+
Create unique keys for Syft reported components.
148144
149-
This is done by taking a lowercase namespace/name, and URL encoding the version.
145+
This is done in the following way:
146+
- If package doesn't have purl, return [<name>@<versionInfo>] as unique keys
147+
- If package has purl(s), take each purl and parse it and take type/namespace/name and version of it
148+
-- If package is pypi, convert type/namespace/name to lowercase
149+
-- If package is golang, encode version
150+
- Append this final key to list of unique keys
150151
151152
Syft does not set any qualifier for NPM, Pip or Golang, so there's no need to remove them
152153
as done in _unique_key_cachi2.
153-
154-
If a Syft component lacks a purl (e.g. type OS), we'll use its name and version instead.
155154
"""
156155
for ref in package.get("externalRefs", []):
157156
if ref["referenceType"] == "purl":
158157
break
159158
else:
160-
return package.get("name", "") + "@" + package.get("versionInfo", "")
159+
return [package.get("name", "") + "@" + package.get("versionInfo", "")]
161160

162161
keys = []
163162

164163
for ref in package.get("externalRefs", []):
165164
if ref["referenceType"] == "purl":
166165
purl = ref["referenceLocator"]
167-
if "@" in purl:
168-
name, version = purl.split("@")
166+
parsed_purl = PackageURL.from_string(purl)
167+
if parsed_purl.version:
168+
version = parsed_purl.version
169+
name = (parsed_purl.type + "/" + (parsed_purl.namespace or "") + "/" + parsed_purl.name).lower()
169170

170-
if name.startswith("pkg:pypi"):
171+
if parsed_purl.type == "pypi":
171172
name = name.lower()
172173

173-
if name.startswith("pkg:golang"):
174+
if parsed_purl.type == "golang":
174175
version = quote_plus(version)
175176
keys.append(f"{name}@{version}")
176177
else:
@@ -309,7 +310,15 @@ def merge_external_refs(refs1, refs2):
309310
ref_tuples = []
310311
unique_refs2 = []
311312

312-
for ref in refs1:
313+
for _ref in refs1:
314+
ref = _ref.copy()
315+
if ref["referenceType"].lower() == "purl":
316+
parsed_purl = PackageURL.from_string(ref["referenceLocator"])
317+
purl_dict = parsed_purl.to_dict()
318+
purl_dict["qualifiers"] = {}
319+
parsed_purl = PackageURL(**purl_dict)
320+
ref["referenceLocator"] = parsed_purl.to_string()
321+
313322
ref_tuples.append(
314323
(
315324
ref["referenceCategory"].lower(),
@@ -318,7 +327,14 @@ def merge_external_refs(refs1, refs2):
318327
)
319328
)
320329

321-
for ref in refs2:
330+
for _ref in refs2:
331+
ref = _ref.copy()
332+
if ref["referenceType"].lower() == "purl":
333+
parsed_purl = PackageURL.from_string(ref["referenceLocator"])
334+
purl_dict = parsed_purl.to_dict()
335+
purl_dict["qualifiers"] = {}
336+
parsed_purl = PackageURL(**purl_dict)
337+
ref["referenceLocator"] = parsed_purl.to_string()
322338
if (
323339
ref["referenceCategory"].lower(),
324340
ref["referenceType"].lower(),
@@ -365,6 +381,14 @@ def merge_relationships(relationships1, relationships2, packages):
365381
"""Merge SPDX relationships."""
366382

367383
def map_relationships(relationships):
384+
"""Map relationships of spdx element.
385+
Method returns triplet containing root element, map of relations and inverse map of relations.
386+
Root element is considered as element which is not listed as related document
387+
in any of the relationships. Relationship map is dict of {key: value} where key is spdx
388+
element and list of related elements is the value.
389+
Inverse map is dict of {key: value} where key is related spdx element in the relationship
390+
and value is spdx element.
391+
"""
368392
relations_map = {}
369393
relations_inverse_map = {}
370394

@@ -377,44 +401,54 @@ def map_relationships(relationships):
377401
break
378402
return parent_element, relations_map, relations_inverse_map
379403

404+
def calculate_middle_element(root_element, map, inverse_map):
405+
"""Calculate middle element of the relationship.
406+
Middle element is considered as element which is related to root element and is not root element.
407+
"""
408+
middle_element = None
409+
for r, contains in map.items():
410+
if contains and inverse_map.get(r) == root_element:
411+
middle_element = r
412+
return middle_element
413+
380414
relationships = []
381415

382416
root_element1, map1, inverse_map1 = map_relationships(relationships1)
383417
root_element2, map2, inverse_map2 = map_relationships(relationships2)
384418
package_ids = [package["SPDXID"] for package in packages]
385-
for r, contains in map2.items():
386-
if contains and inverse_map2.get(r) == root_element2:
387-
middle_element2 = r
388-
for r, contains in map1.items():
389-
if contains and inverse_map1.get(r) == root_element1:
390-
middle_element1 = r
419+
420+
middle_element1 = calculate_middle_element(root_element1, map1, inverse_map1)
421+
middle_element2 = calculate_middle_element(root_element2, map2, inverse_map2)
391422

392423
for relation in relationships2:
393-
_relation = {
394-
"spdxElementId": relation["spdxElementId"],
395-
"relatedSpdxElement": relation["relatedSpdxElement"],
396-
"relationshipType": relation["relationshipType"],
397-
}
424+
_relation = relation.copy()
425+
426+
# If the relation is root DESCRIBES middle element, skip it
427+
if (
428+
_relation["relatedSpdxElement"] == middle_element2
429+
and _relation["spdxElementId"] == root_element2
430+
and _relation["relationshipType"] == "DESCRIBES"
431+
):
432+
continue
433+
# if spdxElementId is root_element2, replace it with root_element1
434+
# if not and relatedSpdxElement is root_element2, replace it with root_element1
398435
if _relation["spdxElementId"] == root_element2:
399436
_relation["spdxElementId"] = root_element1
400437
elif relation["relatedSpdxElement"] == root_element2:
401438
_relation["relatedSpdxElement"] = root_element1
439+
if _relation["spdxElementId"] == middle_element2:
440+
_relation["spdxElementId"] = middle_element1
441+
if _relation["relatedSpdxElement"] == middle_element2:
442+
_relation["relatedSpdxElement"] = middle_element1
402443

444+
# include only relations to packages which exist in the merged packages.
403445
if _relation["relatedSpdxElement"] in package_ids:
404446
relationships.append(_relation)
405447
elif _relation["spdxElementId"] in package_ids:
406448
relationships.append(_relation)
407449

408450
for relation in relationships1:
409-
_relation = {
410-
"spdxElementId": relation["spdxElementId"],
411-
"relatedSpdxElement": relation["relatedSpdxElement"],
412-
"relationshipType": relation["relationshipType"],
413-
}
414-
if _relation["relatedSpdxElement"] == middle_element1:
415-
continue
416-
if _relation["spdxElementId"] == middle_element1:
417-
_relation["spdxElementId"] = middle_element2
451+
_relation = relation.copy()
418452
if relation["relatedSpdxElement"] in package_ids:
419453
relationships.append(_relation)
420454
return relationships
@@ -423,17 +457,17 @@ def map_relationships(relationships):
423457
def merge_packages(syft_sbom: dict, cachi2_sbom: dict) -> dict:
424458
"""Merge Cachi2 packages into the Syft SBOM while removing duplicates."""
425459

460+
def get_package_key(pkg):
461+
return json.dumps(sorted(set(_unique_keys_syft_spdx(pkg))), separators=(",", ":"))
462+
426463
is_duplicate_package = _get_syft_package_filter(cachi2_sbom["packages"])
427-
cachi2_packages_map = {(p["name"], p.get("versionInfo", ANY)): p for p in cachi2_sbom["packages"]}
464+
cachi2_packages_map = {get_package_key(p): p for p in cachi2_sbom["packages"]}
428465

429466
filtered_packages = []
430467
for p in syft_sbom.get("packages", []):
431468
if is_duplicate_package(p):
432-
if (p["name"], p.get("versionInfo", ANY)) in list(cachi2_packages_map.keys()):
433-
try:
434-
cpackage = cachi2_packages_map[(p["name"], p.get("versionInfo"))]
435-
except KeyError:
436-
cpackage = cachi2_packages_map[(p["name"], ANY)]
469+
if get_package_key(p) in cachi2_packages_map:
470+
cpackage = cachi2_packages_map[get_package_key(p)]
437471
cpackage["externalRefs"] = sorted(
438472
merge_external_refs(cpackage.get("externalRefs", []), p.get("externalRefs", [])),
439473
key=lambda x: (
@@ -445,7 +479,6 @@ def merge_packages(syft_sbom: dict, cachi2_sbom: dict) -> dict:
445479
cpackage["annotations"] = merge_annotations(cpackage.get("annotations", []), p.get("annotations", []))
446480
else:
447481
filtered_packages.append(p)
448-
449482
return filtered_packages + cachi2_sbom["packages"]
450483

451484

Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
pytest
2+
packageurl-python==0.14.0

sbom-utility-scripts/scripts/merge-cachi2-sboms-script/requirements-test.txt

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#
2-
# This file is autogenerated by pip-compile with Python 3.11
2+
# This file is autogenerated by pip-compile with Python 3.12
33
# by the following command:
44
#
55
# pip-compile --generate-hashes --output-file=requirements-test.txt requirements-test.in
@@ -8,6 +8,10 @@ iniconfig==2.0.0 \
88
--hash=sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3 \
99
--hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374
1010
# via pytest
11+
packageurl-python==0.14.0 \
12+
--hash=sha256:cf5e55cdcd61e6de858f47c4986aa87ba493bfa56ba58de11103dfdc2c00e4e1 \
13+
--hash=sha256:ff09147cddaae9e5c59ffcb12df8ec0e1b774b45099399f28c36b1a3dfdf52e2
14+
# via -r requirements-test.in
1115
packaging==24.1 \
1216
--hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \
1317
--hash=sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124

0 commit comments

Comments
 (0)