3
3
from argparse import ArgumentParser
4
4
from typing import Any , Callable
5
5
from urllib .parse import quote_plus , urlsplit
6
-
7
-
8
- class _ANY :
9
- def __eq__ (self , other ):
10
- return True
11
-
12
- def __hash__ (self ):
13
- return hash ("Any" )
14
-
15
-
16
- ANY = _ANY ()
6
+ from packageurl import PackageURL
17
7
18
8
19
9
def _is_syft_local_golang_component (component : dict ) -> bool :
@@ -109,8 +99,14 @@ def _unique_key_cachi2_spdx(package: dict) -> list[str]:
109
99
keys = []
110
100
for ref in package .get ("externalRefs" , []):
111
101
if ref ["referenceType" ] == "purl" :
112
- url = urlsplit (ref ["referenceLocator" ])
113
- keys .append (url .scheme + ":" + url .path )
102
+ parsed_purl = PackageURL .from_string (ref ["referenceLocator" ])
103
+ name = parsed_purl .type + "/" + (parsed_purl .namespace or "" ) + "/" + parsed_purl .name
104
+ version = parsed_purl .version or ""
105
+ if parsed_purl .type == "pypi" :
106
+ name = name .lower ()
107
+ if parsed_purl .type == "golang" :
108
+ version = quote_plus (version )
109
+ keys .append (name + "@" + version )
114
110
return keys
115
111
116
112
@@ -144,33 +140,38 @@ def _unique_key_syft(component: dict) -> str:
144
140
145
141
def _unique_keys_syft_spdx (package : dict ) -> str :
146
142
"""
147
- Create a unique key for Syft reported components.
143
+ Create unique keys for Syft reported components.
148
144
149
- This is done by taking a lowercase namespace/name, and URL encoding the version.
145
+ This is done in the following way:
146
+ - If the package doesn't have a purl, return [<name>@<versionInfo>] as the unique keys
147
+ - If the package has purl(s), parse each purl and take its type/namespace/name and version
148
+ -- If the package is pypi, convert type/namespace/name to lowercase
149
+ -- If the package is golang, URL-encode the version
150
+ - Append this final key to list of unique keys
150
151
151
152
Syft does not set any qualifier for NPM, Pip or Golang, so there's no need to remove them
152
153
as done in _unique_key_cachi2.
153
-
154
- If a Syft component lacks a purl (e.g. type OS), we'll use its name and version instead.
155
154
"""
156
155
for ref in package .get ("externalRefs" , []):
157
156
if ref ["referenceType" ] == "purl" :
158
157
break
159
158
else :
160
- return package .get ("name" , "" ) + "@" + package .get ("versionInfo" , "" )
159
+ return [ package .get ("name" , "" ) + "@" + package .get ("versionInfo" , "" )]
161
160
162
161
keys = []
163
162
164
163
for ref in package .get ("externalRefs" , []):
165
164
if ref ["referenceType" ] == "purl" :
166
165
purl = ref ["referenceLocator" ]
167
- if "@" in purl :
168
- name , version = purl .split ("@" )
166
+ parsed_purl = PackageURL .from_string (purl )
167
+ if parsed_purl .version :
168
+ version = parsed_purl .version
169
+ name = (parsed_purl .type + "/" + (parsed_purl .namespace or "" ) + "/" + parsed_purl .name ).lower ()
169
170
170
- if name . startswith ( "pkg: pypi") :
171
+ if parsed_purl . type == " pypi" :
171
172
name = name .lower ()
172
173
173
- if name . startswith ( "pkg: golang") :
174
+ if parsed_purl . type == " golang" :
174
175
version = quote_plus (version )
175
176
keys .append (f"{ name } @{ version } " )
176
177
else :
@@ -309,7 +310,15 @@ def merge_external_refs(refs1, refs2):
309
310
ref_tuples = []
310
311
unique_refs2 = []
311
312
312
- for ref in refs1 :
313
+ for _ref in refs1 :
314
+ ref = _ref .copy ()
315
+ if ref ["referenceType" ].lower () == "purl" :
316
+ parsed_purl = PackageURL .from_string (ref ["referenceLocator" ])
317
+ purl_dict = parsed_purl .to_dict ()
318
+ purl_dict ["qualifiers" ] = {}
319
+ parsed_purl = PackageURL (** purl_dict )
320
+ ref ["referenceLocator" ] = parsed_purl .to_string ()
321
+
313
322
ref_tuples .append (
314
323
(
315
324
ref ["referenceCategory" ].lower (),
@@ -318,7 +327,14 @@ def merge_external_refs(refs1, refs2):
318
327
)
319
328
)
320
329
321
- for ref in refs2 :
330
+ for _ref in refs2 :
331
+ ref = _ref .copy ()
332
+ if ref ["referenceType" ].lower () == "purl" :
333
+ parsed_purl = PackageURL .from_string (ref ["referenceLocator" ])
334
+ purl_dict = parsed_purl .to_dict ()
335
+ purl_dict ["qualifiers" ] = {}
336
+ parsed_purl = PackageURL (** purl_dict )
337
+ ref ["referenceLocator" ] = parsed_purl .to_string ()
322
338
if (
323
339
ref ["referenceCategory" ].lower (),
324
340
ref ["referenceType" ].lower (),
@@ -365,6 +381,14 @@ def merge_relationships(relationships1, relationships2, packages):
365
381
"""Merge SPDX relationships."""
366
382
367
383
def map_relationships (relationships ):
384
+ """Map relationships of spdx element.
385
+ Method returns triplet containing root element, map of relations and inverse map of relations.
386
+ Root element is considered as element which is not listed as related document
387
+ in any of the relationships. Relationship map is dict of {key: value} where key is spdx
388
+ element and list of related elements is the value.
389
+ Inverse map is a dict of {key: value} where key is the related spdx element in the relationship
390
+ and value is spdx element.
391
+ """
368
392
relations_map = {}
369
393
relations_inverse_map = {}
370
394
@@ -377,44 +401,54 @@ def map_relationships(relationships):
377
401
break
378
402
return parent_element , relations_map , relations_inverse_map
379
403
404
+ def calculate_middle_element (root_element , map , inverse_map ):
405
+ """Calculate middle element of the relationship.
406
+ Middle element is considered as element which is related to root element and is not root element.
407
+ """
408
+ middle_element = None
409
+ for r , contains in map .items ():
410
+ if contains and inverse_map .get (r ) == root_element :
411
+ middle_element = r
412
+ return middle_element
413
+
380
414
relationships = []
381
415
382
416
root_element1 , map1 , inverse_map1 = map_relationships (relationships1 )
383
417
root_element2 , map2 , inverse_map2 = map_relationships (relationships2 )
384
418
package_ids = [package ["SPDXID" ] for package in packages ]
385
- for r , contains in map2 .items ():
386
- if contains and inverse_map2 .get (r ) == root_element2 :
387
- middle_element2 = r
388
- for r , contains in map1 .items ():
389
- if contains and inverse_map1 .get (r ) == root_element1 :
390
- middle_element1 = r
419
+
420
+ middle_element1 = calculate_middle_element (root_element1 , map1 , inverse_map1 )
421
+ middle_element2 = calculate_middle_element (root_element2 , map2 , inverse_map2 )
391
422
392
423
for relation in relationships2 :
393
- _relation = {
394
- "spdxElementId" : relation ["spdxElementId" ],
395
- "relatedSpdxElement" : relation ["relatedSpdxElement" ],
396
- "relationshipType" : relation ["relationshipType" ],
397
- }
424
+ _relation = relation .copy ()
425
+
426
+ # If the relation is "root DESCRIBES middle element", skip it
427
+ if (
428
+ _relation ["relatedSpdxElement" ] == middle_element2
429
+ and _relation ["spdxElementId" ] == root_element2
430
+ and _relation ["relationshipType" ] == "DESCRIBES"
431
+ ):
432
+ continue
433
+ # if spdxElementId is root_element2, replace it with root_element1
434
+ # if not and relatedSpdxElement is root_element2, replace it with root_element1
398
435
if _relation ["spdxElementId" ] == root_element2 :
399
436
_relation ["spdxElementId" ] = root_element1
400
437
elif relation ["relatedSpdxElement" ] == root_element2 :
401
438
_relation ["relatedSpdxElement" ] = root_element1
439
+ if _relation ["spdxElementId" ] == middle_element2 :
440
+ _relation ["spdxElementId" ] = middle_element1
441
+ if _relation ["relatedSpdxElement" ] == middle_element2 :
442
+ _relation ["relatedSpdxElement" ] = middle_element1
402
443
444
+ # include only relations to packages which exist in the merged packages.
403
445
if _relation ["relatedSpdxElement" ] in package_ids :
404
446
relationships .append (_relation )
405
447
elif _relation ["spdxElementId" ] in package_ids :
406
448
relationships .append (_relation )
407
449
408
450
for relation in relationships1 :
409
- _relation = {
410
- "spdxElementId" : relation ["spdxElementId" ],
411
- "relatedSpdxElement" : relation ["relatedSpdxElement" ],
412
- "relationshipType" : relation ["relationshipType" ],
413
- }
414
- if _relation ["relatedSpdxElement" ] == middle_element1 :
415
- continue
416
- if _relation ["spdxElementId" ] == middle_element1 :
417
- _relation ["spdxElementId" ] = middle_element2
451
+ _relation = relation .copy ()
418
452
if relation ["relatedSpdxElement" ] in package_ids :
419
453
relationships .append (_relation )
420
454
return relationships
@@ -423,17 +457,17 @@ def map_relationships(relationships):
423
457
def merge_packages (syft_sbom : dict , cachi2_sbom : dict ) -> dict :
424
458
"""Merge Cachi2 packages into the Syft SBOM while removing duplicates."""
425
459
460
+ def get_package_key (pkg ):
461
+ return json .dumps (sorted (set (_unique_keys_syft_spdx (pkg ))), separators = ("," , ":" ))
462
+
426
463
is_duplicate_package = _get_syft_package_filter (cachi2_sbom ["packages" ])
427
- cachi2_packages_map = {( p [ "name" ], p . get ( "versionInfo" , ANY ) ): p for p in cachi2_sbom ["packages" ]}
464
+ cachi2_packages_map = {get_package_key ( p ): p for p in cachi2_sbom ["packages" ]}
428
465
429
466
filtered_packages = []
430
467
for p in syft_sbom .get ("packages" , []):
431
468
if is_duplicate_package (p ):
432
- if (p ["name" ], p .get ("versionInfo" , ANY )) in list (cachi2_packages_map .keys ()):
433
- try :
434
- cpackage = cachi2_packages_map [(p ["name" ], p .get ("versionInfo" ))]
435
- except KeyError :
436
- cpackage = cachi2_packages_map [(p ["name" ], ANY )]
469
+ if get_package_key (p ) in cachi2_packages_map :
470
+ cpackage = cachi2_packages_map [get_package_key (p )]
437
471
cpackage ["externalRefs" ] = sorted (
438
472
merge_external_refs (cpackage .get ("externalRefs" , []), p .get ("externalRefs" , [])),
439
473
key = lambda x : (
@@ -445,7 +479,6 @@ def merge_packages(syft_sbom: dict, cachi2_sbom: dict) -> dict:
445
479
cpackage ["annotations" ] = merge_annotations (cpackage .get ("annotations" , []), p .get ("annotations" , []))
446
480
else :
447
481
filtered_packages .append (p )
448
-
449
482
return filtered_packages + cachi2_sbom ["packages" ]
450
483
451
484
0 commit comments