diff --git a/syft/format/common/spdxhelpers/to_syft_model.go b/syft/format/common/spdxhelpers/to_syft_model.go index e1bf640a7af..224ae7ade13 100644 --- a/syft/format/common/spdxhelpers/to_syft_model.go +++ b/syft/format/common/spdxhelpers/to_syft_model.go @@ -6,6 +6,7 @@ import ( "fmt" "net/url" "path" + "path/filepath" "regexp" "strconv" "strings" @@ -50,6 +51,8 @@ func ToSyftModel(doc *spdx.Document) (*sbom.SBOM, error) { collectSyftFiles(s, spdxIDMap, doc) + populatePackageLocationsFromRelationships(s, spdxIDMap, doc) + s.Relationships = toSyftRelationships(spdxIDMap, doc) return s, nil @@ -691,3 +694,119 @@ func packageIDsToSkip(doc *spdx.Document) *strset.Set { } return skipIDs } + +func populatePackageLocationsFromRelationships(s *sbom.SBOM, spdxIDMap map[string]any, doc *spdx.Document) { + for _, r := range doc.Relationships { + if !isValidRelationshipForLocationPopulation(r, doc) { + continue + } + + fromPkg, toLocation, ok := extractPackageToLocationMapping(r, spdxIDMap) + if !ok { + continue + } + + if !isLocationEvidenceRelationship(r, toLocation) { + continue + } + + addLocationToPackage(s, fromPkg, toLocation) + } +} + +func isValidRelationshipForLocationPopulation(r *spdx.Relationship, doc *spdx.Document) bool { + if r == nil { + return false + } + if r.RefA.DocumentRefID != "" && requireAndTrimPrefix(r.RefA.DocumentRefID, "DocumentRef-") != string(doc.SPDXIdentifier) { + log.Debugf("ignoring relationship to external document for location population: %+v", r) + return false + } + return true +} + +func extractPackageToLocationMapping(r *spdx.Relationship, spdxIDMap map[string]any) (pkg.Package, file.Location, bool) { + fromObj := spdxIDMap[string(r.RefA.ElementRefID)] + toObj := spdxIDMap[string(r.RefB.ElementRefID)] + + fromPkg, fromOk := fromObj.(pkg.Package) + toLocation, toOk := toObj.(file.Location) + return fromPkg, toLocation, fromOk && toOk +} + +func isLocationEvidenceRelationship(r *spdx.Relationship, toLocation file.Location) bool { + switch helpers.RelationshipType(r.Relationship) { + case helpers.OtherRelationship: + return strings.Contains(r.RelationshipComment, "evident-by:") + case helpers.ContainsRelationship: + // Only treat specific file types as package discovery evidence + return isPackageDiscoveryEvidence(toLocation.RealPath) + } + return false +} + +// isPackageDiscoveryEvidence determines if a file represents evidence of where +// a package was discovered (e.g., package manager metadata files) rather than +// just a file owned by the package (e.g., license files, binaries). +func isPackageDiscoveryEvidence(filePath string) bool { + if filePath == "" { + return true // Default to true for empty paths + } + + fileName := filepath.Base(filePath) + upperFileName := strings.ToUpper(fileName) + + // License and documentation directories - should be excluded + directoryPatterns := []string{ + "/share/licenses/", "share/licenses/", "/usr/share/licenses/", "usr/share/licenses/", + "/share/doc/", "share/doc/", "/usr/share/doc/", "usr/share/doc/", + "/share/man/", "share/man/", "/usr/share/man/", "usr/share/man/", + } + + for _, pattern := range directoryPatterns { + if strings.Contains(filePath, pattern) { + return false + } + } + + // Common license/documentation file name patterns - should be excluded + fileNamePatterns := []string{ + "COPYING", "LICENSE", "LICENCE", "COPYRIGHT", + "README", "CHANGELOG", "HISTORY", "NEWS", + } + + for _, pattern := range fileNamePatterns { + upperPattern := strings.ToUpper(pattern) + if upperFileName == upperPattern || + strings.HasPrefix(upperFileName, upperPattern+".") { + return false + } + } + + // For all other files, assume they could be package discovery evidence + // This maintains backward compatibility while filtering out obvious non-evidence files + return true +} + +func addLocationToPackage(s *sbom.SBOM, fromPkg pkg.Package, toLocation file.Location) { + existingPkg := s.Artifacts.Packages.Package(fromPkg.ID()) + if existingPkg == nil { + log.Debugf("unable to find package %s in collection when populating locations", fromPkg.ID()) + return + } + + locations := existingPkg.Locations.ToSlice() + for _, existing := range locations { + if existing.RealPath == toLocation.RealPath && existing.FileSystemID == toLocation.FileSystemID { + return + } + } + + locations = append(locations, toLocation) + updatedPkg := *existingPkg + updatedPkg.Locations = file.NewLocationSet(locations...) + s.Artifacts.Packages.Delete(existingPkg.ID()) + s.Artifacts.Packages.Add(updatedPkg) + + log.Debugf("added location %s to package %s from SPDX relationship", toLocation.RealPath, fromPkg.Name) +} diff --git a/syft/format/common/spdxhelpers/to_syft_model_test.go b/syft/format/common/spdxhelpers/to_syft_model_test.go index e07bcca5bac..f5574e22249 100644 --- a/syft/format/common/spdxhelpers/to_syft_model_test.go +++ b/syft/format/common/spdxhelpers/to_syft_model_test.go @@ -795,3 +795,494 @@ func Test_skipsPackagesWithGeneratedFromRelationship(t *testing.T) { assert.NotNil(t, s.Artifacts.Packages.Package("1")) assert.Nil(t, s.Artifacts.Packages.Package("1-src")) } + +func Test_populatePackageLocationsFromRelationships(t *testing.T) { + tests := []struct { + name string + doc *spdx.Document + expectedPkgLocs map[string][]string // package ID -> expected location paths + }{ + { + name: "syft-generated SBOM with evident-by relationships", + doc: &spdx.Document{ + SPDXVersion: "SPDX-2.3", + SPDXIdentifier: "DOCUMENT", + Packages: []*spdx.Package{ + { + PackageName: "test-package", + PackageSPDXIdentifier: "package-1", + PackageVersion: "1.0.0", + }, + }, + Files: []*spdx.File{ + { + FileName: "/app/package.json", + FileSPDXIdentifier: "file-1", + }, + { + FileName: "/app/manifest.txt", + FileSPDXIdentifier: "file-2", + }, + }, + Relationships: []*spdx.Relationship{ + { + RefA: common.DocElementID{ + ElementRefID: "package-1", + }, + RefB: common.DocElementID{ + ElementRefID: "file-1", + }, + Relationship: spdx.RelationshipOther, + RelationshipComment: "evident-by: indicates the package's existence is evident by the given file", + }, + { + RefA: common.DocElementID{ + ElementRefID: "package-1", + }, + RefB: common.DocElementID{ + ElementRefID: "file-2", + }, + Relationship: spdx.RelationshipOther, + RelationshipComment: "evident-by: indicates the package's existence is evident by the given file", + }, + }, + }, + expectedPkgLocs: map[string][]string{ + "package-1": {"/app/package.json", "/app/manifest.txt"}, + }, + }, + { + name: "standard SPDX SBOM with CONTAINS relationships", + doc: &spdx.Document{ + SPDXVersion: "SPDX-2.3", + SPDXIdentifier: "DOCUMENT", + Packages: []*spdx.Package{ + { + PackageName: "standard-package", + PackageSPDXIdentifier: "package-2", + PackageVersion: "2.0.0", + }, + }, + Files: []*spdx.File{ + { + FileName: "/usr/bin/app", + FileSPDXIdentifier: "file-3", + }, + }, + Relationships: []*spdx.Relationship{ + { + RefA: common.DocElementID{ + ElementRefID: "package-2", + }, + RefB: common.DocElementID{ + ElementRefID: "file-3", + }, + Relationship: spdx.RelationshipContains, + }, + }, + }, + expectedPkgLocs: map[string][]string{ + "package-2": {"/usr/bin/app"}, + }, + }, + { + name: "mixed relationships - only location evidence should be processed", + doc: &spdx.Document{ + SPDXVersion: "SPDX-2.3", + SPDXIdentifier: "DOCUMENT", + Packages: []*spdx.Package{ + { + PackageName: "mixed-package", + PackageSPDXIdentifier: "package-3", + PackageVersion: "3.0.0", + }, + }, + Files: []*spdx.File{ + { + FileName: "/opt/config.conf", + FileSPDXIdentifier: "file-4", + }, + { + FileName: "/opt/readme.txt", + FileSPDXIdentifier: "file-5", + }, + }, + Relationships: []*spdx.Relationship{ + { + RefA: common.DocElementID{ + ElementRefID: "package-3", + }, + RefB: common.DocElementID{ + ElementRefID: "file-4", + }, + Relationship: spdx.RelationshipContains, + }, + { + RefA: common.DocElementID{ + ElementRefID: "package-3", + }, + RefB: common.DocElementID{ + ElementRefID: "file-5", + }, + Relationship: spdx.RelationshipOther, + RelationshipComment: "some other relationship comment", + }, + }, + }, + expectedPkgLocs: map[string][]string{ + "package-3": {"/opt/config.conf"}, // only the CONTAINS relationship should add location + }, + }, + { + name: "no location relationships", + doc: &spdx.Document{ + SPDXVersion: "SPDX-2.3", + SPDXIdentifier: "DOCUMENT", + Packages: []*spdx.Package{ + { + PackageName: "no-loc-package", + PackageSPDXIdentifier: "package-4", + PackageVersion: "4.0.0", + }, + }, + Files: []*spdx.File{ + { + FileName: "/var/log/app.log", + FileSPDXIdentifier: "file-6", + }, + }, + Relationships: []*spdx.Relationship{ + { + RefA: common.DocElementID{ + ElementRefID: "package-4", + }, + RefB: common.DocElementID{ + ElementRefID: "file-6", + }, + Relationship: spdx.RelationshipOther, + RelationshipComment: "unrelated comment", + }, + }, + }, + expectedPkgLocs: map[string][]string{ + "package-4": {}, // no locations should be added + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Convert the SPDX document to Syft model + result, err := ToSyftModel(tt.doc) + require.NoError(t, err) + require.NotNil(t, result) + + // Check that packages have the expected locations + for pkgID, expectedPaths := range tt.expectedPkgLocs { + pkg := result.Artifacts.Packages.Package(artifact.ID(pkgID)) + require.NotNil(t, pkg, "package %s should exist", pkgID) + + actualPaths := make([]string, 0) + for _, loc := range pkg.Locations.ToSlice() { + actualPaths = append(actualPaths, loc.RealPath) + } + + if len(expectedPaths) == 0 { + assert.Empty(t, actualPaths, "package %s should have no locations", pkgID) + } else { + assert.ElementsMatch(t, expectedPaths, actualPaths, + "package %s should have expected locations", pkgID) + } + } + }) + } +} + +func Test_populatePackageLocationsFromRelationships_duplicateLocations(t *testing.T) { + doc := &spdx.Document{ + SPDXVersion: "SPDX-2.3", + SPDXIdentifier: "DOCUMENT", + Packages: []*spdx.Package{ + { + PackageName: "dup-test-package", + PackageSPDXIdentifier: "package-dup", + PackageVersion: "1.0.0", + }, + }, + Files: []*spdx.File{ + { + FileName: "/same/file.txt", + FileSPDXIdentifier: "file-dup-1", + }, + { + FileName: "/same/file.txt", + FileSPDXIdentifier: "file-dup-2", + }, + }, + Relationships: []*spdx.Relationship{ + { + RefA: common.DocElementID{ + ElementRefID: "package-dup", + }, + RefB: common.DocElementID{ + ElementRefID: "file-dup-1", + }, + Relationship: spdx.RelationshipContains, + }, + { + RefA: common.DocElementID{ + ElementRefID: "package-dup", + }, + RefB: common.DocElementID{ + ElementRefID: "file-dup-2", + }, + Relationship: spdx.RelationshipContains, + }, + }, + } + + result, err := ToSyftModel(doc) + require.NoError(t, err) + require.NotNil(t, result) + + pkg := result.Artifacts.Packages.Package(artifact.ID("package-dup")) + require.NotNil(t, pkg) + + // Should only have one location despite two relationships pointing to the same file path + locations := pkg.Locations.ToSlice() + assert.Len(t, locations, 1) + assert.Equal(t, "/same/file.txt", locations[0].RealPath) +} + +func Test_licenseFileFiltering(t *testing.T) { + tests := []struct { + name string + files []struct{ id, path string } + expectedIncluded []string // files that should be included as package locations + expectedExcluded []string // files that should be excluded from package locations + }{ + { + name: "license files are excluded from package locations", + files: []struct{ id, path string }{ + {"file-license-1", "usr/share/licenses/ncurses-base/COPYING"}, + {"file-license-2", "/usr/share/licenses/glibc/LICENSE"}, + {"file-license-3", "/share/licenses/openssl/COPYRIGHT"}, + {"file-binary-1", "/usr/bin/ncurses-app"}, + {"file-config-1", "/etc/ncurses.conf"}, + }, + expectedIncluded: []string{"/usr/bin/ncurses-app", "/etc/ncurses.conf"}, + expectedExcluded: []string{ + "usr/share/licenses/ncurses-base/COPYING", + "/usr/share/licenses/glibc/LICENSE", + "/share/licenses/openssl/COPYRIGHT", + }, + }, + { + name: "documentation files are excluded from package locations", + files: []struct{ id, path string }{ + {"file-doc-1", "/usr/share/doc/package/README"}, + {"file-doc-2", "/share/doc/package/CHANGELOG"}, + {"file-doc-3", "/usr/share/man/man1/package.1"}, + {"file-lib-1", "/usr/lib/package/libpackage.so"}, + {"file-bin-1", "/usr/bin/package"}, + }, + expectedIncluded: []string{"/usr/lib/package/libpackage.so", "/usr/bin/package"}, + expectedExcluded: []string{ + "/usr/share/doc/package/README", + "/share/doc/package/CHANGELOG", + "/usr/share/man/man1/package.1", + }, + }, + { + name: "common license filename patterns are excluded", + files: []struct{ id, path string }{ + {"file-license-1", "/some/path/COPYING"}, + {"file-license-2", "/another/path/LICENSE.txt"}, + {"file-license-3", "/third/path/LICENCE"}, + {"file-license-4", "/fourth/path/COPYRIGHT"}, + {"file-readme-1", "/some/path/README.md"}, + {"file-changelog-1", "/some/path/CHANGELOG"}, + {"file-legitimate-1", "/usr/bin/app"}, + {"file-legitimate-2", "/etc/config.json"}, + }, + expectedIncluded: []string{"/usr/bin/app", "/etc/config.json"}, + expectedExcluded: []string{ + "/some/path/COPYING", + "/another/path/LICENSE.txt", + "/third/path/LICENCE", + "/fourth/path/COPYRIGHT", + "/some/path/README.md", + "/some/path/CHANGELOG", + }, + }, + { + name: "case insensitive matching for license filenames", + files: []struct{ id, path string }{ + {"file-license-1", "/path/copying"}, + {"file-license-2", "/path/license"}, + {"file-license-3", "/path/readme"}, + {"file-legitimate-1", "/usr/bin/copying-tool"}, // should be included + {"file-legitimate-2", "/usr/lib/licensed-lib.so"}, // should be included + }, + expectedIncluded: []string{"/usr/bin/copying-tool", "/usr/lib/licensed-lib.so"}, + expectedExcluded: []string{"/path/copying", "/path/license", "/path/readme"}, + }, + { + name: "mixed file types with comprehensive coverage", + files: []struct{ id, path string }{ + // License files - should be excluded + {"file-license-1", "usr/share/licenses/pkg/COPYING"}, + {"file-license-2", "/usr/share/licenses/pkg/LICENSE"}, + // Documentation - should be excluded + {"file-doc-1", "/usr/share/doc/pkg/README"}, + {"file-doc-2", "/share/man/man1/pkg.1"}, + // Legitimate package files - should be included + {"file-bin-1", "/usr/bin/pkg"}, + {"file-lib-1", "/usr/lib/pkg/libpkg.so"}, + {"file-config-1", "/etc/pkg/config.conf"}, + {"file-data-1", "/var/lib/pkg/data.db"}, + }, + expectedIncluded: []string{ + "/usr/bin/pkg", + "/usr/lib/pkg/libpkg.so", + "/etc/pkg/config.conf", + "/var/lib/pkg/data.db", + }, + expectedExcluded: []string{ + "usr/share/licenses/pkg/COPYING", + "/usr/share/licenses/pkg/LICENSE", + "/usr/share/doc/pkg/README", + "/share/man/man1/pkg.1", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Build SPDX document with the test files + var spdxFiles []*spdx.File + var relationships []*spdx.Relationship + + for _, f := range tt.files { + spdxFiles = append(spdxFiles, &spdx.File{ + FileName: f.path, + FileSPDXIdentifier: common.ElementID(f.id), + }) + + // Create CONTAINS relationship from package to file + relationships = append(relationships, &spdx.Relationship{ + RefA: common.DocElementID{ + ElementRefID: common.ElementID("test-package"), + }, + RefB: common.DocElementID{ + ElementRefID: common.ElementID(f.id), + }, + Relationship: spdx.RelationshipContains, + }) + } + + doc := &spdx.Document{ + SPDXVersion: "SPDX-2.3", + SPDXIdentifier: "DOCUMENT", + Packages: []*spdx.Package{ + { + PackageName: "test-package", + PackageSPDXIdentifier: common.ElementID("test-package"), + PackageVersion: "1.0.0", + }, + }, + Files: spdxFiles, + Relationships: relationships, + } + + // Convert to Syft model + result, err := ToSyftModel(doc) + require.NoError(t, err) + require.NotNil(t, result) + + // Get the test package + pkg := result.Artifacts.Packages.Package(artifact.ID("test-package")) + require.NotNil(t, pkg, "test package should exist") + + // Extract actual location paths + actualPaths := make([]string, 0) + for _, loc := range pkg.Locations.ToSlice() { + actualPaths = append(actualPaths, loc.RealPath) + } + + // Verify that expected files are included + for _, expectedPath := range tt.expectedIncluded { + assert.Contains(t, actualPaths, expectedPath, + "Expected file %s to be included in package locations", expectedPath) + } + + // Verify that excluded files are NOT included + for _, excludedPath := range tt.expectedExcluded { + assert.NotContains(t, actualPaths, excludedPath, + "Expected file %s to be excluded from package locations", excludedPath) + } + + // Verify the total count is correct + assert.Len(t, actualPaths, len(tt.expectedIncluded), + "Package should have exactly %d locations, got %d: %v", + len(tt.expectedIncluded), len(actualPaths), actualPaths) + }) + } +} + +func Test_isPackageDiscoveryEvidence(t *testing.T) { + tests := []struct { + name string + filePath string + expected bool + }{ + // License file paths - should be excluded (false) + {"license file in usr/share/licenses", "usr/share/licenses/pkg/COPYING", false}, + {"license file in share/licenses", "share/licenses/pkg/LICENSE", false}, + {"nested license file", "usr/share/licenses/subpkg/COPYRIGHT", false}, + + // Documentation paths - should be excluded (false) + {"doc file in usr/share/doc", "usr/share/doc/pkg/README", false}, + {"doc file in share/doc", "share/doc/pkg/INSTALL", false}, + {"man page", "usr/share/man/man1/pkg.1", false}, + {"man page in share/man", "share/man/man8/pkg.8", false}, + + // License filenames - should be excluded (false) + {"COPYING file", "some/path/COPYING", false}, + {"LICENSE file", "another/path/LICENSE", false}, + {"LICENCE file", "third/path/LICENCE", false}, + {"COPYRIGHT file", "fourth/path/COPYRIGHT", false}, + {"README file", "some/path/README", false}, + {"CHANGELOG file", "some/path/CHANGELOG", false}, + {"HISTORY file", "some/path/HISTORY", false}, + {"NEWS file", "some/path/NEWS", false}, + + // Case insensitive matching + {"lowercase copying", "path/copying", false}, + {"lowercase license", "path/license", false}, + {"lowercase readme", "path/readme", false}, + + // Legitimate package files - should be included (true) + {"binary executable", "usr/bin/pkg", true}, + {"library file", "usr/lib/pkg/libpkg.so", true}, + {"config file", "etc/pkg/config.conf", true}, + {"data file", "var/lib/pkg/data.db", true}, + {"script file", "usr/share/pkg/scripts/install.sh", true}, + + // Edge cases - files with license-like names but in valid contexts + {"file with copying in name", "usr/bin/copying-tool", true}, + {"file with license in name", "usr/lib/licensed-lib.so", true}, + {"file with readme in name", "usr/bin/readme-viewer", true}, + + // Empty or unusual paths + {"empty path", "", true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := isPackageDiscoveryEvidence(tt.filePath) + assert.Equal(t, tt.expected, result, + "isPackageDiscoveryEvidence(%s) should return %v", tt.filePath, tt.expected) + }) + } +}