diff --git a/cmd/datadog-sbom-generator/__snapshots__/main_test.snap b/cmd/datadog-sbom-generator/__snapshots__/main_test.snap index 35b6d18c..aae84e8b 100644 --- a/cmd/datadog-sbom-generator/__snapshots__/main_test.snap +++ b/cmd/datadog-sbom-generator/__snapshots__/main_test.snap @@ -2641,6 +2641,10 @@ Scanned /fixtures/integration-npm/with-workspace/yarn.lock file and fou "name": "datadog:package-manager", "value": "NPM" }, + { + "name": "datadog:requires-transitive-enrichment", + "value": "true" + }, { "name": "datadog:version-range", "value": "^4.17.21" @@ -2698,6 +2702,10 @@ Scanned /fixtures/integration-package-json/package.json file and found { "name": "datadog:package-manager", "value": "uv" + }, + { + "name": "datadog:requires-transitive-enrichment", + "value": "true" } ], "evidence": { @@ -2708,6 +2716,37 @@ Scanned /fixtures/integration-package-json/package.json file and found ] } }, + { + "bom-ref": "pkg:pypi/numpy", + "type": "library", + "name": "numpy", + "purl": "pkg:pypi/numpy", + "properties": [ + { + "name": "datadog:is-direct", + "value": "true" + }, + { + "name": "datadog:package-manager", + "value": "uv" + }, + { + "name": "datadog:requires-transitive-enrichment", + "value": "true" + }, + { + "name": "datadog:version-range", + "value": "/u003e=1.24" + } + ], + "evidence": { + "occurrences": [ + { + "location": "{/"block/":{/"file_name/":/"pyproject.toml/",/"line_start/":8,/"line_end/":8,/"column_start/":5,/"column_end/":19,/"role/":/"manifest/"},/"name/":{/"file_name/":/"pyproject.toml/",/"line_start/":8,/"line_end/":8,/"column_start/":6,/"column_end/":11,/"role/":/"manifest/"},/"version/":{/"file_name/":/"pyproject.toml/",/"line_start/":8,/"line_end/":8,/"column_start/":11,/"column_end/":17,/"role/":/"manifest/"}}" + } + ] + } + }, { "bom-ref": "pkg:pypi/pytest@8.2.0", "type": "library", @@ -2726,6 +2765,10 @@ Scanned /fixtures/integration-package-json/package.json file and found { "name": "datadog:package-manager", "value": "uv" + }, + { + "name": "datadog:requires-transitive-enrichment", + "value": "true" } ], "evidence": { @@ -2750,6 +2793,10 @@ Scanned /fixtures/integration-package-json/package.json file and found { "name": "datadog:package-manager", "value": "uv" + }, + { + "name": "datadog:requires-transitive-enrichment", + "value": "true" } ], "evidence": { @@ -2767,7 +2814,7 @@ Scanned /fixtures/integration-package-json/package.json file and found [TestRun/Scan_pyproject.toml_without_lock_file - 2] Scanning directory './fixtures/integration-pyproject', resolved absolute path '/fixtures/integration-pyproject' -Scanned /fixtures/integration-pyproject/pyproject.toml file and found 3 packages +Scanned /fixtures/integration-pyproject/pyproject.toml file and found 4 packages [reachability] Reachability analysis is disabled --- diff --git a/pkg/lockfile/fixtures/pyproject-toml-extractor/poetry-pinned/pyproject.toml b/pkg/lockfile/fixtures/pyproject-toml-extractor/poetry-pinned/pyproject.toml index 7780efb0..c68a3d73 100644 --- a/pkg/lockfile/fixtures/pyproject-toml-extractor/poetry-pinned/pyproject.toml +++ b/pkg/lockfile/fixtures/pyproject-toml-extractor/poetry-pinned/pyproject.toml @@ -12,3 +12,4 @@ requests = "==2.28.0" flask = "==2.3.2" boto3 = "1.26.0" numpy = "^1.24" +scipy = "1.*" diff --git a/pkg/lockfile/internal/testutil/helpers.go b/pkg/lockfile/internal/testutil/helpers.go index e8bb91cd..3ade9567 100644 --- a/pkg/lockfile/internal/testutil/helpers.go +++ b/pkg/lockfile/internal/testutil/helpers.go @@ -47,9 +47,10 @@ func PackageToString(pkg lockfile.PackageDetails) string { } return fmt.Sprintf( - "%s@%s {ecosystem=%q targetVersions=%v packageManager=%q commit=%q depGroups=%v exclusions=%v targetFrameworks=%v isDirect=%t blockLocation=%s nameLocation=%s versionLocation=%s}", + "%s@%s {versionRange=%q ecosystem=%q targetVersions=%v packageManager=%q commit=%q depGroups=%v exclusions=%v targetFrameworks=%v isDirect=%t requiresTransitiveEnrichment=%t blockLocation=%s nameLocation=%s versionLocation=%s}", pkg.Name, pkg.Version, + pkg.VersionRange, pkg.Ecosystem, pkg.TargetVersions, pkg.PackageManager, @@ -58,6 +59,7 @@ func PackageToString(pkg lockfile.PackageDetails) string { pkg.Exclusions, pkg.TargetFrameworks, pkg.IsDirect, + pkg.RequiresTransitiveEnrichment, formatLoc(pkg.BlockLocation), formatLocPtr(pkg.NameLocation), formatLocPtr(pkg.VersionLocation), diff --git a/pkg/lockfile/javascript/parse-package-json.go b/pkg/lockfile/javascript/parse-package-json.go index 83345921..c958a687 100644 --- a/pkg/lockfile/javascript/parse-package-json.go +++ b/pkg/lockfile/javascript/parse-package-json.go @@ -268,17 +268,18 @@ func (e PackageJSONExtractor) Extract(f lockfile.DepFile, context lockfile.ScanC } packages[dedupKey] = lockfile.PackageDetails{ - Name: resolvedName, - Version: version, - VersionRange: versionRange, - PackageManager: packageJSONPackageManager, - Ecosystem: models.EcosystemNPM, - IsDirect: true, - DepGroups: []string{section.group}, - BlockLocation: blockLocation, - NameLocation: nameLocation, - VersionLocation: versionLocation, - LocationRole: models.LocationRoleManifest, + Name: resolvedName, + Version: version, + VersionRange: versionRange, + PackageManager: packageJSONPackageManager, + Ecosystem: models.EcosystemNPM, + IsDirect: true, + RequiresTransitiveEnrichment: true, + DepGroups: []string{section.group}, + BlockLocation: blockLocation, + NameLocation: nameLocation, + VersionLocation: versionLocation, + LocationRole: models.LocationRoleManifest, } } } diff --git a/pkg/lockfile/javascript/parse-package-json_test.go b/pkg/lockfile/javascript/parse-package-json_test.go index aa83b0e1..cd024d19 100644 --- a/pkg/lockfile/javascript/parse-package-json_test.go +++ b/pkg/lockfile/javascript/parse-package-json_test.go @@ -103,14 +103,15 @@ func TestParsePackageJSON_Basic(t *testing.T) { testutil.ExpectPackagesWithoutLocations(t, packages, []lockfile.PackageDetails{ { - Name: "lodash", - Version: "", - VersionRange: "^4.17.21", - PackageManager: models.NPM, - Ecosystem: models.EcosystemNPM, - IsDirect: true, - DepGroups: []string{"prod"}, - LocationRole: models.LocationRoleManifest, + Name: "lodash", + Version: "", + VersionRange: "^4.17.21", + PackageManager: models.NPM, + Ecosystem: models.EcosystemNPM, + IsDirect: true, + RequiresTransitiveEnrichment: true, + DepGroups: []string{"prod"}, + LocationRole: models.LocationRoleManifest, }, }) } @@ -130,33 +131,36 @@ func TestParsePackageJSON_AllDepTypes(t *testing.T) { testutil.ExpectPackagesWithoutLocations(t, packages, []lockfile.PackageDetails{ { - Name: "express", - Version: "", - VersionRange: "^4.18.0", - PackageManager: models.NPM, - Ecosystem: models.EcosystemNPM, - IsDirect: true, - DepGroups: []string{"prod"}, - LocationRole: models.LocationRoleManifest, + Name: "express", + Version: "", + VersionRange: "^4.18.0", + PackageManager: models.NPM, + Ecosystem: models.EcosystemNPM, + IsDirect: true, + RequiresTransitiveEnrichment: true, + DepGroups: []string{"prod"}, + LocationRole: models.LocationRoleManifest, }, { - Name: "jest", - Version: "", - VersionRange: "~29.7.0", - PackageManager: models.NPM, - Ecosystem: models.EcosystemNPM, - IsDirect: true, - DepGroups: []string{"dev"}, - LocationRole: models.LocationRoleManifest, + Name: "jest", + Version: "", + VersionRange: "~29.7.0", + PackageManager: models.NPM, + Ecosystem: models.EcosystemNPM, + IsDirect: true, + RequiresTransitiveEnrichment: true, + DepGroups: []string{"dev"}, + LocationRole: models.LocationRoleManifest, }, { - Name: "fsevents", - Version: "2.3.3", - PackageManager: models.NPM, - Ecosystem: models.EcosystemNPM, - IsDirect: true, - DepGroups: []string{"optional"}, - LocationRole: models.LocationRoleManifest, + Name: "fsevents", + Version: "2.3.3", + PackageManager: models.NPM, + Ecosystem: models.EcosystemNPM, + IsDirect: true, + RequiresTransitiveEnrichment: true, + DepGroups: []string{"optional"}, + LocationRole: models.LocationRoleManifest, }, }) } @@ -196,14 +200,15 @@ func TestParsePackageJSON_WithSiblingLockfileButLockfileParsersDisabled(t *testi testutil.ExpectPackagesWithoutLocations(t, packages, []lockfile.PackageDetails{ { - Name: "lodash", - Version: "", - VersionRange: "^4.17.21", - PackageManager: models.NPM, - Ecosystem: models.EcosystemNPM, - IsDirect: true, - DepGroups: []string{"prod"}, - LocationRole: models.LocationRoleManifest, + Name: "lodash", + Version: "", + VersionRange: "^4.17.21", + PackageManager: models.NPM, + Ecosystem: models.EcosystemNPM, + IsDirect: true, + RequiresTransitiveEnrichment: true, + DepGroups: []string{"prod"}, + LocationRole: models.LocationRoleManifest, }, }) } @@ -255,88 +260,97 @@ func TestParsePackageJSON_ComplexVersions(t *testing.T) { testutil.ExpectPackagesWithoutLocations(t, packages, []lockfile.PackageDetails{ { - Name: "pinned", - Version: "1.0.0", - PackageManager: models.NPM, - Ecosystem: models.EcosystemNPM, - IsDirect: true, - DepGroups: []string{"prod"}, - LocationRole: models.LocationRoleManifest, + Name: "pinned", + Version: "1.0.0", + PackageManager: models.NPM, + Ecosystem: models.EcosystemNPM, + IsDirect: true, + RequiresTransitiveEnrichment: true, + DepGroups: []string{"prod"}, + LocationRole: models.LocationRoleManifest, }, { - Name: "eq-pinned", - Version: "2.0.0", - PackageManager: models.NPM, - Ecosystem: models.EcosystemNPM, - IsDirect: true, - DepGroups: []string{"prod"}, - LocationRole: models.LocationRoleManifest, + Name: "eq-pinned", + Version: "2.0.0", + PackageManager: models.NPM, + Ecosystem: models.EcosystemNPM, + IsDirect: true, + RequiresTransitiveEnrichment: true, + DepGroups: []string{"prod"}, + LocationRole: models.LocationRoleManifest, }, { - Name: "caret", - Version: "", - VersionRange: "^2.3.4", - PackageManager: models.NPM, - Ecosystem: models.EcosystemNPM, - IsDirect: true, - DepGroups: []string{"prod"}, - LocationRole: models.LocationRoleManifest, + Name: "caret", + Version: "", + VersionRange: "^2.3.4", + PackageManager: models.NPM, + Ecosystem: models.EcosystemNPM, + IsDirect: true, + RequiresTransitiveEnrichment: true, + DepGroups: []string{"prod"}, + LocationRole: models.LocationRoleManifest, }, { - Name: "tilde", - Version: "", - VersionRange: "~1.5.3", - PackageManager: models.NPM, - Ecosystem: models.EcosystemNPM, - IsDirect: true, - DepGroups: []string{"prod"}, - LocationRole: models.LocationRoleManifest, + Name: "tilde", + Version: "", + VersionRange: "~1.5.3", + PackageManager: models.NPM, + Ecosystem: models.EcosystemNPM, + IsDirect: true, + RequiresTransitiveEnrichment: true, + DepGroups: []string{"prod"}, + LocationRole: models.LocationRoleManifest, }, { - Name: "range", - Version: "", - VersionRange: ">=1.0.0 <2.0.0", - PackageManager: models.NPM, - Ecosystem: models.EcosystemNPM, - IsDirect: true, - DepGroups: []string{"prod"}, - LocationRole: models.LocationRoleManifest, + Name: "range", + Version: "", + VersionRange: ">=1.0.0 <2.0.0", + PackageManager: models.NPM, + Ecosystem: models.EcosystemNPM, + IsDirect: true, + RequiresTransitiveEnrichment: true, + DepGroups: []string{"prod"}, + LocationRole: models.LocationRoleManifest, }, { - Name: "real-pkg", - Version: "3.0.0", - PackageManager: models.NPM, - Ecosystem: models.EcosystemNPM, - IsDirect: true, - DepGroups: []string{"prod"}, - LocationRole: models.LocationRoleManifest, + Name: "real-pkg", + Version: "3.0.0", + PackageManager: models.NPM, + Ecosystem: models.EcosystemNPM, + IsDirect: true, + RequiresTransitiveEnrichment: true, + DepGroups: []string{"prod"}, + LocationRole: models.LocationRoleManifest, }, { - Name: "local-file", - Version: "", - PackageManager: models.NPM, - Ecosystem: models.EcosystemNPM, - IsDirect: true, - DepGroups: []string{"prod"}, - LocationRole: models.LocationRoleManifest, + Name: "local-file", + Version: "", + PackageManager: models.NPM, + Ecosystem: models.EcosystemNPM, + IsDirect: true, + RequiresTransitiveEnrichment: true, + DepGroups: []string{"prod"}, + LocationRole: models.LocationRoleManifest, }, { - Name: "url-dep", - Version: "", - PackageManager: models.NPM, - Ecosystem: models.EcosystemNPM, - IsDirect: true, - DepGroups: []string{"prod"}, - LocationRole: models.LocationRoleManifest, + Name: "url-dep", + Version: "", + PackageManager: models.NPM, + Ecosystem: models.EcosystemNPM, + IsDirect: true, + RequiresTransitiveEnrichment: true, + DepGroups: []string{"prod"}, + LocationRole: models.LocationRoleManifest, }, { - Name: "tag-dep", - Version: "", - PackageManager: models.NPM, - Ecosystem: models.EcosystemNPM, - IsDirect: true, - DepGroups: []string{"prod"}, - LocationRole: models.LocationRoleManifest, + Name: "tag-dep", + Version: "", + PackageManager: models.NPM, + Ecosystem: models.EcosystemNPM, + IsDirect: true, + RequiresTransitiveEnrichment: true, + DepGroups: []string{"prod"}, + LocationRole: models.LocationRoleManifest, }, }) } @@ -356,22 +370,24 @@ func TestParsePackageJSON_AliasCollision(t *testing.T) { testutil.ExpectPackagesWithoutLocations(t, packages, []lockfile.PackageDetails{ { - Name: "react", - Version: "17.0.2", - PackageManager: models.NPM, - Ecosystem: models.EcosystemNPM, - IsDirect: true, - DepGroups: []string{"prod"}, - LocationRole: models.LocationRoleManifest, + Name: "react", + Version: "17.0.2", + PackageManager: models.NPM, + Ecosystem: models.EcosystemNPM, + IsDirect: true, + RequiresTransitiveEnrichment: true, + DepGroups: []string{"prod"}, + LocationRole: models.LocationRoleManifest, }, { - Name: "react", - Version: "18.3.1", - PackageManager: models.NPM, - Ecosystem: models.EcosystemNPM, - IsDirect: true, - DepGroups: []string{"prod"}, - LocationRole: models.LocationRoleManifest, + Name: "react", + Version: "18.3.1", + PackageManager: models.NPM, + Ecosystem: models.EcosystemNPM, + IsDirect: true, + RequiresTransitiveEnrichment: true, + DepGroups: []string{"prod"}, + LocationRole: models.LocationRoleManifest, }, }) } @@ -392,13 +408,14 @@ func TestParsePackageJSON_AliasRangeCollision(t *testing.T) { testutil.ExpectPackagesWithoutLocations(t, packages, []lockfile.PackageDetails{ { - Name: "react", - VersionRange: "^17", - PackageManager: models.NPM, - Ecosystem: models.EcosystemNPM, - IsDirect: true, - DepGroups: []string{"prod"}, - LocationRole: models.LocationRoleManifest, + Name: "react", + VersionRange: "^17", + PackageManager: models.NPM, + Ecosystem: models.EcosystemNPM, + IsDirect: true, + RequiresTransitiveEnrichment: true, + DepGroups: []string{"prod"}, + LocationRole: models.LocationRoleManifest, }, }) } @@ -424,10 +441,9 @@ func TestParsePackageJSON_WorkspaceWithRootLockfile(t *testing.T) { testutil.ExpectPackagesWithoutLocations(t, packages, []lockfile.PackageDetails{}) } -// TestParsePackageJSON_SetsLocationRoleManifest verifies that every extracted package -// has LocationRole set to LocationRoleManifest. ExpectPackagesWithoutLocations ignores -// this field, so the assertion is made directly here. -func TestParsePackageJSON_SetsLocationRoleManifest(t *testing.T) { +// TestParsePackageJSON_SetsManifestMetadata verifies fields ignored by +// ExpectPackagesWithoutLocations. +func TestParsePackageJSON_SetsManifestMetadata(t *testing.T) { t.Parallel() dir, err := os.Getwd() if err != nil { @@ -449,6 +465,9 @@ func TestParsePackageJSON_SetsLocationRoleManifest(t *testing.T) { t.Errorf("package %s@%s: LocationRole = %q, want %q", pkg.Name, pkg.Version, pkg.LocationRole, models.LocationRoleManifest) } + if !pkg.RequiresTransitiveEnrichment { + t.Errorf("package %s@%s: RequiresTransitiveEnrichment = false, want true", pkg.Name, pkg.Version) + } } } diff --git a/pkg/lockfile/python/parse-pyproject-toml.go b/pkg/lockfile/python/parse-pyproject-toml.go index 473611fd..317efc0d 100644 --- a/pkg/lockfile/python/parse-pyproject-toml.go +++ b/pkg/lockfile/python/parse-pyproject-toml.go @@ -3,9 +3,7 @@ package python import ( "fmt" "io" - "maps" "path/filepath" - "slices" "strings" "github.com/BurntSushi/toml" @@ -147,34 +145,39 @@ func (e PyProjectTOMLExtractor) Extract(f lockfile.DepFile, context lockfile.Sca lines := fileposition.BytesToLines(content) pm := detectPackageManager(&pyproject) - packages := map[string]lockfile.PackageDetails{} + collector := pyprojectPackageCollector{ + packages: map[string]lockfile.PackageDetails{}, + lines: lines, + path: f.Path(), + packageManager: pm, + } for _, dep := range pyproject.Project.Dependencies { - if name, rawName, version, ok := parsePEP508Pin(dep); ok { - block, nameLocation, versionLocation := extractPositions(lines, f.Path(), rawName, version, false) - addOrMergeGroups(packages, name, version, []string{"prod"}, pm, block, nameLocation, versionLocation) + dependency, ok := parsePEP508Dependency(dep) + if ok { + collector.addDependency(dependency, []string{string(models.DepGroupProd)}, false) } } - for group, deps := range pyproject.Project.OptionalDependencies { + for optionalDependencyGroup, deps := range pyproject.Project.OptionalDependencies { for _, dep := range deps { - if name, rawName, version, ok := parsePEP508Pin(dep); ok { - block, nameLocation, versionLocation := extractPositions(lines, f.Path(), rawName, version, false) - addOrMergeGroups(packages, name, version, []string{group}, pm, block, nameLocation, versionLocation) + dependency, ok := parsePEP508Dependency(dep) + if ok { + collector.addDependency(dependency, []string{optionalDependencyGroup}, false) } } } - for group, items := range pyproject.DependencyGroups { - for _, item := range items { + for dependencyGroupName, dependencyGroupItems := range pyproject.DependencyGroups { + for _, item := range dependencyGroupItems { dep, ok := item.(string) if !ok { - // skip {include-group = "..."} table entries + // PEP 735 dependency groups can include table entries such as {include-group = "..."}. continue } - if name, rawName, version, ok := parsePEP508Pin(dep); ok { - block, nameLocation, versionLocation := extractPositions(lines, f.Path(), rawName, version, false) - addOrMergeGroups(packages, name, version, []string{group}, pm, block, nameLocation, versionLocation) + dependency, ok := parsePEP508Dependency(dep) + if ok { + collector.addDependency(dependency, []string{dependencyGroupName}, false) } } } @@ -185,147 +188,160 @@ func (e PyProjectTOMLExtractor) Extract(f lockfile.DepFile, context lockfile.Sca if name == "python" { continue } - if version, ok := parsePoetryPin(val); ok { - normalized := normalizedRequirementName(name) - block, nameLocation, versionLocation := extractPositions(lines, f.Path(), name, version, true) - addOrMergeGroups(packages, normalized, version, []string{"prod"}, pm, block, nameLocation, versionLocation) + if version, versionRange, ok := parsePoetryDependency(val); ok { + collector.addDependency(pep508Dependency{ + Name: normalizedRequirementName(name), + RawName: name, + Version: version, + VersionRange: versionRange, + }, []string{string(models.DepGroupProd)}, true) } } for name, val := range pyproject.Tool.Poetry.DevDependencies { - if version, ok := parsePoetryPin(val); ok { - normalized := normalizedRequirementName(name) - block, nameLocation, versionLocation := extractPositions(lines, f.Path(), name, version, true) - addOrMergeGroups(packages, normalized, version, []string{"dev"}, pm, block, nameLocation, versionLocation) + if version, versionRange, ok := parsePoetryDependency(val); ok { + collector.addDependency(pep508Dependency{ + Name: normalizedRequirementName(name), + RawName: name, + Version: version, + VersionRange: versionRange, + }, []string{string(models.DepGroupDev)}, true) } } - for groupName, group := range pyproject.Tool.Poetry.Group { - for name, val := range group.Dependencies { - if version, ok := parsePoetryPin(val); ok { - normalized := normalizedRequirementName(name) - block, nameLocation, versionLocation := extractPositions(lines, f.Path(), name, version, true) - addOrMergeGroups(packages, normalized, version, []string{groupName}, pm, block, nameLocation, versionLocation) + for poetryGroupName, poetryGroup := range pyproject.Tool.Poetry.Group { + for name, val := range poetryGroup.Dependencies { + if version, versionRange, ok := parsePoetryDependency(val); ok { + collector.addDependency(pep508Dependency{ + Name: normalizedRequirementName(name), + RawName: name, + Version: version, + VersionRange: versionRange, + }, []string{poetryGroupName}, true) } } } } - return slices.Collect(maps.Values(packages)), nil + return sortedPyprojectPackages(collector.packages), nil } -// addOrMergeGroups adds a package to the map, or if it already exists (same name+version), -// merges the new dep groups into the existing entry rather than dropping the duplicate. -func addOrMergeGroups(packages map[string]lockfile.PackageDetails, name, version string, groups []string, pm models.PackageManager, block models.FilePosition, nameLocation, versionLocation *models.FilePosition) { - key := name + "@" + version - if existing, exists := packages[key]; exists { - for _, g := range groups { - if !slices.Contains(existing.DepGroups, g) { - existing.DepGroups = append(existing.DepGroups, g) - } - } - packages[key] = existing - - return - } - packages[key] = lockfile.PackageDetails{ - Name: name, - Version: version, - PackageManager: pm, - Ecosystem: models.EcosystemPyPI, - IsDirect: true, - DepGroups: groups, - BlockLocation: block, - NameLocation: nameLocation, - VersionLocation: versionLocation, - LocationRole: models.LocationRoleManifest, - } +type pep508Dependency struct { + Name string + RawName string + Version string + VersionRange string } -// parsePEP508Pin parses a PEP 508 dependency string and returns the normalized name, the raw -// (pre-normalization) name as written in the file, and the version — only when the dependency -// is an exact pin (==). Returns ok=false for all other specifiers. -func parsePEP508Pin(dep string) (name, rawName, version string, ok bool) { +// parsePEP508Dependency parses a PEP 508 dependency string into a normalized name, +// the raw name as written in the file, and either an exact version or original version range. +func parsePEP508Dependency(dep string) (pep508Dependency, bool) { // strip environment markers (PEP 508) dep, _, _ = strings.Cut(dep, ";") dep = strings.TrimSpace(dep) + if strings.Contains(dep, " @ ") { + return pep508Dependency{}, false + } // strip parenthesized specifier: "requests (==2.28.0)" -> "requests ==2.28.0" dep = strings.NewReplacer("(", "", ")", "").Replace(dep) - // reject if any non-exact operator is present - for _, op := range []string{"===", "!=", ">=", "<=", "~=", ">", "<"} { - if strings.Contains(dep, op) { - return "", "", "", false - } + opIndex, op := findFirstPEP508Specifier(dep) + if opIndex == -1 || op == "===" { + return pep508Dependency{}, false } - rawNamePart, rawVersion, found := strings.Cut(dep, "==") - if !found { - return "", "", "", false + // strip extras: "requests[security]" -> "requests" + fileRawName, _, _ := strings.Cut(strings.TrimSpace(dep[:opIndex]), "[") + fileRawName = strings.TrimSpace(fileRawName) + specifier := strings.TrimSpace(dep[opIndex:]) + + if fileRawName == "" || specifier == "" { + return pep508Dependency{}, false } - // reject multi-constraint specs: "==2.28.0,!=2.28.0" - if strings.Contains(rawVersion, ",") { - return "", "", "", false + if op == "==" { + rawVersion := strings.TrimSpace(specifier[len(op):]) + if rawVersion != "" && !strings.Contains(rawVersion, ",") && isConcreteVersion(rawVersion) { + return pep508Dependency{ + Name: normalizedRequirementName(fileRawName), + RawName: fileRawName, + Version: rawVersion, + }, true + } } - rawVersion = strings.TrimSpace(rawVersion) - // strip extras: "requests[security]" -> "requests" - fileRawName, _, _ := strings.Cut(strings.TrimSpace(rawNamePart), "[") - fileRawName = strings.TrimSpace(fileRawName) + return pep508Dependency{ + Name: normalizedRequirementName(fileRawName), + RawName: fileRawName, + VersionRange: specifier, + }, true +} - if fileRawName == "" || rawVersion == "" || !isConcreteVersion(rawVersion) { - return "", "", "", false +func findFirstPEP508Specifier(dep string) (int, string) { + firstIndex := -1 + firstOp := "" + for _, op := range []string{"===", "==", "!=", ">=", "<=", "~=", ">", "<"} { + index := strings.Index(dep, op) + if index == -1 { + continue + } + if firstIndex == -1 || index < firstIndex { + firstIndex = index + firstOp = op + } } - return normalizedRequirementName(fileRawName), fileRawName, rawVersion, true + return firstIndex, firstOp } -// parsePoetryPin parses a Poetry dependency value (string or inline table) and returns -// the version only when it is an exact pin (== prefix) with a concrete version. -func parsePoetryPin(val any) (version string, ok bool) { +// parsePoetryDependency parses a Poetry dependency value (string or inline table) and +// returns either an exact version or the original version range. +func parsePoetryDependency(val any) (version, versionRange string, ok bool) { var versionStr string switch v := val.(type) { case string: versionStr = v case map[string]any: + for _, directRefKey := range []string{"path", "git", "url"} { + if _, exists := v[directRefKey]; exists { + return "", "", false + } + } versionStr, ok = v["version"].(string) if !ok { - return "", false + return "", "", false } default: - return "", false + return "", "", false } versionStr = strings.TrimSpace(versionStr) + if versionStr == "" || strings.HasPrefix(versionStr, "===") { + return "", "", false + } - // Poetry bare version string "2.28.0" is an implicit exact pin + // Poetry bare version string "2.28.0" is an implicit exact pin. + // Other digit-starting constraints, such as "1.*", are still ranges. if len(versionStr) > 0 && !strings.ContainsAny(string(versionStr[0]), "=!<>~^*") { if strings.Contains(versionStr, ",") { - return "", false + return "", versionStr, true } if isConcreteVersion(versionStr) { - return versionStr, true + return versionStr, "", true } - return "", false - } - - // reject === (arbitrary equality) and any non-== operator - if !strings.HasPrefix(versionStr, "==") || strings.HasPrefix(versionStr, "===") { - return "", false + return "", versionStr, true } - after := strings.TrimSpace(versionStr[2:]) + if strings.HasPrefix(versionStr, "==") { + after := strings.TrimSpace(versionStr[2:]) - // reject multi-constraint: "==2.28.0,!=2.28.1" is not an exact pin - if strings.Contains(after, ",") { - return "", false - } + if !strings.Contains(after, ",") && isConcreteVersion(after) { + return after, "", true + } - if isConcreteVersion(after) { - return after, true + return "", versionStr, true } - return "", false + return "", versionStr, true } // isConcreteVersion returns true if version looks like a fully-specified version diff --git a/pkg/lockfile/python/parse-pyproject-toml_test.go b/pkg/lockfile/python/parse-pyproject-toml_test.go index f37877ad..b1a2e73e 100644 --- a/pkg/lockfile/python/parse-pyproject-toml_test.go +++ b/pkg/lockfile/python/parse-pyproject-toml_test.go @@ -1,8 +1,11 @@ package python_test import ( + "bytes" + "log" "os" "path/filepath" + "strings" "testing" "github.com/DataDog/datadog-sbom-generator/pkg/lockfile" @@ -79,27 +82,41 @@ func TestParsePyProjectTOML_PEP621_PinnedExtracted(t *testing.T) { expectNilErr(t, err) testutil.ExpectPackages(t, packages, []lockfile.PackageDetails{ { - Name: "requests", Version: "2.28.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"prod"}, + Name: "requests", Version: "2.28.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, BlockLocation: pos(path, 5, 5, 5, 24), NameLocation: posPtr(path, 5, 5, 6, 14), VersionLocation: posPtr(path, 5, 5, 16, 22), LocationRole: models.LocationRoleManifest, }, { - Name: "flask", Version: "2.3.2", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"prod"}, + Name: "flask", Version: "2.3.2", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, BlockLocation: pos(path, 6, 6, 5, 20), NameLocation: posPtr(path, 6, 6, 6, 11), VersionLocation: posPtr(path, 6, 6, 13, 18), LocationRole: models.LocationRoleManifest, }, { - Name: "boto3", Version: "1.26.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"prod"}, + Name: "numpy", VersionRange: ">=1.24", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, + BlockLocation: pos(path, 7, 7, 5, 19), NameLocation: posPtr(path, 7, 7, 6, 11), VersionLocation: posPtr(path, 7, 7, 11, 17), LocationRole: models.LocationRoleManifest, + }, + { + Name: "boto3", Version: "1.26.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, BlockLocation: pos(path, 9, 9, 5, 21), NameLocation: posPtr(path, 9, 9, 6, 11), VersionLocation: posPtr(path, 9, 9, 13, 19), LocationRole: models.LocationRoleManifest, }, }) } -func TestParsePyProjectTOML_PEP621_UnpinnedSkipped(t *testing.T) { +func TestParsePyProjectTOML_PEP621_RangesExtractedAndUnversionedSkipped(t *testing.T) { t.Parallel() - packages, err := python.ParsePyProjectTOML(fixturePath(t, "../fixtures/pyproject-toml-extractor/no-pinned/pyproject.toml")) + path := fixturePath(t, "../fixtures/pyproject-toml-extractor/no-pinned/pyproject.toml") + packages, err := python.ParsePyProjectTOML(path) expectNilErr(t, err) - testutil.ExpectPackages(t, packages, []lockfile.PackageDetails{}) + testutil.ExpectPackages(t, packages, []lockfile.PackageDetails{ + { + Name: "requests", VersionRange: ">=2.28", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, + BlockLocation: pos(path, 5, 5, 5, 22), NameLocation: posPtr(path, 5, 5, 6, 14), VersionLocation: posPtr(path, 5, 5, 14, 20), LocationRole: models.LocationRoleManifest, + }, + { + Name: "flask", VersionRange: "~=2.3", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, + BlockLocation: pos(path, 6, 6, 5, 18), NameLocation: posPtr(path, 6, 6, 6, 11), VersionLocation: posPtr(path, 6, 6, 11, 16), LocationRole: models.LocationRoleManifest, + }, + }) } // ============================================================================ @@ -115,19 +132,19 @@ func TestParsePyProjectTOML_PEP621_OptionalDeps(t *testing.T) { expectNilErr(t, err) testutil.ExpectPackages(t, packages, []lockfile.PackageDetails{ { - Name: "requests", Version: "2.28.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"prod"}, + Name: "requests", Version: "2.28.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, BlockLocation: pos(path, 5, 5, 5, 24), NameLocation: posPtr(path, 5, 5, 6, 14), VersionLocation: posPtr(path, 5, 5, 16, 22), LocationRole: models.LocationRoleManifest, }, { - Name: "pytest", Version: "7.4.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"dev"}, + Name: "pytest", Version: "7.4.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"dev"}, BlockLocation: pos(path, 10, 10, 5, 21), NameLocation: posPtr(path, 10, 10, 6, 12), VersionLocation: posPtr(path, 10, 10, 14, 19), LocationRole: models.LocationRoleManifest, }, { - Name: "mypy", Version: "1.5.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"dev"}, + Name: "mypy", Version: "1.5.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"dev"}, BlockLocation: pos(path, 11, 11, 5, 19), NameLocation: posPtr(path, 11, 11, 6, 10), VersionLocation: posPtr(path, 11, 11, 12, 17), LocationRole: models.LocationRoleManifest, }, { - Name: "sphinx", Version: "7.1.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"docs"}, + Name: "sphinx", Version: "7.1.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"docs"}, BlockLocation: pos(path, 14, 14, 5, 21), NameLocation: posPtr(path, 14, 14, 6, 12), VersionLocation: posPtr(path, 14, 14, 14, 19), LocationRole: models.LocationRoleManifest, }, }) @@ -146,19 +163,19 @@ func TestParsePyProjectTOML_DependencyGroups_PinnedExtracted(t *testing.T) { expectNilErr(t, err) testutil.ExpectPackages(t, packages, []lockfile.PackageDetails{ { - Name: "requests", Version: "2.28.0", PackageManager: models.Uv, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"prod"}, + Name: "requests", Version: "2.28.0", PackageManager: models.Uv, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, BlockLocation: pos(path, 5, 5, 5, 24), NameLocation: posPtr(path, 5, 5, 6, 14), VersionLocation: posPtr(path, 5, 5, 16, 22), LocationRole: models.LocationRoleManifest, }, { - Name: "pytest", Version: "7.4.0", PackageManager: models.Uv, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"dev"}, + Name: "pytest", Version: "7.4.0", PackageManager: models.Uv, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"dev"}, BlockLocation: pos(path, 12, 12, 5, 21), NameLocation: posPtr(path, 12, 12, 6, 12), VersionLocation: posPtr(path, 12, 12, 14, 19), LocationRole: models.LocationRoleManifest, }, { - Name: "ruff", Version: "0.1.0", PackageManager: models.Uv, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"dev"}, + Name: "ruff", Version: "0.1.0", PackageManager: models.Uv, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"dev"}, BlockLocation: pos(path, 13, 13, 5, 19), NameLocation: posPtr(path, 13, 13, 6, 10), VersionLocation: posPtr(path, 13, 13, 12, 17), LocationRole: models.LocationRoleManifest, }, { - Name: "hypothesis", Version: "6.100.0", PackageManager: models.Uv, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"test"}, + Name: "hypothesis", Version: "6.100.0", PackageManager: models.Uv, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"test"}, BlockLocation: pos(path, 17, 17, 5, 27), NameLocation: posPtr(path, 17, 17, 6, 16), VersionLocation: posPtr(path, 17, 17, 18, 25), LocationRole: models.LocationRoleManifest, }, }) @@ -175,20 +192,28 @@ func TestParsePyProjectTOML_Poetry_PinnedExtracted(t *testing.T) { packages, err := python.ParsePyProjectTOML(path) expectNilErr(t, err) - // python version constraint must be skipped; unpinned numpy (^) must be skipped + // python version constraint must be skipped; the numpy range is preserved for reducer resolution testutil.ExpectPackages(t, packages, []lockfile.PackageDetails{ { - Name: "requests", Version: "2.28.0", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"prod"}, + Name: "requests", Version: "2.28.0", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, BlockLocation: pos(path, 11, 11, 1, 22), NameLocation: posPtr(path, 11, 11, 1, 9), VersionLocation: posPtr(path, 11, 11, 13, 21), LocationRole: models.LocationRoleManifest, }, { - Name: "flask", Version: "2.3.2", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"prod"}, + Name: "flask", Version: "2.3.2", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, BlockLocation: pos(path, 12, 12, 1, 18), NameLocation: posPtr(path, 12, 12, 1, 6), VersionLocation: posPtr(path, 12, 12, 10, 17), LocationRole: models.LocationRoleManifest, }, { - Name: "boto3", Version: "1.26.0", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"prod"}, + Name: "boto3", Version: "1.26.0", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, BlockLocation: pos(path, 13, 13, 1, 17), NameLocation: posPtr(path, 13, 13, 1, 6), VersionLocation: posPtr(path, 13, 13, 10, 16), LocationRole: models.LocationRoleManifest, }, + { + Name: "numpy", VersionRange: "^1.24", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, + BlockLocation: pos(path, 14, 14, 1, 16), NameLocation: posPtr(path, 14, 14, 1, 6), VersionLocation: posPtr(path, 14, 14, 10, 15), LocationRole: models.LocationRoleManifest, + }, + { + Name: "scipy", VersionRange: "1.*", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, + BlockLocation: pos(path, 15, 15, 1, 14), NameLocation: posPtr(path, 15, 15, 1, 6), VersionLocation: posPtr(path, 15, 15, 10, 13), LocationRole: models.LocationRoleManifest, + }, }) } @@ -201,19 +226,19 @@ func TestParsePyProjectTOML_Poetry_GroupDeps(t *testing.T) { expectNilErr(t, err) testutil.ExpectPackages(t, packages, []lockfile.PackageDetails{ { - Name: "requests", Version: "2.28.0", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"prod"}, + Name: "requests", Version: "2.28.0", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, BlockLocation: pos(path, 11, 11, 1, 22), NameLocation: posPtr(path, 11, 11, 1, 9), VersionLocation: posPtr(path, 11, 11, 13, 21), LocationRole: models.LocationRoleManifest, }, { - Name: "black", Version: "23.1.0", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"dev"}, + Name: "black", Version: "23.1.0", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"dev"}, BlockLocation: pos(path, 14, 14, 1, 19), NameLocation: posPtr(path, 14, 14, 1, 6), VersionLocation: posPtr(path, 14, 14, 10, 18), LocationRole: models.LocationRoleManifest, }, { - Name: "pytest", Version: "7.4.0", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"test"}, + Name: "pytest", Version: "7.4.0", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"test"}, BlockLocation: pos(path, 17, 17, 1, 19), NameLocation: posPtr(path, 17, 17, 1, 7), VersionLocation: posPtr(path, 17, 17, 11, 18), LocationRole: models.LocationRoleManifest, }, { - Name: "pytest-cov", Version: "4.1.0", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"test"}, + Name: "pytest-cov", Version: "4.1.0", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"test"}, BlockLocation: pos(path, 18, 18, 1, 35), NameLocation: posPtr(path, 18, 18, 1, 11), VersionLocation: posPtr(path, 18, 18, 26, 33), LocationRole: models.LocationRoleManifest, }, }) @@ -254,7 +279,7 @@ func TestParsePyProjectTOML_PEP621_ParenthesizedPin(t *testing.T) { expectNilErr(t, err) testutil.ExpectPackages(t, packages, []lockfile.PackageDetails{ { - Name: "requests", Version: "2.28.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"prod"}, + Name: "requests", Version: "2.28.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, BlockLocation: pos(path, 5, 5, 5, 27), NameLocation: posPtr(path, 5, 5, 6, 14), VersionLocation: posPtr(path, 5, 5, 18, 24), LocationRole: models.LocationRoleManifest, }, }) @@ -273,12 +298,68 @@ func TestParsePyProjectTOML_MergesGroupsForDuplicatePackage(t *testing.T) { expectNilErr(t, err) testutil.ExpectPackages(t, packages, []lockfile.PackageDetails{ { - Name: "requests", Version: "2.28.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"prod", "dev"}, + Name: "requests", Version: "2.28.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod", "dev"}, BlockLocation: pos(path, 5, 5, 5, 24), NameLocation: posPtr(path, 5, 5, 6, 14), VersionLocation: posPtr(path, 5, 5, 16, 22), LocationRole: models.LocationRoleManifest, }, }) } +func TestParsePyProjectTOML_ConflictingRangesAreSourceOrderedAndLogged(t *testing.T) { + t.Parallel() + + var logs bytes.Buffer + previousLogOutput := log.Writer() + log.SetOutput(&logs) + defer log.SetOutput(previousLogOutput) + + dir := t.TempDir() + path := filepath.Join(dir, "pyproject.toml") + content := `[project] +name = "my-app" +version = "1.0.0" +dependencies = [ + "requests>=1,<2", + "flask>=2", +] + +[project.optional-dependencies] +dev = [ + "requests>=2,<3", +] +` + if err := os.WriteFile(path, []byte(content), 0o600); err != nil { + t.Fatalf("could not write pyproject fixture: %v", err) + } + + packages, err := python.ParsePyProjectTOML(path) + + expectNilErr(t, err) + testutil.ExpectPackages(t, packages, []lockfile.PackageDetails{ + { + Name: "requests", VersionRange: ">=1,<2", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, + BlockLocation: pos(path, 5, 5, 5, 22), NameLocation: posPtr(path, 5, 5, 6, 14), VersionLocation: posPtr(path, 5, 5, 14, 20), LocationRole: models.LocationRoleManifest, + }, + { + Name: "flask", VersionRange: ">=2", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, + BlockLocation: pos(path, 6, 6, 5, 16), NameLocation: posPtr(path, 6, 6, 6, 11), VersionLocation: posPtr(path, 6, 6, 11, 14), LocationRole: models.LocationRoleManifest, + }, + { + Name: "requests", VersionRange: ">=2,<3", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"dev"}, + BlockLocation: pos(path, 11, 11, 5, 22), NameLocation: posPtr(path, 11, 11, 6, 14), VersionLocation: posPtr(path, 11, 11, 14, 20), LocationRole: models.LocationRoleManifest, + }, + }) + + if len(packages) != 3 { + t.Fatalf("expected 3 packages, got %d", len(packages)) + } + if packages[0].Name != "requests" || packages[0].VersionRange != ">=1,<2" { + t.Fatalf("expected first package to be earliest requests range, got %s %q", packages[0].Name, packages[0].VersionRange) + } + if !strings.Contains(logs.String(), `Multiple pyproject version ranges for dependency "requests"`) { + t.Fatalf("expected conflicting range log, got %q", logs.String()) + } +} + // ============================================================================ // Error cases // ============================================================================ diff --git a/pkg/lockfile/python/pyproject_package_collector.go b/pkg/lockfile/python/pyproject_package_collector.go new file mode 100644 index 00000000..e92d1465 --- /dev/null +++ b/pkg/lockfile/python/pyproject_package_collector.go @@ -0,0 +1,169 @@ +package python + +import ( + "cmp" + "log" + "maps" + "math" + "slices" + "strings" + + "github.com/DataDog/datadog-sbom-generator/pkg/lockfile" + "github.com/DataDog/datadog-sbom-generator/pkg/models" +) + +type pyprojectPackageCollector struct { + packages map[string]lockfile.PackageDetails + lines []string + path string + packageManager models.PackageManager +} + +func (c *pyprojectPackageCollector) addDependency(dependency pep508Dependency, groups []string, isPoetry bool) { + if (dependency.Version == "") == (dependency.VersionRange == "") { + log.Printf( + "Skipping pyproject dependency %q from %s: expected exactly one of version or version range, got version=%q versionRange=%q\n", + dependency.Name, + c.path, + dependency.Version, + dependency.VersionRange, + ) + + return + } + + block, nameLocation, versionLocation := extractPositions(c.lines, c.path, dependency.RawName, versionOrRange(dependency.Version, dependency.VersionRange), isPoetry) + c.addOrMergePackageGroups(lockfile.PackageDetails{ + Name: dependency.Name, + Version: dependency.Version, + VersionRange: dependency.VersionRange, + PackageManager: c.packageManager, + Ecosystem: models.EcosystemPyPI, + IsDirect: true, + RequiresTransitiveEnrichment: true, + DepGroups: groups, + BlockLocation: block, + NameLocation: nameLocation, + VersionLocation: versionLocation, + LocationRole: models.LocationRoleManifest, + }) +} + +// addOrMergePackageGroups adds a package to the map, or if it already exists (same name+version/range), +// merges the new dep groups into the existing entry rather than dropping the duplicate. +func (c *pyprojectPackageCollector) addOrMergePackageGroups(pkg lockfile.PackageDetails) { + key := pyprojectPackageKey(pkg) + if existing, exists := c.packages[key]; exists { + mergeDepGroups(&existing, pkg.DepGroups) + c.packages[key] = existing + + return + } + c.logVersionRangeConflict(pkg) + c.packages[key] = pkg +} + +// pyprojectPackageKey keeps declarations separate unless the package name and +// the exact version or range match, so duplicate declarations can merge groups +// without dropping distinct constraints. +func pyprojectPackageKey(pkg lockfile.PackageDetails) string { + return pkg.Name + "@" + pkg.Version + "|" + pkg.VersionRange +} + +func mergeDepGroups(pkg *lockfile.PackageDetails, groups []string) { + for _, group := range groups { + if !slices.Contains(pkg.DepGroups, group) { + pkg.DepGroups = append(pkg.DepGroups, group) + } + } +} + +// logVersionRangeConflict warns when two ranges for the same package will later +// share an unversioned PURL in CycloneDX output. The packages stay separate here +// so their source locations can still be preserved. +func (c *pyprojectPackageCollector) logVersionRangeConflict(pkg lockfile.PackageDetails) { + if pkg.VersionRange == "" { + return + } + + for _, existing := range c.packages { + if !hasConflictingVersionRange(existing, pkg) { + continue + } + + log.Printf( + "Multiple pyproject version ranges for dependency %q from %s collapse to the same unversioned PURL; CycloneDX output will keep the earliest source declaration. Saw ranges %q and %q\n", + pkg.Name, + c.path, + existing.VersionRange, + pkg.VersionRange, + ) + + return + } +} + +func hasConflictingVersionRange(existing, pkg lockfile.PackageDetails) bool { + return existing.Name == pkg.Name && + existing.VersionRange != "" && + existing.VersionRange != pkg.VersionRange +} + +func sortedPyprojectPackages(packages map[string]lockfile.PackageDetails) []lockfile.PackageDetails { + result := slices.Collect(maps.Values(packages)) + slices.SortFunc(result, comparePyprojectPackageDetails) + + return result +} + +// comparePyprojectPackageDetails sorts by pyproject.toml location first. Entries +// without a parsed location sort last, then package fields break ties. +func comparePyprojectPackageDetails(a, b lockfile.PackageDetails) int { + if c := strings.Compare(a.BlockLocation.Filename, b.BlockLocation.Filename); c != 0 { + return c + } + if c := cmp.Compare(sourceLine(a.BlockLocation), sourceLine(b.BlockLocation)); c != 0 { + return c + } + if c := cmp.Compare(sourceColumn(a.BlockLocation), sourceColumn(b.BlockLocation)); c != 0 { + return c + } + if c := strings.Compare(a.Name, b.Name); c != 0 { + return c + } + if c := strings.Compare(a.Version, b.Version); c != 0 { + return c + } + + return strings.Compare(a.VersionRange, b.VersionRange) +} + +// sourceLine returns a sortable line number. Missing source positions sort last +// so packages with extracted manifest locations preserve source order first. +func sourceLine(location models.FilePosition) int { + if location.Line.Start == 0 { + return math.MaxInt + } + + return location.Line.Start +} + +// sourceColumn returns a sortable column number. Missing source positions sort +// last, then name/version/range comparisons below keep ordering deterministic. +func sourceColumn(location models.FilePosition) int { + if location.Column.Start == 0 { + return math.MaxInt + } + + return location.Column.Start +} + +// versionOrRange returns the manifest value to anchor when extracting source +// positions; each dependency should have exactly one of these set. +func versionOrRange(version, versionRange string) string { + if version != "" { + return version + } + + return versionRange +} diff --git a/pkg/lockfile/types.go b/pkg/lockfile/types.go index b1e4194a..34d1ea24 100644 --- a/pkg/lockfile/types.go +++ b/pkg/lockfile/types.go @@ -5,25 +5,26 @@ import ( ) type PackageDetails struct { - Source models.SourceInfo - Name string `json:"name"` - Version string `json:"version"` - TargetVersions []string `json:"targetVersions,omitempty"` - TargetFrameworks []string `json:"targetFrameworks,omitempty"` - Commit string `json:"commit,omitempty"` - PURL string `json:"purl,omitempty"` - Ecosystem models.Ecosystem `json:"ecosystem,omitempty"` - DepGroups []string `json:"depGroups,omitempty"` - BlockLocation models.FilePosition `json:"blockLocation,omitempty"` - LocationRole string `json:"locationRole,omitempty"` - VersionLocation *models.FilePosition `json:"versionLocation,omitempty"` - NameLocation *models.FilePosition `json:"nameLocation,omitempty"` - PackageManager models.PackageManager `json:"packageManager,omitempty"` - VersionRange string `json:"versionRange,omitempty"` - IsDirect bool `json:"isDirect,omitempty"` - Opaque bool `json:"opaque,omitempty"` - Dependencies []*PackageDetails `json:"dependencies,omitempty"` - Exclusions []string `json:"exclusions,omitempty"` + Source models.SourceInfo + Name string `json:"name"` + Version string `json:"version"` + VersionRange string `json:"versionRange,omitempty"` + TargetVersions []string `json:"targetVersions,omitempty"` + TargetFrameworks []string `json:"targetFrameworks,omitempty"` + Commit string `json:"commit,omitempty"` + PURL string `json:"purl,omitempty"` + Ecosystem models.Ecosystem `json:"ecosystem,omitempty"` + DepGroups []string `json:"depGroups,omitempty"` + BlockLocation models.FilePosition `json:"blockLocation,omitempty"` + LocationRole string `json:"locationRole,omitempty"` + VersionLocation *models.FilePosition `json:"versionLocation,omitempty"` + NameLocation *models.FilePosition `json:"nameLocation,omitempty"` + PackageManager models.PackageManager `json:"packageManager,omitempty"` + IsDirect bool `json:"isDirect,omitempty"` + RequiresTransitiveEnrichment bool `json:"requiresTransitiveEnrichment,omitempty"` + Opaque bool `json:"opaque,omitempty"` + Dependencies []*PackageDetails `json:"dependencies,omitempty"` + Exclusions []string `json:"exclusions,omitempty"` } type Ecosystem string diff --git a/pkg/models/package_metadata.go b/pkg/models/package_metadata.go index ac23bbc7..eff57850 100644 --- a/pkg/models/package_metadata.go +++ b/pkg/models/package_metadata.go @@ -5,12 +5,13 @@ import "fmt" type PackageMetadataType string const ( - PackageManagerMetadata PackageMetadataType = "package-manager" - IsDirectDependencyMetadata PackageMetadataType = "is-direct" - IsDevDependencyMetadata PackageMetadataType = "is-dev" - ExclusionMetadata PackageMetadataType = "exclusion" - OpaqueMetadata PackageMetadataType = "opaque" - VersionRangeMetadata PackageMetadataType = "version-range" + PackageManagerMetadata PackageMetadataType = "package-manager" + IsDirectDependencyMetadata PackageMetadataType = "is-direct" + VersionRangeMetadata PackageMetadataType = "version-range" + RequiresTransitiveEnrichmentMetadata PackageMetadataType = "requires-transitive-enrichment" + IsDevDependencyMetadata PackageMetadataType = "is-dev" + ExclusionMetadata PackageMetadataType = "exclusion" + OpaqueMetadata PackageMetadataType = "opaque" ) type PackageMetadata map[PackageMetadataType]string diff --git a/pkg/scanner/datadog_sbom_generator.go b/pkg/scanner/datadog_sbom_generator.go index c1bc9e86..30feeeeb 100644 --- a/pkg/scanner/datadog_sbom_generator.go +++ b/pkg/scanner/datadog_sbom_generator.go @@ -360,8 +360,10 @@ func packageHasRangedVersion(scannedPackage lockfile.PackageDetails) bool { } // sanitizeScannedPackages is used to sanitize scanned packages. -// 1. filters our packages that have a ranged version +// 1. filters out packages that have a ranged value in Version // 2. creates a PURL for each package and drops the package if it cannot be created +// Packages with VersionRange are allowed through with an empty Version so that +// downstream services can resolve them. func sanitizeScannedPackages(scannedPackages []lockfile.PackageDetails) ([]lockfile.PackageDetails, []string) { finalPackages := make([]lockfile.PackageDetails, 0, len(scannedPackages)) droppedReasons := make([]string, 0, len(scannedPackages)) diff --git a/pkg/scanner/datadog_sbom_generator_test.go b/pkg/scanner/datadog_sbom_generator_test.go index d9f16d42..c2efcdc3 100644 --- a/pkg/scanner/datadog_sbom_generator_test.go +++ b/pkg/scanner/datadog_sbom_generator_test.go @@ -6,6 +6,7 @@ import ( "testing" "github.com/DataDog/datadog-sbom-generator/pkg/lockfile" + "github.com/DataDog/datadog-sbom-generator/pkg/models" "github.com/DataDog/datadog-sbom-generator/pkg/reporter" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -254,3 +255,24 @@ func Test_sanitizeScannedPackages_RangedVersionAreFiltered(t *testing.T) { assert.Empty(t, sanitizedPackages) assert.Len(t, errors, 3) } + +func Test_sanitizeScannedPackages_VersionRangesAreAllowed(t *testing.T) { + t.Parallel() + + scannedPackages := []lockfile.PackageDetails{ + { + Name: "requests", + VersionRange: ">=2.0,<3.0", + Ecosystem: models.EcosystemPyPI, + IsDirect: true, + }, + } + + sanitizedPackages, errors := sanitizeScannedPackages(scannedPackages) + + assert.Empty(t, errors) + require.Len(t, sanitizedPackages, 1) + assert.Empty(t, sanitizedPackages[0].Version) + assert.Equal(t, ">=2.0,<3.0", sanitizedPackages[0].VersionRange) + assert.Equal(t, "pkg:pypi/requests", sanitizedPackages[0].PURL) +} diff --git a/pkg/scanner/vulnerability_result.go b/pkg/scanner/vulnerability_result.go index b727db61..43e5d06b 100644 --- a/pkg/scanner/vulnerability_result.go +++ b/pkg/scanner/vulnerability_result.go @@ -23,6 +23,12 @@ func exportMetadata(rawPkg lockfile.PackageDetails, reachabilityAnalysisResults if rawPkg.IsDirect { metadata[models.IsDirectDependencyMetadata] = strconv.FormatBool(rawPkg.IsDirect) } + if rawPkg.VersionRange != "" { + metadata[models.VersionRangeMetadata] = rawPkg.VersionRange + } + if rawPkg.RequiresTransitiveEnrichment { + metadata[models.RequiresTransitiveEnrichmentMetadata] = strconv.FormatBool(rawPkg.RequiresTransitiveEnrichment) + } if rawPkg.Ecosystem.IsDevGroup(rawPkg.DepGroups) { metadata[models.IsDevDependencyMetadata] = strconv.FormatBool(true) } diff --git a/pkg/scanner/vulnerability_result_test.go b/pkg/scanner/vulnerability_result_test.go index 116adba4..3438cb24 100644 --- a/pkg/scanner/vulnerability_result_test.go +++ b/pkg/scanner/vulnerability_result_test.go @@ -139,6 +139,39 @@ func Test_exportMetadata(t *testing.T) { models.IsDevDependencyMetadata: "true", }, }, + { + name: "Package with version range", + rawPkg: lockfile.PackageDetails{ + VersionRange: ">=2.0,<3.0", + }, + reachabilityAnalysis: nil, + expectedMetadata: map[models.PackageMetadataType]string{ + models.VersionRangeMetadata: ">=2.0,<3.0", + }, + }, + { + name: "Manifest exact pin requires transitive enrichment", + rawPkg: lockfile.PackageDetails{ + Version: "2.28.0", + RequiresTransitiveEnrichment: true, + }, + reachabilityAnalysis: nil, + expectedMetadata: map[models.PackageMetadataType]string{ + models.RequiresTransitiveEnrichmentMetadata: "true", + }, + }, + { + name: "Manifest version range requires transitive enrichment", + rawPkg: lockfile.PackageDetails{ + VersionRange: ">=2.0,<3.0", + RequiresTransitiveEnrichment: true, + }, + reachabilityAnalysis: nil, + expectedMetadata: map[models.PackageMetadataType]string{ + models.VersionRangeMetadata: ">=2.0,<3.0", + models.RequiresTransitiveEnrichmentMetadata: "true", + }, + }, { name: "Opaque package", rawPkg: lockfile.PackageDetails{