diff --git a/cmd/datadog-sbom-generator/__snapshots__/main_test.snap b/cmd/datadog-sbom-generator/__snapshots__/main_test.snap index 9441a512..c3572b65 100644 --- a/cmd/datadog-sbom-generator/__snapshots__/main_test.snap +++ b/cmd/datadog-sbom-generator/__snapshots__/main_test.snap @@ -2641,6 +2641,10 @@ Scanned /fixtures/integration-npm/with-workspace/yarn.lock file and fou { "name": "datadog:package-manager", "value": "uv" + }, + { + "name": "datadog:requires-transitive-enrichment", + "value": "true" } ], "evidence": { @@ -2651,6 +2655,37 @@ Scanned /fixtures/integration-npm/with-workspace/yarn.lock file and fou ] } }, + { + "bom-ref": "pkg:pypi/numpy", + "type": "library", + "name": "numpy", + "purl": "pkg:pypi/numpy", + "properties": [ + { + "name": "datadog:is-direct", + "value": "true" + }, + { + "name": "datadog:package-manager", + "value": "uv" + }, + { + "name": "datadog:requires-transitive-enrichment", + "value": "true" + }, + { + "name": "datadog:version-range", + "value": "/u003e=1.24" + } + ], + "evidence": { + "occurrences": [ + { + "location": "{/"block/":{/"file_name/":/"pyproject.toml/",/"line_start/":8,/"line_end/":8,/"column_start/":5,/"column_end/":19,/"role/":/"manifest/"},/"name/":{/"file_name/":/"pyproject.toml/",/"line_start/":8,/"line_end/":8,/"column_start/":6,/"column_end/":11,/"role/":/"manifest/"},/"version/":{/"file_name/":/"pyproject.toml/",/"line_start/":8,/"line_end/":8,/"column_start/":11,/"column_end/":17,/"role/":/"manifest/"}}" + } + ] + } + }, { "bom-ref": "pkg:pypi/pytest@8.2.0", "type": "library", @@ -2669,6 +2704,10 @@ Scanned /fixtures/integration-npm/with-workspace/yarn.lock file and fou { "name": "datadog:package-manager", "value": "uv" + }, + { + "name": "datadog:requires-transitive-enrichment", + "value": "true" } ], "evidence": { @@ -2693,6 +2732,10 @@ Scanned /fixtures/integration-npm/with-workspace/yarn.lock file and fou { "name": "datadog:package-manager", "value": "uv" + }, + { + "name": "datadog:requires-transitive-enrichment", + "value": "true" } ], "evidence": { @@ -2710,7 +2753,7 @@ Scanned /fixtures/integration-npm/with-workspace/yarn.lock file and fou [TestRun/Scan_pyproject.toml_without_lock_file - 2] Scanning directory './fixtures/integration-pyproject', resolved absolute path '/fixtures/integration-pyproject' -Scanned /fixtures/integration-pyproject/pyproject.toml file and found 3 packages +Scanned /fixtures/integration-pyproject/pyproject.toml file and found 4 packages [reachability] Reachability analysis is disabled --- diff --git a/pkg/lockfile/fixtures/pyproject-toml-extractor/poetry-pinned/pyproject.toml b/pkg/lockfile/fixtures/pyproject-toml-extractor/poetry-pinned/pyproject.toml index 7780efb0..c68a3d73 100644 --- a/pkg/lockfile/fixtures/pyproject-toml-extractor/poetry-pinned/pyproject.toml +++ b/pkg/lockfile/fixtures/pyproject-toml-extractor/poetry-pinned/pyproject.toml @@ -12,3 +12,4 @@ requests = "==2.28.0" flask = "==2.3.2" boto3 = "1.26.0" numpy = "^1.24" +scipy = "1.*" diff --git a/pkg/lockfile/internal/testutil/helpers.go b/pkg/lockfile/internal/testutil/helpers.go index e8bb91cd..3ade9567 100644 --- a/pkg/lockfile/internal/testutil/helpers.go +++ b/pkg/lockfile/internal/testutil/helpers.go @@ -47,9 +47,10 @@ func PackageToString(pkg lockfile.PackageDetails) string { } return fmt.Sprintf( - "%s@%s {ecosystem=%q targetVersions=%v packageManager=%q commit=%q depGroups=%v exclusions=%v targetFrameworks=%v isDirect=%t blockLocation=%s nameLocation=%s versionLocation=%s}", + "%s@%s {versionRange=%q ecosystem=%q targetVersions=%v packageManager=%q commit=%q depGroups=%v exclusions=%v targetFrameworks=%v isDirect=%t requiresTransitiveEnrichment=%t blockLocation=%s nameLocation=%s versionLocation=%s}", pkg.Name, pkg.Version, + pkg.VersionRange, pkg.Ecosystem, pkg.TargetVersions, pkg.PackageManager, @@ -58,6 +59,7 @@ func PackageToString(pkg lockfile.PackageDetails) string { pkg.Exclusions, pkg.TargetFrameworks, pkg.IsDirect, + pkg.RequiresTransitiveEnrichment, formatLoc(pkg.BlockLocation), formatLocPtr(pkg.NameLocation), formatLocPtr(pkg.VersionLocation), diff --git a/pkg/lockfile/python/parse-pyproject-toml.go b/pkg/lockfile/python/parse-pyproject-toml.go index 473611fd..95a2effb 100644 --- a/pkg/lockfile/python/parse-pyproject-toml.go +++ b/pkg/lockfile/python/parse-pyproject-toml.go @@ -3,9 +3,7 @@ package python import ( "fmt" "io" - "maps" "path/filepath" - "slices" "strings" "github.com/BurntSushi/toml" @@ -57,11 +55,11 @@ func detectPackageManager(pyproject *PyProjectTOML) models.PackageManager { return models.Unknown } -// extractPositions finds the line containing the given rawName and version in lines and returns -// block, name, and version positions. rawName is the pre-normalization name as it appears in the -// file (e.g. "my_pkg"), which may differ from the normalized name used as the package key. -// For PEP 621 string deps the version appears inline; for Poetry key=value deps it is quoted. -func extractPositions(lines []string, filePath, rawName, version string, isPoetry bool) (models.FilePosition, *models.FilePosition, *models.FilePosition) { +// extractPositions finds the line containing the dependency and returns block, name, and version +// positions. rawName is the pre-normalization name as it appears in the file (e.g. "my_pkg"), which +// may differ from the normalized name used as the package key. rawValue is the full PEP 508 string +// dependency when available, and anchors inline array entries that share the same line. +func extractPositions(lines []string, filePath, rawName, rawValue, version string, isPoetry bool) (models.FilePosition, *models.FilePosition, *models.FilePosition) { lowerRawName := strings.ToLower(rawName) lowerVersion := strings.ToLower(version) @@ -70,6 +68,15 @@ func extractPositions(lines []string, filePath, rawName, version string, isPoetr if strings.HasPrefix(strings.TrimSpace(lowerLine), "#") { continue } + rawValueStart := -1 + sourceRawValue := rawValue + if rawValue != "" { + var ok bool + rawValueStart, sourceRawValue, ok = quotedTOMLValue(line, rawValue) + if !ok { + continue + } + } if !strings.Contains(lowerLine, lowerRawName) { continue } @@ -79,7 +86,7 @@ func extractPositions(lines []string, filePath, rawName, version string, isPoetr lineNumber := i + 1 - nameLocation := fileposition.ExtractStringPositionInBlock([]string{lowerLine}, lowerRawName, lineNumber) + nameLocation := extractNamePosition(lineNumber, line, sourceRawValue, rawValueStart, rawName, isPoetry) if nameLocation == nil { continue } @@ -96,10 +103,16 @@ func extractPositions(lines []string, filePath, rawName, version string, isPoetr var versionLocation *models.FilePosition if version != "" { if isPoetry { - versionLocation = fileposition.ExtractDelimitedRegexpPositionInBlock([]string{lowerLine}, "[^\"']+", lineNumber, "version\\s*=\\s*[\"']", "[\"']") - if versionLocation == nil { + if rawValue != "" { + versionLocation = extractStringPosition(lineNumber, line, sourceRawValue, rawValueStart, sourceRawValue) + } else { + versionLocation = fileposition.ExtractDelimitedRegexpPositionInBlock([]string{lowerLine}, "[^\"']+", lineNumber, "version\\s*=\\s*[\"']", "[\"']") + } + if versionLocation == nil && rawValue == "" { versionLocation = fileposition.ExtractDelimitedRegexpPositionInBlock([]string{lowerLine}, "[^\"']+", lineNumber, "=\\s*[\"']", "[\"']") } + } else if rawValue != "" { + versionLocation = extractStringPosition(lineNumber, line, sourceRawValue, rawValueStart, version) } else { versionLocation = fileposition.ExtractStringPositionInBlock([]string{lowerLine}, lowerVersion, lineNumber) } @@ -114,6 +127,63 @@ func extractPositions(lines []string, filePath, rawName, version string, isPoetr return models.FilePosition{Filename: filePath}, nil, nil } +func quotedTOMLValue(line, value string) (int, string, bool) { + for _, candidate := range tomlBasicStringCandidates(value) { + if quotedStart := strings.Index(line, `"`+candidate+`"`); quotedStart != -1 { + return quotedStart + len(`"`), candidate, true + } + } + + if quotedStart := strings.Index(line, `'`+value+`'`); quotedStart != -1 { + return quotedStart + len(`'`), value, true + } + + return 0, "", false +} + +func tomlBasicStringCandidates(value string) []string { + values := []string{value} + if !strings.HasPrefix(value, "==") { + values = append(values, "=="+value) + } + + candidates := make([]string, 0, len(values)*2) + for _, value := range values { + candidates = append(candidates, value) + if escaped := strings.NewReplacer(`\`, `\\`, `"`, `\"`).Replace(value); escaped != value { + candidates = append(candidates, escaped) + } + } + + return candidates +} + +func extractNamePosition(lineNumber int, line, rawValue string, rawValueStart int, rawName string, isPoetry bool) *models.FilePosition { + if isPoetry { + return fileposition.ExtractStringPositionInBlock([]string{strings.ToLower(line)}, strings.ToLower(rawName), lineNumber) + } + + return extractStringPosition(lineNumber, line, rawValue, rawValueStart, rawName) +} + +func extractStringPosition(lineNumber int, line, rawValue string, rawValueStart int, value string) *models.FilePosition { + if rawValueStart == -1 { + return fileposition.ExtractStringPositionInBlock([]string{strings.ToLower(line)}, strings.ToLower(value), lineNumber) + } + + index := strings.Index(strings.ToLower(rawValue), strings.ToLower(value)) + if index == -1 { + return nil + } + + columnStart := rawValueStart + index + 1 + + return &models.FilePosition{ + Line: models.Position{Start: lineNumber, End: lineNumber}, + Column: models.Position{Start: columnStart, End: columnStart + len(value)}, + } +} + func (e PyProjectTOMLExtractor) ShouldExtract(path string) bool { return filepath.Base(path) == models.PyProjectTomlFilePath.String() } @@ -147,34 +217,39 @@ func (e PyProjectTOMLExtractor) Extract(f lockfile.DepFile, context lockfile.Sca lines := fileposition.BytesToLines(content) pm := detectPackageManager(&pyproject) - packages := map[string]lockfile.PackageDetails{} + collector := pyprojectPackageCollector{ + packages: map[string]lockfile.PackageDetails{}, + lines: lines, + path: f.Path(), + packageManager: pm, + } for _, dep := range pyproject.Project.Dependencies { - if name, rawName, version, ok := parsePEP508Pin(dep); ok { - block, nameLocation, versionLocation := extractPositions(lines, f.Path(), rawName, version, false) - addOrMergeGroups(packages, name, version, []string{"prod"}, pm, block, nameLocation, versionLocation) + dependency, ok := parsePEP508Dependency(dep) + if ok { + collector.addDependency(dependency, []string{string(models.DepGroupProd)}, false) } } - for group, deps := range pyproject.Project.OptionalDependencies { + for optionalDependencyGroup, deps := range pyproject.Project.OptionalDependencies { for _, dep := range deps { - if name, rawName, version, ok := parsePEP508Pin(dep); ok { - block, nameLocation, versionLocation := extractPositions(lines, f.Path(), rawName, version, false) - addOrMergeGroups(packages, name, version, []string{group}, pm, block, nameLocation, versionLocation) + dependency, ok := parsePEP508Dependency(dep) + if ok { + collector.addDependency(dependency, []string{optionalDependencyGroup}, false) } } } - for group, items := range pyproject.DependencyGroups { - for _, item := range items { + for dependencyGroupName, dependencyGroupItems := range pyproject.DependencyGroups { + for _, item := range dependencyGroupItems { dep, ok := item.(string) if !ok { - // skip {include-group = "..."} table entries + // PEP 735 dependency groups can include table entries such as {include-group = "..."}. continue } - if name, rawName, version, ok := parsePEP508Pin(dep); ok { - block, nameLocation, versionLocation := extractPositions(lines, f.Path(), rawName, version, false) - addOrMergeGroups(packages, name, version, []string{group}, pm, block, nameLocation, versionLocation) + dependency, ok := parsePEP508Dependency(dep) + if ok { + collector.addDependency(dependency, []string{dependencyGroupName}, false) } } } @@ -185,147 +260,168 @@ func (e PyProjectTOMLExtractor) Extract(f lockfile.DepFile, context lockfile.Sca if name == "python" { continue } - if version, ok := parsePoetryPin(val); ok { - normalized := normalizedRequirementName(name) - block, nameLocation, versionLocation := extractPositions(lines, f.Path(), name, version, true) - addOrMergeGroups(packages, normalized, version, []string{"prod"}, pm, block, nameLocation, versionLocation) + if version, versionRange, ok := parsePoetryDependency(val); ok { + collector.addDependency(pep508Dependency{ + Name: normalizedRequirementName(name), + RawName: name, + RawValue: versionOrRange(version, versionRange), + Version: version, + VersionRange: versionRange, + }, []string{string(models.DepGroupProd)}, true) } } for name, val := range pyproject.Tool.Poetry.DevDependencies { - if version, ok := parsePoetryPin(val); ok { - normalized := normalizedRequirementName(name) - block, nameLocation, versionLocation := extractPositions(lines, f.Path(), name, version, true) - addOrMergeGroups(packages, normalized, version, []string{"dev"}, pm, block, nameLocation, versionLocation) + if version, versionRange, ok := parsePoetryDependency(val); ok { + collector.addDependency(pep508Dependency{ + Name: normalizedRequirementName(name), + RawName: name, + RawValue: versionOrRange(version, versionRange), + Version: version, + VersionRange: versionRange, + }, []string{string(models.DepGroupDev)}, true) } } - for groupName, group := range pyproject.Tool.Poetry.Group { - for name, val := range group.Dependencies { - if version, ok := parsePoetryPin(val); ok { - normalized := normalizedRequirementName(name) - block, nameLocation, versionLocation := extractPositions(lines, f.Path(), name, version, true) - addOrMergeGroups(packages, normalized, version, []string{groupName}, pm, block, nameLocation, versionLocation) + for poetryGroupName, poetryGroup := range pyproject.Tool.Poetry.Group { + for name, val := range poetryGroup.Dependencies { + if version, versionRange, ok := parsePoetryDependency(val); ok { + collector.addDependency(pep508Dependency{ + Name: normalizedRequirementName(name), + RawName: name, + RawValue: versionOrRange(version, versionRange), + Version: version, + VersionRange: versionRange, + }, []string{poetryGroupName}, true) } } } } - return slices.Collect(maps.Values(packages)), nil + return sortedPyprojectPackages(collector.packages), nil } -// addOrMergeGroups adds a package to the map, or if it already exists (same name+version), -// merges the new dep groups into the existing entry rather than dropping the duplicate. -func addOrMergeGroups(packages map[string]lockfile.PackageDetails, name, version string, groups []string, pm models.PackageManager, block models.FilePosition, nameLocation, versionLocation *models.FilePosition) { - key := name + "@" + version - if existing, exists := packages[key]; exists { - for _, g := range groups { - if !slices.Contains(existing.DepGroups, g) { - existing.DepGroups = append(existing.DepGroups, g) - } - } - packages[key] = existing - - return - } - packages[key] = lockfile.PackageDetails{ - Name: name, - Version: version, - PackageManager: pm, - Ecosystem: models.EcosystemPyPI, - IsDirect: true, - DepGroups: groups, - BlockLocation: block, - NameLocation: nameLocation, - VersionLocation: versionLocation, - LocationRole: models.LocationRoleManifest, - } +type pep508Dependency struct { + Name string + RawName string + RawValue string + Version string + VersionRange string } -// parsePEP508Pin parses a PEP 508 dependency string and returns the normalized name, the raw -// (pre-normalization) name as written in the file, and the version — only when the dependency -// is an exact pin (==). Returns ok=false for all other specifiers. -func parsePEP508Pin(dep string) (name, rawName, version string, ok bool) { +// parsePEP508Dependency parses a PEP 508 dependency string into a normalized name, +// the raw name as written in the file, and either an exact version or original version range. +func parsePEP508Dependency(dep string) (pep508Dependency, bool) { + rawValue := dep + // strip environment markers (PEP 508) dep, _, _ = strings.Cut(dep, ";") dep = strings.TrimSpace(dep) + if strings.Contains(dep, " @ ") { + return pep508Dependency{}, false + } // strip parenthesized specifier: "requests (==2.28.0)" -> "requests ==2.28.0" dep = strings.NewReplacer("(", "", ")", "").Replace(dep) - // reject if any non-exact operator is present - for _, op := range []string{"===", "!=", ">=", "<=", "~=", ">", "<"} { - if strings.Contains(dep, op) { - return "", "", "", false - } + opIndex, op := findFirstPEP508Specifier(dep) + if opIndex == -1 || op == "===" { + return pep508Dependency{}, false } - rawNamePart, rawVersion, found := strings.Cut(dep, "==") - if !found { - return "", "", "", false + // strip extras: "requests[security]" -> "requests" + fileRawName, _, _ := strings.Cut(strings.TrimSpace(dep[:opIndex]), "[") + fileRawName = strings.TrimSpace(fileRawName) + specifier := strings.TrimSpace(dep[opIndex:]) + + if fileRawName == "" || specifier == "" { + return pep508Dependency{}, false } - // reject multi-constraint specs: "==2.28.0,!=2.28.0" - if strings.Contains(rawVersion, ",") { - return "", "", "", false + if op == "==" { + rawVersion := strings.TrimSpace(specifier[len(op):]) + if rawVersion != "" && !strings.Contains(rawVersion, ",") && isConcreteVersion(rawVersion) { + return pep508Dependency{ + Name: normalizedRequirementName(fileRawName), + RawName: fileRawName, + RawValue: rawValue, + Version: rawVersion, + }, true + } } - rawVersion = strings.TrimSpace(rawVersion) - // strip extras: "requests[security]" -> "requests" - fileRawName, _, _ := strings.Cut(strings.TrimSpace(rawNamePart), "[") - fileRawName = strings.TrimSpace(fileRawName) + return pep508Dependency{ + Name: normalizedRequirementName(fileRawName), + RawName: fileRawName, + RawValue: rawValue, + VersionRange: specifier, + }, true +} - if fileRawName == "" || rawVersion == "" || !isConcreteVersion(rawVersion) { - return "", "", "", false +func findFirstPEP508Specifier(dep string) (int, string) { + firstIndex := -1 + firstOp := "" + for _, op := range []string{"===", "==", "!=", ">=", "<=", "~=", ">", "<"} { + index := strings.Index(dep, op) + if index == -1 { + continue + } + if firstIndex == -1 || index < firstIndex { + firstIndex = index + firstOp = op + } } - return normalizedRequirementName(fileRawName), fileRawName, rawVersion, true + return firstIndex, firstOp } -// parsePoetryPin parses a Poetry dependency value (string or inline table) and returns -// the version only when it is an exact pin (== prefix) with a concrete version. -func parsePoetryPin(val any) (version string, ok bool) { +// parsePoetryDependency parses a Poetry dependency value (string or inline table) and +// returns either an exact version or the original version range. +func parsePoetryDependency(val any) (version, versionRange string, ok bool) { var versionStr string switch v := val.(type) { case string: versionStr = v case map[string]any: + for _, directRefKey := range []string{"path", "git", "url"} { + if _, exists := v[directRefKey]; exists { + return "", "", false + } + } versionStr, ok = v["version"].(string) if !ok { - return "", false + return "", "", false } default: - return "", false + return "", "", false } versionStr = strings.TrimSpace(versionStr) + if versionStr == "" || strings.HasPrefix(versionStr, "===") { + return "", "", false + } - // Poetry bare version string "2.28.0" is an implicit exact pin + // Poetry bare version string "2.28.0" is an implicit exact pin. + // Other digit-starting constraints, such as "1.*", are still ranges. if len(versionStr) > 0 && !strings.ContainsAny(string(versionStr[0]), "=!<>~^*") { if strings.Contains(versionStr, ",") { - return "", false + return "", versionStr, true } if isConcreteVersion(versionStr) { - return versionStr, true + return versionStr, "", true } - return "", false + return "", versionStr, true } - // reject === (arbitrary equality) and any non-== operator - if !strings.HasPrefix(versionStr, "==") || strings.HasPrefix(versionStr, "===") { - return "", false - } + if strings.HasPrefix(versionStr, "==") { + after := strings.TrimSpace(versionStr[2:]) - after := strings.TrimSpace(versionStr[2:]) - - // reject multi-constraint: "==2.28.0,!=2.28.1" is not an exact pin - if strings.Contains(after, ",") { - return "", false - } + if !strings.Contains(after, ",") && isConcreteVersion(after) { + return after, "", true + } - if isConcreteVersion(after) { - return after, true + return "", versionStr, true } - return "", false + return "", versionStr, true } // isConcreteVersion returns true if version looks like a fully-specified version diff --git a/pkg/lockfile/python/parse-pyproject-toml_test.go b/pkg/lockfile/python/parse-pyproject-toml_test.go index f37877ad..927985d9 100644 --- a/pkg/lockfile/python/parse-pyproject-toml_test.go +++ b/pkg/lockfile/python/parse-pyproject-toml_test.go @@ -79,27 +79,41 @@ func TestParsePyProjectTOML_PEP621_PinnedExtracted(t *testing.T) { expectNilErr(t, err) testutil.ExpectPackages(t, packages, []lockfile.PackageDetails{ { - Name: "requests", Version: "2.28.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"prod"}, + Name: "requests", Version: "2.28.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, BlockLocation: pos(path, 5, 5, 5, 24), NameLocation: posPtr(path, 5, 5, 6, 14), VersionLocation: posPtr(path, 5, 5, 16, 22), LocationRole: models.LocationRoleManifest, }, { - Name: "flask", Version: "2.3.2", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"prod"}, + Name: "flask", Version: "2.3.2", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, BlockLocation: pos(path, 6, 6, 5, 20), NameLocation: posPtr(path, 6, 6, 6, 11), VersionLocation: posPtr(path, 6, 6, 13, 18), LocationRole: models.LocationRoleManifest, }, { - Name: "boto3", Version: "1.26.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"prod"}, + Name: "numpy", VersionRange: ">=1.24", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, + BlockLocation: pos(path, 7, 7, 5, 19), NameLocation: posPtr(path, 7, 7, 6, 11), VersionLocation: posPtr(path, 7, 7, 11, 17), LocationRole: models.LocationRoleManifest, + }, + { + Name: "boto3", Version: "1.26.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, BlockLocation: pos(path, 9, 9, 5, 21), NameLocation: posPtr(path, 9, 9, 6, 11), VersionLocation: posPtr(path, 9, 9, 13, 19), LocationRole: models.LocationRoleManifest, }, }) } -func TestParsePyProjectTOML_PEP621_UnpinnedSkipped(t *testing.T) { +func TestParsePyProjectTOML_PEP621_RangesExtractedAndUnversionedSkipped(t *testing.T) { t.Parallel() - packages, err := python.ParsePyProjectTOML(fixturePath(t, "../fixtures/pyproject-toml-extractor/no-pinned/pyproject.toml")) + path := fixturePath(t, "../fixtures/pyproject-toml-extractor/no-pinned/pyproject.toml") + packages, err := python.ParsePyProjectTOML(path) expectNilErr(t, err) - testutil.ExpectPackages(t, packages, []lockfile.PackageDetails{}) + testutil.ExpectPackages(t, packages, []lockfile.PackageDetails{ + { + Name: "requests", VersionRange: ">=2.28", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, + BlockLocation: pos(path, 5, 5, 5, 22), NameLocation: posPtr(path, 5, 5, 6, 14), VersionLocation: posPtr(path, 5, 5, 14, 20), LocationRole: models.LocationRoleManifest, + }, + { + Name: "flask", VersionRange: "~=2.3", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, + BlockLocation: pos(path, 6, 6, 5, 18), NameLocation: posPtr(path, 6, 6, 6, 11), VersionLocation: posPtr(path, 6, 6, 11, 16), LocationRole: models.LocationRoleManifest, + }, + }) } // ============================================================================ @@ -115,19 +129,19 @@ func TestParsePyProjectTOML_PEP621_OptionalDeps(t *testing.T) { expectNilErr(t, err) testutil.ExpectPackages(t, packages, []lockfile.PackageDetails{ { - Name: "requests", Version: "2.28.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"prod"}, + Name: "requests", Version: "2.28.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, BlockLocation: pos(path, 5, 5, 5, 24), NameLocation: posPtr(path, 5, 5, 6, 14), VersionLocation: posPtr(path, 5, 5, 16, 22), LocationRole: models.LocationRoleManifest, }, { - Name: "pytest", Version: "7.4.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"dev"}, + Name: "pytest", Version: "7.4.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"dev"}, BlockLocation: pos(path, 10, 10, 5, 21), NameLocation: posPtr(path, 10, 10, 6, 12), VersionLocation: posPtr(path, 10, 10, 14, 19), LocationRole: models.LocationRoleManifest, }, { - Name: "mypy", Version: "1.5.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"dev"}, + Name: "mypy", Version: "1.5.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"dev"}, BlockLocation: pos(path, 11, 11, 5, 19), NameLocation: posPtr(path, 11, 11, 6, 10), VersionLocation: posPtr(path, 11, 11, 12, 17), LocationRole: models.LocationRoleManifest, }, { - Name: "sphinx", Version: "7.1.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"docs"}, + Name: "sphinx", Version: "7.1.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"docs"}, BlockLocation: pos(path, 14, 14, 5, 21), NameLocation: posPtr(path, 14, 14, 6, 12), VersionLocation: posPtr(path, 14, 14, 14, 19), LocationRole: models.LocationRoleManifest, }, }) @@ -146,19 +160,19 @@ func TestParsePyProjectTOML_DependencyGroups_PinnedExtracted(t *testing.T) { expectNilErr(t, err) testutil.ExpectPackages(t, packages, []lockfile.PackageDetails{ { - Name: "requests", Version: "2.28.0", PackageManager: models.Uv, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"prod"}, + Name: "requests", Version: "2.28.0", PackageManager: models.Uv, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, BlockLocation: pos(path, 5, 5, 5, 24), NameLocation: posPtr(path, 5, 5, 6, 14), VersionLocation: posPtr(path, 5, 5, 16, 22), LocationRole: models.LocationRoleManifest, }, { - Name: "pytest", Version: "7.4.0", PackageManager: models.Uv, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"dev"}, + Name: "pytest", Version: "7.4.0", PackageManager: models.Uv, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"dev"}, BlockLocation: pos(path, 12, 12, 5, 21), NameLocation: posPtr(path, 12, 12, 6, 12), VersionLocation: posPtr(path, 12, 12, 14, 19), LocationRole: models.LocationRoleManifest, }, { - Name: "ruff", Version: "0.1.0", PackageManager: models.Uv, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"dev"}, + Name: "ruff", Version: "0.1.0", PackageManager: models.Uv, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"dev"}, BlockLocation: pos(path, 13, 13, 5, 19), NameLocation: posPtr(path, 13, 13, 6, 10), VersionLocation: posPtr(path, 13, 13, 12, 17), LocationRole: models.LocationRoleManifest, }, { - Name: "hypothesis", Version: "6.100.0", PackageManager: models.Uv, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"test"}, + Name: "hypothesis", Version: "6.100.0", PackageManager: models.Uv, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"test"}, BlockLocation: pos(path, 17, 17, 5, 27), NameLocation: posPtr(path, 17, 17, 6, 16), VersionLocation: posPtr(path, 17, 17, 18, 25), LocationRole: models.LocationRoleManifest, }, }) @@ -175,20 +189,28 @@ func TestParsePyProjectTOML_Poetry_PinnedExtracted(t *testing.T) { packages, err := python.ParsePyProjectTOML(path) expectNilErr(t, err) - // python version constraint must be skipped; unpinned numpy (^) must be skipped + // python version constraint must be skipped; the numpy range is preserved for reducer resolution testutil.ExpectPackages(t, packages, []lockfile.PackageDetails{ { - Name: "requests", Version: "2.28.0", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"prod"}, + Name: "requests", Version: "2.28.0", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, BlockLocation: pos(path, 11, 11, 1, 22), NameLocation: posPtr(path, 11, 11, 1, 9), VersionLocation: posPtr(path, 11, 11, 13, 21), LocationRole: models.LocationRoleManifest, }, { - Name: "flask", Version: "2.3.2", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"prod"}, + Name: "flask", Version: "2.3.2", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, BlockLocation: pos(path, 12, 12, 1, 18), NameLocation: posPtr(path, 12, 12, 1, 6), VersionLocation: posPtr(path, 12, 12, 10, 17), LocationRole: models.LocationRoleManifest, }, { - Name: "boto3", Version: "1.26.0", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"prod"}, + Name: "boto3", Version: "1.26.0", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, BlockLocation: pos(path, 13, 13, 1, 17), NameLocation: posPtr(path, 13, 13, 1, 6), VersionLocation: posPtr(path, 13, 13, 10, 16), LocationRole: models.LocationRoleManifest, }, + { + Name: "numpy", VersionRange: "^1.24", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, + BlockLocation: pos(path, 14, 14, 1, 16), NameLocation: posPtr(path, 14, 14, 1, 6), VersionLocation: posPtr(path, 14, 14, 10, 15), LocationRole: models.LocationRoleManifest, + }, + { + Name: "scipy", VersionRange: "1.*", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, + BlockLocation: pos(path, 15, 15, 1, 14), NameLocation: posPtr(path, 15, 15, 1, 6), VersionLocation: posPtr(path, 15, 15, 10, 13), LocationRole: models.LocationRoleManifest, + }, }) } @@ -201,19 +223,19 @@ func TestParsePyProjectTOML_Poetry_GroupDeps(t *testing.T) { expectNilErr(t, err) testutil.ExpectPackages(t, packages, []lockfile.PackageDetails{ { - Name: "requests", Version: "2.28.0", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"prod"}, + Name: "requests", Version: "2.28.0", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, BlockLocation: pos(path, 11, 11, 1, 22), NameLocation: posPtr(path, 11, 11, 1, 9), VersionLocation: posPtr(path, 11, 11, 13, 21), LocationRole: models.LocationRoleManifest, }, { - Name: "black", Version: "23.1.0", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"dev"}, + Name: "black", Version: "23.1.0", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"dev"}, BlockLocation: pos(path, 14, 14, 1, 19), NameLocation: posPtr(path, 14, 14, 1, 6), VersionLocation: posPtr(path, 14, 14, 10, 18), LocationRole: models.LocationRoleManifest, }, { - Name: "pytest", Version: "7.4.0", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"test"}, + Name: "pytest", Version: "7.4.0", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"test"}, BlockLocation: pos(path, 17, 17, 1, 19), NameLocation: posPtr(path, 17, 17, 1, 7), VersionLocation: posPtr(path, 17, 17, 11, 18), LocationRole: models.LocationRoleManifest, }, { - Name: "pytest-cov", Version: "4.1.0", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"test"}, + Name: "pytest-cov", Version: "4.1.0", PackageManager: models.Poetry, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"test"}, BlockLocation: pos(path, 18, 18, 1, 35), NameLocation: posPtr(path, 18, 18, 1, 11), VersionLocation: posPtr(path, 18, 18, 26, 33), LocationRole: models.LocationRoleManifest, }, }) @@ -254,7 +276,7 @@ func TestParsePyProjectTOML_PEP621_ParenthesizedPin(t *testing.T) { expectNilErr(t, err) testutil.ExpectPackages(t, packages, []lockfile.PackageDetails{ { - Name: "requests", Version: "2.28.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"prod"}, + Name: "requests", Version: "2.28.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, BlockLocation: pos(path, 5, 5, 5, 27), NameLocation: posPtr(path, 5, 5, 6, 14), VersionLocation: posPtr(path, 5, 5, 18, 24), LocationRole: models.LocationRoleManifest, }, }) @@ -273,12 +295,159 @@ func TestParsePyProjectTOML_MergesGroupsForDuplicatePackage(t *testing.T) { expectNilErr(t, err) testutil.ExpectPackages(t, packages, []lockfile.PackageDetails{ { - Name: "requests", Version: "2.28.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, DepGroups: []string{"prod", "dev"}, + Name: "requests", Version: "2.28.0", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod", "dev"}, BlockLocation: pos(path, 5, 5, 5, 24), NameLocation: posPtr(path, 5, 5, 6, 14), VersionLocation: posPtr(path, 5, 5, 16, 22), LocationRole: models.LocationRoleManifest, }, }) } +func TestParsePyProjectTOML_ConflictingRangesAreSourceOrdered(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + path := filepath.Join(dir, "pyproject.toml") + content := `[project] +name = "my-app" +version = "1.0.0" +dependencies = [ + "requests>=1,<2", + "flask>=2", +] + +[project.optional-dependencies] +dev = [ + "requests>=2,<3", +] +` + if err := os.WriteFile(path, []byte(content), 0o600); err != nil { + t.Fatalf("could not write pyproject fixture: %v", err) + } + + packages, err := python.ParsePyProjectTOML(path) + + expectNilErr(t, err) + testutil.ExpectPackages(t, packages, []lockfile.PackageDetails{ + { + Name: "requests", VersionRange: ">=1,<2", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, + BlockLocation: pos(path, 5, 5, 5, 22), NameLocation: posPtr(path, 5, 5, 6, 14), VersionLocation: posPtr(path, 5, 5, 14, 20), LocationRole: models.LocationRoleManifest, + }, + { + Name: "flask", VersionRange: ">=2", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"prod"}, + BlockLocation: pos(path, 6, 6, 5, 16), NameLocation: posPtr(path, 6, 6, 6, 11), VersionLocation: posPtr(path, 6, 6, 11, 14), LocationRole: models.LocationRoleManifest, + }, + { + Name: "requests", VersionRange: ">=2,<3", PackageManager: models.Unknown, Ecosystem: models.EcosystemPyPI, IsDirect: true, RequiresTransitiveEnrichment: true, DepGroups: []string{"dev"}, + BlockLocation: pos(path, 11, 11, 5, 22), NameLocation: posPtr(path, 11, 11, 6, 14), VersionLocation: posPtr(path, 11, 11, 14, 20), LocationRole: models.LocationRoleManifest, + }, + }) + + if len(packages) != 3 { + t.Fatalf("expected 3 packages, got %d", len(packages)) + } + if packages[0].Name != "requests" || packages[0].VersionRange != ">=1,<2" { + t.Fatalf("expected first package to be earliest requests range, got %s %q", packages[0].Name, packages[0].VersionRange) + } +} + +func TestParsePyProjectTOML_PrefixRangesUseSourceOrder(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + content string + packageManager models.PackageManager + line int + nameColumn int + }{ + { + name: "pep621 multiline", + content: `[project] +name = "my-app" +version = "1.0.0" +dependencies = [ + "foo>=10", + "foo>=1", +] +`, + packageManager: models.Unknown, + line: 5, + nameColumn: 6, + }, + { + name: "pep621 inline", + content: `[project] +name = "my-app" +version = "1.0.0" +dependencies = ["foo>=10", "foo>=1"] +`, + packageManager: models.Unknown, + line: 4, + nameColumn: 18, + }, + { + name: "pep621 escaped marker", + content: `[project] +name = "my-app" +version = "1.0.0" +dependencies = [ + "foo>=10; python_version == \"3.11\"", + "foo>=1", +] +`, + packageManager: models.Unknown, + line: 5, + nameColumn: 6, + }, + { + name: "poetry", + content: `[tool.poetry] +name = "my-app" +version = "1.0.0" + +[tool.poetry.dependencies] +python = "^3.11" +foo = ">=10" + +[tool.poetry.group.dev.dependencies] +foo = ">=1" +`, + packageManager: models.Poetry, + line: 7, + nameColumn: 1, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + path := filepath.Join(dir, "pyproject.toml") + if err := os.WriteFile(path, []byte(tt.content), 0o600); err != nil { + t.Fatalf("could not write pyproject fixture: %v", err) + } + + packages, err := python.ParsePyProjectTOML(path) + + expectNilErr(t, err) + if len(packages) == 0 { + t.Fatalf("expected packages, got none") + } + + first := packages[0] + if first.Name != "foo" || first.VersionRange != ">=10" || first.PackageManager != tt.packageManager { + t.Fatalf("expected first package to be earliest foo range, got %s %q %q", first.Name, first.VersionRange, first.PackageManager) + } + if first.BlockLocation.Line.Start != tt.line { + t.Fatalf("expected first package on line %d, got %d", tt.line, first.BlockLocation.Line.Start) + } + if first.NameLocation == nil || first.NameLocation.Column.Start != tt.nameColumn { + t.Fatalf("expected first package name on column %d, got %v", tt.nameColumn, first.NameLocation) + } + }) + } +} + // ============================================================================ // Error cases // ============================================================================ diff --git a/pkg/lockfile/python/pyproject_package_collector.go b/pkg/lockfile/python/pyproject_package_collector.go new file mode 100644 index 00000000..f64feea5 --- /dev/null +++ b/pkg/lockfile/python/pyproject_package_collector.go @@ -0,0 +1,176 @@ +package python + +import ( + "cmp" + "log" + "maps" + "slices" + "strings" + + "github.com/DataDog/datadog-sbom-generator/pkg/lockfile" + "github.com/DataDog/datadog-sbom-generator/pkg/models" +) + +type pyprojectPackageCollector struct { + packages map[string]lockfile.PackageDetails + lines []string + path string + packageManager models.PackageManager +} + +func (c *pyprojectPackageCollector) addDependency(dependency pep508Dependency, groups []string, isPoetry bool) { + if (dependency.Version == "") == (dependency.VersionRange == "") { + log.Printf( + "Skipping pyproject dependency %q from %s: expected exactly one of version or version range, got version=%q versionRange=%q\n", + dependency.Name, + c.path, + dependency.Version, + dependency.VersionRange, + ) + + return + } + + block, nameLocation, versionLocation := extractPositions(c.lines, c.path, dependency.RawName, dependency.RawValue, versionOrRange(dependency.Version, dependency.VersionRange), isPoetry) + c.addOrMergePackageGroups(lockfile.PackageDetails{ + Name: dependency.Name, + Version: dependency.Version, + VersionRange: dependency.VersionRange, + PackageManager: c.packageManager, + Ecosystem: models.EcosystemPyPI, + IsDirect: true, + RequiresTransitiveEnrichment: true, + DepGroups: groups, + BlockLocation: block, + NameLocation: nameLocation, + VersionLocation: versionLocation, + LocationRole: models.LocationRoleManifest, + }) +} + +// addOrMergePackageGroups adds a package to the map, or if it already exists (same name+version/range), +// merges the new dep groups into the existing entry rather than dropping the duplicate. +func (c *pyprojectPackageCollector) addOrMergePackageGroups(pkg lockfile.PackageDetails) { + key := pyprojectPackageKey(pkg) + if existing, exists := c.packages[key]; exists { + mergeDepGroups(&existing, pkg.DepGroups) + c.packages[key] = existing + + return + } + c.logVersionRangeConflict(pkg) + c.packages[key] = pkg +} + +func pyprojectPackageKey(pkg lockfile.PackageDetails) string { + return pkg.Name + "@" + pkg.Version + "|" + pkg.VersionRange +} + +func mergeDepGroups(pkg *lockfile.PackageDetails, groups []string) { + for _, group := range groups { + if !slices.Contains(pkg.DepGroups, group) { + pkg.DepGroups = append(pkg.DepGroups, group) + } + } +} + +func (c *pyprojectPackageCollector) logVersionRangeConflict(pkg lockfile.PackageDetails) { + if pkg.VersionRange == "" { + return + } + + for _, existing := range c.packages { + if !hasConflictingVersionRange(existing, pkg) { + continue + } + + log.Printf( + "Multiple pyproject version ranges for dependency %q from %s collapse to the same unversioned PURL; CycloneDX output will keep the earliest source declaration. Saw ranges %q and %q\n", + pkg.Name, + c.path, + existing.VersionRange, + pkg.VersionRange, + ) + + return + } +} + +func hasConflictingVersionRange(existing, pkg lockfile.PackageDetails) bool { + return existing.Name == pkg.Name && + existing.VersionRange != "" && + existing.VersionRange != pkg.VersionRange +} + +func sortedPyprojectPackages(packages map[string]lockfile.PackageDetails) []lockfile.PackageDetails { + result := slices.Collect(maps.Values(packages)) + slices.SortFunc(result, comparePyprojectPackageDetails) + + return result +} + +func comparePyprojectPackageDetails(a, b lockfile.PackageDetails) int { + if c := strings.Compare(a.BlockLocation.Filename, b.BlockLocation.Filename); c != 0 { + return c + } + if c := cmp.Compare(sourceLine(a.BlockLocation), sourceLine(b.BlockLocation)); c != 0 { + return c + } + if c := cmp.Compare(sourceColumn(a.BlockLocation), sourceColumn(b.BlockLocation)); c != 0 { + return c + } + if c := cmp.Compare(sourceLinePtr(a.NameLocation), sourceLinePtr(b.NameLocation)); c != 0 { + return c + } + if c := cmp.Compare(sourceColumnPtr(a.NameLocation), sourceColumnPtr(b.NameLocation)); c != 0 { + return c + } + if c := strings.Compare(a.Name, b.Name); c != 0 { + return c + } + if c := strings.Compare(a.Version, b.Version); c != 0 { + return c + } + + return strings.Compare(a.VersionRange, b.VersionRange) +} + +func sourceLine(location models.FilePosition) int { + if location.Line.Start == 0 { + return 1 << 30 + } + + return location.Line.Start +} + +func sourceColumn(location models.FilePosition) int { + if location.Column.Start == 0 { + return 1 << 30 + } + + return location.Column.Start +} + +func sourceLinePtr(location *models.FilePosition) int { + if location == nil { + return 1 << 30 + } + + return sourceLine(*location) +} + +func sourceColumnPtr(location *models.FilePosition) int { + if location == nil { + return 1 << 30 + } + + return sourceColumn(*location) +} + +func versionOrRange(version, versionRange string) string { + if version != "" { + return version + } + + return versionRange +} diff --git a/pkg/lockfile/types.go b/pkg/lockfile/types.go index 10a41e57..34d1ea24 100644 --- a/pkg/lockfile/types.go +++ b/pkg/lockfile/types.go @@ -5,24 +5,26 @@ import ( ) type PackageDetails struct { - Source models.SourceInfo - Name string `json:"name"` - Version string `json:"version"` - TargetVersions []string `json:"targetVersions,omitempty"` - TargetFrameworks []string `json:"targetFrameworks,omitempty"` - Commit string `json:"commit,omitempty"` - PURL string `json:"purl,omitempty"` - Ecosystem models.Ecosystem `json:"ecosystem,omitempty"` - DepGroups []string `json:"depGroups,omitempty"` - BlockLocation models.FilePosition `json:"blockLocation,omitempty"` - LocationRole string `json:"locationRole,omitempty"` - VersionLocation *models.FilePosition `json:"versionLocation,omitempty"` - NameLocation *models.FilePosition `json:"nameLocation,omitempty"` - PackageManager models.PackageManager `json:"packageManager,omitempty"` - IsDirect bool `json:"isDirect,omitempty"` - Opaque bool `json:"opaque,omitempty"` - Dependencies []*PackageDetails `json:"dependencies,omitempty"` - Exclusions []string `json:"exclusions,omitempty"` + Source models.SourceInfo + Name string `json:"name"` + Version string `json:"version"` + VersionRange string `json:"versionRange,omitempty"` + TargetVersions []string `json:"targetVersions,omitempty"` + TargetFrameworks []string `json:"targetFrameworks,omitempty"` + Commit string `json:"commit,omitempty"` + PURL string `json:"purl,omitempty"` + Ecosystem models.Ecosystem `json:"ecosystem,omitempty"` + DepGroups []string `json:"depGroups,omitempty"` + BlockLocation models.FilePosition `json:"blockLocation,omitempty"` + LocationRole string `json:"locationRole,omitempty"` + VersionLocation *models.FilePosition `json:"versionLocation,omitempty"` + NameLocation *models.FilePosition `json:"nameLocation,omitempty"` + PackageManager models.PackageManager `json:"packageManager,omitempty"` + IsDirect bool `json:"isDirect,omitempty"` + RequiresTransitiveEnrichment bool `json:"requiresTransitiveEnrichment,omitempty"` + Opaque bool `json:"opaque,omitempty"` + Dependencies []*PackageDetails `json:"dependencies,omitempty"` + Exclusions []string `json:"exclusions,omitempty"` } type Ecosystem string diff --git a/pkg/models/package_metadata.go b/pkg/models/package_metadata.go index 6abc615c..eff57850 100644 --- a/pkg/models/package_metadata.go +++ b/pkg/models/package_metadata.go @@ -5,11 +5,13 @@ import "fmt" type PackageMetadataType string const ( - PackageManagerMetadata PackageMetadataType = "package-manager" - IsDirectDependencyMetadata PackageMetadataType = "is-direct" - IsDevDependencyMetadata PackageMetadataType = "is-dev" - ExclusionMetadata PackageMetadataType = "exclusion" - OpaqueMetadata PackageMetadataType = "opaque" + PackageManagerMetadata PackageMetadataType = "package-manager" + IsDirectDependencyMetadata PackageMetadataType = "is-direct" + VersionRangeMetadata PackageMetadataType = "version-range" + RequiresTransitiveEnrichmentMetadata PackageMetadataType = "requires-transitive-enrichment" + IsDevDependencyMetadata PackageMetadataType = "is-dev" + ExclusionMetadata PackageMetadataType = "exclusion" + OpaqueMetadata PackageMetadataType = "opaque" ) type PackageMetadata map[PackageMetadataType]string diff --git a/pkg/scanner/datadog_sbom_generator.go b/pkg/scanner/datadog_sbom_generator.go index c1bc9e86..30feeeeb 100644 --- a/pkg/scanner/datadog_sbom_generator.go +++ b/pkg/scanner/datadog_sbom_generator.go @@ -360,8 +360,10 @@ func packageHasRangedVersion(scannedPackage lockfile.PackageDetails) bool { } // sanitizeScannedPackages is used to sanitize scanned packages. -// 1. filters our packages that have a ranged version +// 1. filters out packages that have a ranged value in Version // 2. creates a PURL for each package and drops the package if it cannot be created +// Packages with VersionRange are allowed through with an empty Version so that +// downstream services can resolve them. func sanitizeScannedPackages(scannedPackages []lockfile.PackageDetails) ([]lockfile.PackageDetails, []string) { finalPackages := make([]lockfile.PackageDetails, 0, len(scannedPackages)) droppedReasons := make([]string, 0, len(scannedPackages)) diff --git a/pkg/scanner/datadog_sbom_generator_test.go b/pkg/scanner/datadog_sbom_generator_test.go index d9f16d42..c2efcdc3 100644 --- a/pkg/scanner/datadog_sbom_generator_test.go +++ b/pkg/scanner/datadog_sbom_generator_test.go @@ -6,6 +6,7 @@ import ( "testing" "github.com/DataDog/datadog-sbom-generator/pkg/lockfile" + "github.com/DataDog/datadog-sbom-generator/pkg/models" "github.com/DataDog/datadog-sbom-generator/pkg/reporter" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -254,3 +255,24 @@ func Test_sanitizeScannedPackages_RangedVersionAreFiltered(t *testing.T) { assert.Empty(t, sanitizedPackages) assert.Len(t, errors, 3) } + +func Test_sanitizeScannedPackages_VersionRangesAreAllowed(t *testing.T) { + t.Parallel() + + scannedPackages := []lockfile.PackageDetails{ + { + Name: "requests", + VersionRange: ">=2.0,<3.0", + Ecosystem: models.EcosystemPyPI, + IsDirect: true, + }, + } + + sanitizedPackages, errors := sanitizeScannedPackages(scannedPackages) + + assert.Empty(t, errors) + require.Len(t, sanitizedPackages, 1) + assert.Empty(t, sanitizedPackages[0].Version) + assert.Equal(t, ">=2.0,<3.0", sanitizedPackages[0].VersionRange) + assert.Equal(t, "pkg:pypi/requests", sanitizedPackages[0].PURL) +} diff --git a/pkg/scanner/vulnerability_result.go b/pkg/scanner/vulnerability_result.go index a2b6bfee..4bb60ceb 100644 --- a/pkg/scanner/vulnerability_result.go +++ b/pkg/scanner/vulnerability_result.go @@ -23,6 +23,12 @@ func exportMetadata(rawPkg lockfile.PackageDetails, reachabilityAnalysisResults if rawPkg.IsDirect { metadata[models.IsDirectDependencyMetadata] = strconv.FormatBool(rawPkg.IsDirect) } + if rawPkg.VersionRange != "" { + metadata[models.VersionRangeMetadata] = rawPkg.VersionRange + } + if rawPkg.RequiresTransitiveEnrichment { + metadata[models.RequiresTransitiveEnrichmentMetadata] = strconv.FormatBool(rawPkg.RequiresTransitiveEnrichment) + } if rawPkg.Ecosystem.IsDevGroup(rawPkg.DepGroups) { metadata[models.IsDevDependencyMetadata] = strconv.FormatBool(true) } diff --git a/pkg/scanner/vulnerability_result_test.go b/pkg/scanner/vulnerability_result_test.go index 116adba4..3438cb24 100644 --- a/pkg/scanner/vulnerability_result_test.go +++ b/pkg/scanner/vulnerability_result_test.go @@ -139,6 +139,39 @@ func Test_exportMetadata(t *testing.T) { models.IsDevDependencyMetadata: "true", }, }, + { + name: "Package with version range", + rawPkg: lockfile.PackageDetails{ + VersionRange: ">=2.0,<3.0", + }, + reachabilityAnalysis: nil, + expectedMetadata: map[models.PackageMetadataType]string{ + models.VersionRangeMetadata: ">=2.0,<3.0", + }, + }, + { + name: "Manifest exact pin requires transitive enrichment", + rawPkg: lockfile.PackageDetails{ + Version: "2.28.0", + RequiresTransitiveEnrichment: true, + }, + reachabilityAnalysis: nil, + expectedMetadata: map[models.PackageMetadataType]string{ + models.RequiresTransitiveEnrichmentMetadata: "true", + }, + }, + { + name: "Manifest version range requires transitive enrichment", + rawPkg: lockfile.PackageDetails{ + VersionRange: ">=2.0,<3.0", + RequiresTransitiveEnrichment: true, + }, + reachabilityAnalysis: nil, + expectedMetadata: map[models.PackageMetadataType]string{ + models.VersionRangeMetadata: ">=2.0,<3.0", + models.RequiresTransitiveEnrichmentMetadata: "true", + }, + }, { name: "Opaque package", rawPkg: lockfile.PackageDetails{