Skip to content

Commit a77c687

Browse files
committed
Emit pyproject version ranges
1 parent 2ad59ab commit a77c687

4 files changed

Lines changed: 221 additions & 87 deletions

File tree

cmd/datadog-sbom-generator/__snapshots__/main_test.snap

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2377,6 +2377,33 @@ Scanned <rootdir>/fixtures/integration-npm/with-workspace/yarn.lock file and fou
23772377
]
23782378
}
23792379
},
2380+
{
2381+
"bom-ref": "pkg:pypi/numpy",
2382+
"type": "library",
2383+
"name": "numpy",
2384+
"purl": "pkg:pypi/numpy",
2385+
"properties": [
2386+
{
2387+
"name": "datadog:is-direct",
2388+
"value": "true"
2389+
},
2390+
{
2391+
"name": "datadog:package-manager",
2392+
"value": "uv"
2393+
},
2394+
{
2395+
"name": "datadog:version-range",
2396+
"value": "\u003e=1.24"
2397+
}
2398+
],
2399+
"evidence": {
2400+
"occurrences": [
2401+
{
2402+
"location": "{\"block\":{\"file_name\":\"pyproject.toml\",\"line_start\":8,\"line_end\":8,\"column_start\":5,\"column_end\":19,\"role\":\"manifest\"},\"name\":{\"file_name\":\"pyproject.toml\",\"line_start\":8,\"line_end\":8,\"column_start\":6,\"column_end\":11,\"role\":\"manifest\"},\"version\":{\"file_name\":\"pyproject.toml\",\"line_start\":8,\"line_end\":8,\"column_start\":11,\"column_end\":17,\"role\":\"manifest\"}}"
2403+
}
2404+
]
2405+
}
2406+
},
23802407
{
23812408
"bom-ref": "pkg:pypi/pytest@8.2.0",
23822409
"type": "library",
@@ -2436,7 +2463,7 @@ Scanned <rootdir>/fixtures/integration-npm/with-workspace/yarn.lock file and fou
24362463

24372464
[TestRun/Scan_pyproject.toml_without_lock_file - 2]
24382465
Scanning directory './fixtures/integration-pyproject', resolved absolute path '<rootdir>/fixtures/integration-pyproject'
2439-
Scanned <rootdir>/fixtures/integration-pyproject/pyproject.toml file and found 3 packages
2466+
Scanned <rootdir>/fixtures/integration-pyproject/pyproject.toml file and found 4 packages
24402467
[reachability] Reachability analysis is disabled
24412468
---
24422469

pkg/lockfile/fixtures/pyproject-toml-extractor/poetry-pinned/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,4 @@ requests = "==2.28.0"
1212
flask = "==2.3.2"
1313
boto3 = "1.26.0"
1414
numpy = "^1.24"
15+
scipy = "1.*"

pkg/lockfile/python/parse-pyproject-toml.go

Lines changed: 166 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package python
33
import (
44
"fmt"
55
"io"
6+
"log"
67
"maps"
78
"path/filepath"
89
"slices"
@@ -147,20 +148,25 @@ func (e PyProjectTOMLExtractor) Extract(f lockfile.DepFile, context lockfile.Sca
147148

148149
lines := fileposition.BytesToLines(content)
149150
pm := detectPackageManager(&pyproject)
150-
packages := map[string]lockfile.PackageDetails{}
151+
collector := pyprojectPackageCollector{
152+
packages: map[string]lockfile.PackageDetails{},
153+
lines: lines,
154+
path: f.Path(),
155+
packageManager: pm,
156+
}
151157

152158
for _, dep := range pyproject.Project.Dependencies {
153-
if name, rawName, version, ok := parsePEP508Pin(dep); ok {
154-
block, nameLocation, versionLocation := extractPositions(lines, f.Path(), rawName, version, false)
155-
addOrMergeGroups(packages, name, version, []string{"prod"}, pm, block, nameLocation, versionLocation)
159+
dependency, ok := parsePEP508Dependency(dep)
160+
if ok {
161+
collector.addDependency(dependency, []string{"prod"}, false)
156162
}
157163
}
158164

159165
for group, deps := range pyproject.Project.OptionalDependencies {
160166
for _, dep := range deps {
161-
if name, rawName, version, ok := parsePEP508Pin(dep); ok {
162-
block, nameLocation, versionLocation := extractPositions(lines, f.Path(), rawName, version, false)
163-
addOrMergeGroups(packages, name, version, []string{group}, pm, block, nameLocation, versionLocation)
167+
dependency, ok := parsePEP508Dependency(dep)
168+
if ok {
169+
collector.addDependency(dependency, []string{group}, false)
164170
}
165171
}
166172
}
@@ -172,9 +178,9 @@ func (e PyProjectTOMLExtractor) Extract(f lockfile.DepFile, context lockfile.Sca
172178
// skip {include-group = "..."} table entries
173179
continue
174180
}
175-
if name, rawName, version, ok := parsePEP508Pin(dep); ok {
176-
block, nameLocation, versionLocation := extractPositions(lines, f.Path(), rawName, version, false)
177-
addOrMergeGroups(packages, name, version, []string{group}, pm, block, nameLocation, versionLocation)
181+
dependency, ok := parsePEP508Dependency(dep)
182+
if ok {
183+
collector.addDependency(dependency, []string{group}, false)
178184
}
179185
}
180186
}
@@ -185,147 +191,225 @@ func (e PyProjectTOMLExtractor) Extract(f lockfile.DepFile, context lockfile.Sca
185191
if name == "python" {
186192
continue
187193
}
188-
if version, ok := parsePoetryPin(val); ok {
189-
normalized := normalizedRequirementName(name)
190-
block, nameLocation, versionLocation := extractPositions(lines, f.Path(), name, version, true)
191-
addOrMergeGroups(packages, normalized, version, []string{"prod"}, pm, block, nameLocation, versionLocation)
194+
if version, versionRange, ok := parsePoetryDependency(val); ok {
195+
collector.addDependency(pep508Dependency{
196+
Name: normalizedRequirementName(name),
197+
RawName: name,
198+
Version: version,
199+
VersionRange: versionRange,
200+
}, []string{"prod"}, true)
192201
}
193202
}
194203
for name, val := range pyproject.Tool.Poetry.DevDependencies {
195-
if version, ok := parsePoetryPin(val); ok {
196-
normalized := normalizedRequirementName(name)
197-
block, nameLocation, versionLocation := extractPositions(lines, f.Path(), name, version, true)
198-
addOrMergeGroups(packages, normalized, version, []string{"dev"}, pm, block, nameLocation, versionLocation)
204+
if version, versionRange, ok := parsePoetryDependency(val); ok {
205+
collector.addDependency(pep508Dependency{
206+
Name: normalizedRequirementName(name),
207+
RawName: name,
208+
Version: version,
209+
VersionRange: versionRange,
210+
}, []string{"dev"}, true)
199211
}
200212
}
201213
for groupName, group := range pyproject.Tool.Poetry.Group {
202214
for name, val := range group.Dependencies {
203-
if version, ok := parsePoetryPin(val); ok {
204-
normalized := normalizedRequirementName(name)
205-
block, nameLocation, versionLocation := extractPositions(lines, f.Path(), name, version, true)
206-
addOrMergeGroups(packages, normalized, version, []string{groupName}, pm, block, nameLocation, versionLocation)
215+
if version, versionRange, ok := parsePoetryDependency(val); ok {
216+
collector.addDependency(pep508Dependency{
217+
Name: normalizedRequirementName(name),
218+
RawName: name,
219+
Version: version,
220+
VersionRange: versionRange,
221+
}, []string{groupName}, true)
207222
}
208223
}
209224
}
210225
}
211226

212-
return slices.Collect(maps.Values(packages)), nil
227+
return slices.Collect(maps.Values(collector.packages)), nil
213228
}
214229

215-
// addOrMergeGroups adds a package to the map, or if it already exists (same name+version),
216-
// merges the new dep groups into the existing entry rather than dropping the duplicate.
217-
func addOrMergeGroups(packages map[string]lockfile.PackageDetails, name, version string, groups []string, pm models.PackageManager, block models.FilePosition, nameLocation, versionLocation *models.FilePosition) {
218-
key := name + "@" + version
219-
if existing, exists := packages[key]; exists {
220-
for _, g := range groups {
221-
if !slices.Contains(existing.DepGroups, g) {
222-
existing.DepGroups = append(existing.DepGroups, g)
223-
}
224-
}
225-
packages[key] = existing
230+
type pyprojectPackageCollector struct {
231+
packages map[string]lockfile.PackageDetails
232+
lines []string
233+
path string
234+
packageManager models.PackageManager
235+
}
236+
237+
func (c *pyprojectPackageCollector) addDependency(dependency pep508Dependency, groups []string, isPoetry bool) {
238+
if !dependency.hasExactlyOneVersionValue() {
239+
log.Printf(
240+
"Skipping pyproject dependency %q from %s: expected exactly one of version or version range, got version=%q versionRange=%q\n",
241+
dependency.Name,
242+
c.path,
243+
dependency.Version,
244+
dependency.VersionRange,
245+
)
226246

227247
return
228248
}
229-
packages[key] = lockfile.PackageDetails{
230-
Name: name,
231-
Version: version,
232-
PackageManager: pm,
249+
250+
block, nameLocation, versionLocation := extractPositions(c.lines, c.path, dependency.RawName, versionOrRange(dependency.Version, dependency.VersionRange), isPoetry)
251+
c.addOrMergePackageGroups(lockfile.PackageDetails{
252+
Name: dependency.Name,
253+
Version: dependency.Version,
254+
VersionRange: dependency.VersionRange,
255+
PackageManager: c.packageManager,
233256
Ecosystem: models.EcosystemPyPI,
234257
IsDirect: true,
235258
DepGroups: groups,
236259
BlockLocation: block,
237260
NameLocation: nameLocation,
238261
VersionLocation: versionLocation,
239262
LocationRole: models.LocationRoleManifest,
263+
})
264+
}
265+
266+
// addOrMergePackageGroups adds a package to the map, or if it already exists (same name+version/range),
267+
// merges the new dep groups into the existing entry rather than dropping the duplicate.
268+
func (c *pyprojectPackageCollector) addOrMergePackageGroups(pkg lockfile.PackageDetails) {
269+
key := pkg.Name + "@" + pkg.Version + "|" + pkg.VersionRange
270+
if existing, exists := c.packages[key]; exists {
271+
for _, g := range pkg.DepGroups {
272+
if !slices.Contains(existing.DepGroups, g) {
273+
existing.DepGroups = append(existing.DepGroups, g)
274+
}
275+
}
276+
c.packages[key] = existing
277+
278+
return
240279
}
280+
c.packages[key] = pkg
241281
}
242282

243-
// parsePEP508Pin parses a PEP 508 dependency string and returns the normalized name, the raw
244-
// (pre-normalization) name as written in the file, and the version — only when the dependency
245-
// is an exact pin (==). Returns ok=false for all other specifiers.
246-
func parsePEP508Pin(dep string) (name, rawName, version string, ok bool) {
283+
func versionOrRange(version, versionRange string) string {
284+
if version != "" {
285+
return version
286+
}
287+
288+
return versionRange
289+
}
290+
291+
type pep508Dependency struct {
292+
Name string
293+
RawName string
294+
Version string
295+
VersionRange string
296+
}
297+
298+
func (d pep508Dependency) hasExactlyOneVersionValue() bool {
299+
return (d.Version == "") != (d.VersionRange == "")
300+
}
301+
302+
// parsePEP508Dependency parses a PEP 508 dependency string into a normalized name,
303+
// the raw name as written in the file, and either an exact version or original version range.
304+
func parsePEP508Dependency(dep string) (pep508Dependency, bool) {
247305
// strip environment markers (PEP 508)
248306
dep, _, _ = strings.Cut(dep, ";")
249307
dep = strings.TrimSpace(dep)
308+
if strings.Contains(dep, " @ ") {
309+
return pep508Dependency{}, false
310+
}
250311
// strip parenthesized specifier: "requests (==2.28.0)" -> "requests ==2.28.0"
251312
dep = strings.NewReplacer("(", "", ")", "").Replace(dep)
252313

253-
// reject if any non-exact operator is present
254-
for _, op := range []string{"===", "!=", ">=", "<=", "~=", ">", "<"} {
255-
if strings.Contains(dep, op) {
256-
return "", "", "", false
257-
}
314+
opIndex, op := findFirstPEP508Specifier(dep)
315+
if opIndex == -1 || op == "===" {
316+
return pep508Dependency{}, false
258317
}
259318

260-
rawNamePart, rawVersion, found := strings.Cut(dep, "==")
261-
if !found {
262-
return "", "", "", false
319+
// strip extras: "requests[security]" -> "requests"
320+
fileRawName, _, _ := strings.Cut(strings.TrimSpace(dep[:opIndex]), "[")
321+
fileRawName = strings.TrimSpace(fileRawName)
322+
specifier := strings.TrimSpace(dep[opIndex:])
323+
324+
if fileRawName == "" || specifier == "" {
325+
return pep508Dependency{}, false
263326
}
264327

265-
// reject multi-constraint specs: "==2.28.0,!=2.28.0"
266-
if strings.Contains(rawVersion, ",") {
267-
return "", "", "", false
328+
if op == "==" {
329+
rawVersion := strings.TrimSpace(specifier[len(op):])
330+
if rawVersion != "" && !strings.Contains(rawVersion, ",") && isConcreteVersion(rawVersion) {
331+
return pep508Dependency{
332+
Name: normalizedRequirementName(fileRawName),
333+
RawName: fileRawName,
334+
Version: rawVersion,
335+
}, true
336+
}
268337
}
269-
rawVersion = strings.TrimSpace(rawVersion)
270338

271-
// strip extras: "requests[security]" -> "requests"
272-
fileRawName, _, _ := strings.Cut(strings.TrimSpace(rawNamePart), "[")
273-
fileRawName = strings.TrimSpace(fileRawName)
339+
return pep508Dependency{
340+
Name: normalizedRequirementName(fileRawName),
341+
RawName: fileRawName,
342+
VersionRange: specifier,
343+
}, true
344+
}
274345

275-
if fileRawName == "" || rawVersion == "" || !isConcreteVersion(rawVersion) {
276-
return "", "", "", false
346+
func findFirstPEP508Specifier(dep string) (int, string) {
347+
firstIndex := -1
348+
firstOp := ""
349+
for _, op := range []string{"===", "==", "!=", ">=", "<=", "~=", ">", "<"} {
350+
index := strings.Index(dep, op)
351+
if index == -1 {
352+
continue
353+
}
354+
if firstIndex == -1 || index < firstIndex {
355+
firstIndex = index
356+
firstOp = op
357+
}
277358
}
278359

279-
return normalizedRequirementName(fileRawName), fileRawName, rawVersion, true
360+
return firstIndex, firstOp
280361
}
281362

282-
// parsePoetryPin parses a Poetry dependency value (string or inline table) and returns
283-
// the version only when it is an exact pin (== prefix) with a concrete version.
284-
func parsePoetryPin(val any) (version string, ok bool) {
363+
// parsePoetryDependency parses a Poetry dependency value (string or inline table) and
364+
// returns either an exact version or the original version range.
365+
func parsePoetryDependency(val any) (version, versionRange string, ok bool) {
285366
var versionStr string
286367
switch v := val.(type) {
287368
case string:
288369
versionStr = v
289370
case map[string]any:
371+
for _, directRefKey := range []string{"path", "git", "url"} {
372+
if _, exists := v[directRefKey]; exists {
373+
return "", "", false
374+
}
375+
}
290376
versionStr, ok = v["version"].(string)
291377
if !ok {
292-
return "", false
378+
return "", "", false
293379
}
294380
default:
295-
return "", false
381+
return "", "", false
296382
}
297383

298384
versionStr = strings.TrimSpace(versionStr)
385+
if versionStr == "" || strings.HasPrefix(versionStr, "===") {
386+
return "", "", false
387+
}
299388

300-
// Poetry bare version string "2.28.0" is an implicit exact pin
389+
// Poetry bare version string "2.28.0" is an implicit exact pin.
390+
// Other digit-starting constraints, such as "1.*", are still ranges.
301391
if len(versionStr) > 0 && !strings.ContainsAny(string(versionStr[0]), "=!<>~^*") {
302392
if strings.Contains(versionStr, ",") {
303-
return "", false
393+
return "", versionStr, true
304394
}
305395
if isConcreteVersion(versionStr) {
306-
return versionStr, true
396+
return versionStr, "", true
307397
}
308398

309-
return "", false
310-
}
311-
312-
// reject === (arbitrary equality) and any non-== operator
313-
if !strings.HasPrefix(versionStr, "==") || strings.HasPrefix(versionStr, "===") {
314-
return "", false
399+
return "", versionStr, true
315400
}
316401

317-
after := strings.TrimSpace(versionStr[2:])
402+
if strings.HasPrefix(versionStr, "==") {
403+
after := strings.TrimSpace(versionStr[2:])
318404

319-
// reject multi-constraint: "==2.28.0,!=2.28.1" is not an exact pin
320-
if strings.Contains(after, ",") {
321-
return "", false
322-
}
405+
if !strings.Contains(after, ",") && isConcreteVersion(after) {
406+
return after, "", true
407+
}
323408

324-
if isConcreteVersion(after) {
325-
return after, true
409+
return "", versionStr, true
326410
}
327411

328-
return "", false
412+
return "", versionStr, true
329413
}
330414

331415
// isConcreteVersion returns true if version looks like a fully-specified version

0 commit comments

Comments
 (0)