diff --git a/pkg/lockfile/fixtures/jar/bcprov-jdk18on-1.78.1.jar b/pkg/lockfile/fixtures/jar/bcprov-jdk18on-1.78.1.jar new file mode 100644 index 00000000..a835892f Binary files /dev/null and b/pkg/lockfile/fixtures/jar/bcprov-jdk18on-1.78.1.jar differ diff --git a/pkg/lockfile/fixtures/jar/gson-2.10.1.jar b/pkg/lockfile/fixtures/jar/gson-2.10.1.jar new file mode 100644 index 00000000..052604bc Binary files /dev/null and b/pkg/lockfile/fixtures/jar/gson-2.10.1.jar differ diff --git a/pkg/lockfile/fixtures/jar/has-both-1.0.0.jar b/pkg/lockfile/fixtures/jar/has-both-1.0.0.jar new file mode 100644 index 00000000..09389e84 Binary files /dev/null and b/pkg/lockfile/fixtures/jar/has-both-1.0.0.jar differ diff --git a/pkg/lockfile/fixtures/jar/manifest-no-bsn-1.0.0.jar b/pkg/lockfile/fixtures/jar/manifest-no-bsn-1.0.0.jar new file mode 100644 index 00000000..c4c9f8ff Binary files /dev/null and b/pkg/lockfile/fixtures/jar/manifest-no-bsn-1.0.0.jar differ diff --git a/pkg/lockfile/fixtures/jar/mylib-2.0.0.jar b/pkg/lockfile/fixtures/jar/mylib-2.0.0.jar new file mode 100644 index 00000000..a11348e9 Binary files /dev/null and b/pkg/lockfile/fixtures/jar/mylib-2.0.0.jar differ diff --git a/pkg/lockfile/fixtures/jar/runtime-3.26.0.jar b/pkg/lockfile/fixtures/jar/runtime-3.26.0.jar new file mode 100644 index 00000000..3a4a356d Binary files /dev/null and b/pkg/lockfile/fixtures/jar/runtime-3.26.0.jar differ diff --git a/pkg/lockfile/java/jar-manifest.go b/pkg/lockfile/java/jar-manifest.go new file mode 100644 index 00000000..99c4ac60 --- /dev/null +++ b/pkg/lockfile/java/jar-manifest.go @@ -0,0 +1,311 @@ +package java + +import ( + "archive/zip" + "bufio" + "path/filepath" + "strings" + + "github.com/DataDog/datadog-sbom-generator/internal/cachedregexp" + "github.com/DataDog/datadog-sbom-generator/pkg/lockfile" + "github.com/DataDog/datadog-sbom-generator/pkg/models" +) + +// jarFilenameRegex matches Maven-convention JAR filenames: artifactId-version.jar +// The version must 
start with a digit. The artifactId uses a greedy match so that +// the split occurs at the LAST hyphen-digit boundary, not the first. This correctly +// handles artifactIds that themselves contain a hyphen-digit segment, e.g. +// log4j-1.2-api-2.17.1.jar → artifactId="log4j-1.2-api", version="2.17.1". +var jarFilenameRegex = cachedregexp.MustCompile(`^(.+)-(\d.*)\.jar$`) + +// jarClassifierRegex matches known Maven classifier suffixes appended at the end of a +// version string in a JAR filename. Classifiers are either OS/architecture identifiers +// (linux, windows, osx, …) optionally followed by an arch token, or well-known +// descriptor strings (sources, javadoc, native, …). +// +// Version qualifiers such as -SNAPSHOT, -Final, -RC1 are intentionally excluded so +// they are never stripped. +var jarClassifierRegex = cachedregexp.MustCompile( + `-(?:linux|windows|osx|macos|darwin|freebsd|sunos|solaris|aix)(?:[_-][a-z0-9_]+)*$` + + `|-(?:sources|javadoc|tests|native|all|uber|shaded|assembly|no_aop)$`, +) + +// parseJarFilename extracts artifactId and version from a JAR filename following +// Maven naming conventions. If the version portion contains a known classifier suffix +// (e.g. "-linux-x86_64", "-sources"), the classifier is stripped so that only the +// canonical Maven version is returned. Returns empty strings if the filename doesn't +// match the expected pattern. +func parseJarFilename(filename string) (artifactID, version string) { + matches := jarFilenameRegex.FindStringSubmatch(filename) + if matches == nil { + return "", "" + } + + return matches[1], jarClassifierRegex.ReplaceAllString(matches[2], "") +} + +// cleanBundleSymbolicName strips OSGi directives (everything after the first ';') +// from a Bundle-SymbolicName value and trims whitespace. 
func cleanBundleSymbolicName(raw string) string {
	// OSGi appends directives after the symbolic name, separated by ';'
	// (e.g. "org.foo.bar;singleton:=true"). Only the text before the first ';'
	// is the name itself; with no ';' present the whole value is kept.
	name, _, _ := strings.Cut(raw, ";")

	return strings.TrimSpace(name)
}

// cleanName normalizes a manifest attribute value for use as an artifactId
// candidate: surrounding whitespace is trimmed, the value is lowercased, inner
// spaces become hyphens, and every character other than a-z, 0-9, '-', '_' and
// '.' is removed. An empty or whitespace-only input yields "".
func cleanName(raw string) string {
	lowered := strings.ToLower(strings.TrimSpace(raw))

	return strings.Map(func(r rune) rune {
		switch {
		case r == ' ':
			return '-'
		case r >= 'a' && r <= 'z', r >= '0' && r <= '9', r == '-', r == '_', r == '.':
			return r
		default:
			// A negative rune tells strings.Map to drop the character.
			return -1
		}
	}, lowered)
}

// manifestAttrs holds the MANIFEST.MF main-section attributes relevant to
// fallback package inference. Values are stored raw (not cleaned) so callers
// can apply whatever cleaning is appropriate for their use.
type manifestAttrs struct {
	bundleSymbolicName  string // Bundle-SymbolicName, directives included
	bundleName          string // Bundle-Name
	bundleVersion       string // Bundle-Version
	implVersion         string // Implementation-Version
	automaticModuleName string // Automatic-Module-Name
}

// parseManifestAttributes reads META-INF/MANIFEST.MF from a zip archive and
// returns the attributes relevant to fallback package inference.
//
// Only the main section is read: parsing stops at the first blank line, so
// per-entry sections that follow can never overwrite main-section values.
// Attribute names are matched case-insensitively, as the JAR format defines
// them ("bundle-symbolicname" is the same attribute as "Bundle-SymbolicName").
//
// A missing manifest is not an error and yields a zero manifestAttrs. An error
// is returned only when the manifest entry cannot be opened or read.
func parseManifestAttributes(zipReader *zip.Reader) (manifestAttrs, error) {
	var manifestEntry *zip.File
	for _, entry := range zipReader.File {
		if entry.Name == "META-INF/MANIFEST.MF" {
			manifestEntry = entry

			break
		}
	}

	if manifestEntry == nil {
		return manifestAttrs{}, nil
	}

	rc, err := manifestEntry.Open()
	if err != nil {
		return manifestAttrs{}, err
	}
	defer rc.Close()

	// MANIFEST.MF is a sequence of "Name: value" headers. A line that starts
	// with a single space continues the value of the previous header.
	// Keys are stored lowercased so lookups are case-insensitive.
	attrs := make(map[string]string)

	var currentKey string

	// bufio.Scanner's default line splitter also strips a trailing "\r", so
	// both LF and CRLF manifests produce clean lines here.
	scanner := bufio.NewScanner(rc)
	for scanner.Scan() {
		line := scanner.Text()

		// A blank line ends the main section of the manifest.
		if line == "" {
			break
		}

		// Continuation line: drop the single leading space and append the rest
		// verbatim to the value being built.
		if strings.HasPrefix(line, " ") && currentKey != "" {
			attrs[currentKey] += strings.TrimPrefix(line, " ")

			continue
		}

		key, value, found := strings.Cut(line, ": ")
		if !found {
			// Not a header line; make sure a following continuation line is
			// not glued onto an unrelated earlier attribute.
			currentKey = ""

			continue
		}

		currentKey = strings.ToLower(key)
		attrs[currentKey] = value
	}

	if err := scanner.Err(); err != nil {
		return manifestAttrs{}, err
	}

	return manifestAttrs{
		bundleSymbolicName:  attrs["bundle-symbolicname"],
		bundleName:          attrs["bundle-name"],
		bundleVersion:       attrs["bundle-version"],
		implVersion:         attrs["implementation-version"],
		automaticModuleName: attrs["automatic-module-name"],
	}, nil
}

// parseGroupID infers a Maven groupId from MANIFEST.MF attributes using the
// dot-prefix heuristic from the Java Tracer's Dependency.java guessFallbackNoPom.
//
// Primary algorithm (BSN-based):
//  1. Build candidate names: [filenameArtifact, cleanName(bundleName)]
//  2. For each candidate, check if BSN ends with "." + candidate AND BSN contains "." AND len(BSN) > 5
//  3. If match: groupId = BSN prefix (BSN minus "." + candidate)
//
// Fallback when BSN has no dots (poor OSGi metadata, e.g. Bundle-SymbolicName: "bcprov"):
//  1. Apply the same dot-prefix heuristic to Automatic-Module-Name
//  2. If no candidate matches, strip the last dot-segment of AMN as a best-effort groupId
//     (requires AMN to have at least 2 dots for confidence)
//
// Final fallback: BSN as-is. An empty BSN always yields an empty groupId.
+func parseGroupID(bundleSymbolicName, bundleName, filenameArtifact, automaticModuleName string) string { + if bundleSymbolicName == "" { + return "" + } + + // Build candidate list: filename artifact first, then cleaned bundle name + var candidates []string + if filenameArtifact != "" { + candidates = append(candidates, filenameArtifact) + } + + if cleanedBundleName := cleanName(bundleName); cleanedBundleName != "" { + candidates = append(candidates, cleanedBundleName) + } + + // Primary: dot-prefix heuristic on BSN + for _, candidate := range candidates { + suffix := "." + candidate + if strings.HasSuffix(bundleSymbolicName, suffix) && + strings.Contains(bundleSymbolicName, ".") && + len(bundleSymbolicName) > 5 { + return bundleSymbolicName[:len(bundleSymbolicName)-len(suffix)] + } + } + + // BSN-based inference failed. When BSN has no dots (e.g. "bcprov"), it carries + // no package hierarchy. Try Automatic-Module-Name as a more reliable source. + if !strings.Contains(bundleSymbolicName, ".") && automaticModuleName != "" { + // Dot-prefix heuristic on AMN + for _, candidate := range candidates { + suffix := "." + candidate + if strings.HasSuffix(automaticModuleName, suffix) && + strings.Contains(automaticModuleName, ".") && + len(automaticModuleName) > 5 { + return automaticModuleName[:len(automaticModuleName)-len(suffix)] + } + } + + // No candidate matched. Strip the last dot-segment of AMN as a best-effort + // groupId (e.g. "org.bouncycastle.provider" → "org.bouncycastle"). + // Require at least 2 dots (3 segments) to avoid over-truncating short names. + if strings.Count(automaticModuleName, ".") >= 2 { + if idx := strings.LastIndex(automaticModuleName, "."); idx > 0 { + return automaticModuleName[:idx] + } + } + } + + // No candidate matched; use BSN as-is + return bundleSymbolicName +} + +// resolveManifestPackage determines the final package name and version from +// MANIFEST.MF attributes and filename-derived values. 
+// +// ArtifactId: always the filename-derived artifact ID. +// Bundle-Name and Implementation-Title are OSGi/JAR display names and frequently +// do not match the Maven artifactId (e.g. Bundle-Name "bcprov" vs filename artifact +// "bcprov-jdk18on"). The filename is the authoritative source for the Maven coordinate. +// +// Version priority: bundleVersion==implVersion agreement > filenameVersion > bundleVersion > implVersion > "" +// +// Implementation-Title is intentionally excluded: it is an OSGi/JAR display name +// and is not reliably set to the Maven artifactId. +// +// Returns empty name if groupId or artifactId cannot be determined. +func resolveManifestPackage(filenameArtifact, filenameVersion, rawBSN, bundleName, bundleVersion, implVersion, automaticModuleName string) (name, version string) { + bsn := cleanBundleSymbolicName(rawBSN) + if bsn == "" { + return "", "" + } + + // artifactId comes from the filename: it is the most reliable source of the Maven + // artifact ID. Bundle-Name / Implementation-Title are display names only. + artifactID := filenameArtifact + if artifactID == "" { + return "", "" + } + + // Resolve version by priority + if bundleVersion != "" && bundleVersion == implVersion { + // High confidence: both sources agree + version = bundleVersion + } else if filenameVersion != "" { + version = filenameVersion + } else if bundleVersion != "" { + version = bundleVersion + } else { + version = implVersion + } + + // Resolve groupId using filename artifact and bundle name as candidates + // (not the resolved artifactId — groupId inference has its own candidate list) + groupID := parseGroupID(bsn, bundleName, filenameArtifact, automaticModuleName) + if groupID == "" { + return "", "" + } + + return groupID + ":" + artifactID, version +} + +// extractFromManifest attempts to infer a Maven package from the JAR's +// MANIFEST.MF attributes and filename. This is the fallback path used when +// no pom.properties are found inside the JAR. 
+func extractFromManifest(jarPath string, zipReader *zip.Reader, packages []lockfile.PackageDetails) []lockfile.PackageDetails { + filenameArtifact, filenameVersion := parseJarFilename(filepath.Base(jarPath)) + if filenameArtifact == "" { + // Filename doesn't match Maven convention; cannot infer package + return packages + } + + mf, err := parseManifestAttributes(zipReader) + if err != nil { + // Silently skip on parse error — this is a best-effort fallback + return packages + } + + name, version := resolveManifestPackage( + filenameArtifact, filenameVersion, + mf.bundleSymbolicName, mf.bundleName, mf.bundleVersion, + mf.implVersion, + mf.automaticModuleName, + ) + + if name == "" { + return packages + } + + return append(packages, lockfile.PackageDetails{ + Name: name, + Version: version, + PackageManager: jarPomPropertiesPackageManager, + Ecosystem: models.EcosystemMaven, + Opaque: true, + IsDirect: true, + }) +} diff --git a/pkg/lockfile/java/jar-manifest_test.go b/pkg/lockfile/java/jar-manifest_test.go new file mode 100644 index 00000000..723903ba --- /dev/null +++ b/pkg/lockfile/java/jar-manifest_test.go @@ -0,0 +1,759 @@ +package java + +import ( + "archive/zip" + "bytes" + "fmt" + "testing" +) + +// ============================================================================ +// T-1: parseJarFilename tests +// ============================================================================ + +func TestParseJarFilename(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + filename string + wantArtifactID string + wantVersion string + }{ + { + name: "standard Maven artifact", + filename: "commons-lang3-3.12.0.jar", + wantArtifactID: "commons-lang3", + wantVersion: "3.12.0", + }, + { + name: "version with qualifier", + filename: "spring-core-5.3.0-SNAPSHOT.jar", + wantArtifactID: "spring-core", + wantVersion: "5.3.0-SNAPSHOT", + }, + { + name: "simple artifact with version", + filename: "gson-2.10.1.jar", + wantArtifactID: "gson", + wantVersion: 
"2.10.1", + }, + { + name: "no version - no match", + filename: "some-lib.jar", + wantArtifactID: "", + wantVersion: "", + }, + { + name: "no extension", + filename: "lib-1.0.0", + wantArtifactID: "", + wantVersion: "", + }, + { + name: "version with dots", + filename: "bcprov-jdk18on-1.78.1.jar", + wantArtifactID: "bcprov-jdk18on", + wantVersion: "1.78.1", + }, + { + name: "single digit version", + filename: "lib-1.jar", + wantArtifactID: "lib", + wantVersion: "1", + }, + { + name: "empty filename", + filename: "", + wantArtifactID: "", + wantVersion: "", + }, + { + name: "just .jar", + filename: ".jar", + wantArtifactID: "", + wantVersion: "", + }, + { + name: "version starts with digit after hyphen", + filename: "log4j-api-2.17.1.jar", + wantArtifactID: "log4j-api", + wantVersion: "2.17.1", + }, + // Classifier stripping + { + name: "platform classifier linux-x86_64 stripped", + filename: "netty-tcnative-boringssl-static-2.0.61.Final-linux-x86_64.jar", + wantArtifactID: "netty-tcnative-boringssl-static", + wantVersion: "2.0.61.Final", + }, + { + name: "platform classifier linux-aarch_64 stripped", + filename: "netty-tcnative-boringssl-static-2.0.61.Final-linux-aarch_64.jar", + wantArtifactID: "netty-tcnative-boringssl-static", + wantVersion: "2.0.61.Final", + }, + { + name: "platform classifier osx-x86_64 stripped", + filename: "grpc-netty-1.50.0-osx-x86_64.jar", + wantArtifactID: "grpc-netty", + wantVersion: "1.50.0", + }, + { + // Regex splits at first -digit boundary: artifactId="guava", version="31.1-jre-sources". + // Classifier stripping then removes "-sources", leaving version="31.1-jre". 
+ name: "sources classifier stripped", + filename: "guava-31.1-jre-sources.jar", + wantArtifactID: "guava", + wantVersion: "31.1-jre", + }, + { + name: "SNAPSHOT qualifier not stripped", + filename: "spring-core-5.3.0-SNAPSHOT.jar", + wantArtifactID: "spring-core", + wantVersion: "5.3.0-SNAPSHOT", + }, + { + name: "Final qualifier not stripped", + filename: "netty-tcnative-2.0.61.Final.jar", + wantArtifactID: "netty-tcnative", + wantVersion: "2.0.61.Final", + }, + { + // artifactId contains a hyphen-digit segment; greedy match finds the + // rightmost -\d boundary so the split is correct. + name: "artifactId with embedded hyphen-digit segment", + filename: "log4j-1.2-api-2.17.1.jar", + wantArtifactID: "log4j-1.2-api", + wantVersion: "2.17.1", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + gotArtifact, gotVersion := parseJarFilename(tt.filename) + if gotArtifact != tt.wantArtifactID { + t.Errorf("parseJarFilename(%q) artifactId = %q, want %q", tt.filename, gotArtifact, tt.wantArtifactID) + } + if gotVersion != tt.wantVersion { + t.Errorf("parseJarFilename(%q) version = %q, want %q", tt.filename, gotVersion, tt.wantVersion) + } + }) + } +} + +// ============================================================================ +// T-1: cleanBundleSymbolicName tests +// ============================================================================ + +func TestCleanBundleSymbolicName(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + raw string + want string + }{ + { + name: "no directives", + raw: "org.bouncycastle.bcprov-jdk18on", + want: "org.bouncycastle.bcprov-jdk18on", + }, + { + name: "singleton directive", + raw: "org.eclipse.core.runtime;singleton:=true", + want: "org.eclipse.core.runtime", + }, + { + name: "multiple directives", + raw: "com.example.bundle;singleton:=true;lazy:=true", + want: "com.example.bundle", + }, + { + name: "empty string", + raw: "", + want: "", + }, + { + name: "whitespace 
around value", + raw: " org.example.bundle ", + want: "org.example.bundle", + }, + { + name: "whitespace before semicolon", + raw: "org.example.bundle ;singleton:=true", + want: "org.example.bundle", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got := cleanBundleSymbolicName(tt.raw) + if got != tt.want { + t.Errorf("cleanBundleSymbolicName(%q) = %q, want %q", tt.raw, got, tt.want) + } + }) + } +} + +// ============================================================================ +// T-1: cleanName tests +// ============================================================================ + +func TestCleanName(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + raw string + want string + }{ + { + name: "simple name", + raw: "Gson", + want: "gson", + }, + { + name: "name with spaces", + raw: "Apache Commons Lang", + want: "apache-commons-lang", + }, + { + name: "empty string", + raw: "", + want: "", + }, + { + name: "already clean", + raw: "my-lib", + want: "my-lib", + }, + { + name: "underscores preserved", + raw: "my_lib", + want: "my_lib", + }, + { + name: "dots preserved", + raw: "org.eclipse.osgi", + want: "org.eclipse.osgi", + }, + { + name: "mixed case with special chars", + raw: "Bouncy Castle Provider", + want: "bouncy-castle-provider", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got := cleanName(tt.raw) + if got != tt.want { + t.Errorf("cleanName(%q) = %q, want %q", tt.raw, got, tt.want) + } + }) + } +} + +// ============================================================================ +// T-1: parseManifestAttributes tests +// ============================================================================ + +// createTestJar creates an in-memory JAR (ZIP) with a MANIFEST.MF containing the given content. 
+func createTestJar(t *testing.T, manifestContent string) *zip.Reader { + t.Helper() + + buf := new(bytes.Buffer) + w := zip.NewWriter(buf) + + f, err := w.Create("META-INF/MANIFEST.MF") + if err != nil { + t.Fatalf("failed to create MANIFEST.MF in test jar: %v", err) + } + + _, err = f.Write([]byte(manifestContent)) + if err != nil { + t.Fatalf("failed to write MANIFEST.MF content: %v", err) + } + + if err := w.Close(); err != nil { + t.Fatalf("failed to close test jar: %v", err) + } + + reader, err := zip.NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len())) + if err != nil { + t.Fatalf("failed to open test jar: %v", err) + } + + return reader +} + +func TestParseManifestAttributes(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + manifest string + wantBSN string + wantBundleName string + wantBundleVersion string + wantImplVersion string + wantAMN string + wantErr bool + }{ + { + name: "full OSGi manifest", + manifest: "Manifest-Version: 1.0\r\n" + + "Bundle-SymbolicName: org.bouncycastle.bcprov-jdk18on\r\n" + + "Bundle-Name: bcprov\r\n" + + "Bundle-Version: 1.78.1\r\n" + + "Implementation-Title: bcprov\r\n" + + "Implementation-Version: 1.78.1\r\n", + wantBSN: "org.bouncycastle.bcprov-jdk18on", + wantBundleName: "bcprov", + wantBundleVersion: "1.78.1", + wantImplVersion: "1.78.1", + }, + { + name: "only manifest version", + manifest: "Manifest-Version: 1.0\r\n", + wantBSN: "", + wantBundleName: "", + wantBundleVersion: "", + wantImplVersion: "", + }, + { + name: "BSN with singleton directive", + manifest: "Manifest-Version: 1.0\r\n" + + "Bundle-SymbolicName: org.eclipse.core.runtime;singleton:=true\r\n" + + "Bundle-Version: 3.26.0\r\n", + wantBSN: "org.eclipse.core.runtime;singleton:=true", + wantBundleName: "", + wantBundleVersion: "3.26.0", + wantImplVersion: "", + }, + { + name: "implementation attributes only", + manifest: "Manifest-Version: 1.0\r\n" + + "Implementation-Title: Gson\r\n" + + "Implementation-Version: 2.10.1\r\n", + 
wantBSN: "", + wantBundleName: "", + wantBundleVersion: "", + wantImplVersion: "2.10.1", + }, + { + name: "continuation lines", + manifest: "Manifest-Version: 1.0\r\n" + + "Bundle-SymbolicName: org.example.very.long.symbolic\r\n" + + " .name.continued\r\n" + + "Bundle-Version: 1.0.0\r\n", + wantBSN: "org.example.very.long.symbolic.name.continued", + wantBundleName: "", + wantBundleVersion: "1.0.0", + wantImplVersion: "", + }, + { + // Per-entry sections follow the blank line that ends the main section. + // Attributes in those sections must not overwrite main-section values. + name: "per-entry section ignored after blank line", + manifest: "Manifest-Version: 1.0\r\n" + + "Bundle-SymbolicName: org.bouncycastle.bcprov\r\n" + + "Bundle-Version: 1.78.1\r\n" + + "\r\n" + + "Name: com/example/SomeClass.class\r\n" + + "Bundle-SymbolicName: com.attacker.evil\r\n" + + "Implementation-Version: 99.0.0\r\n", + wantBSN: "org.bouncycastle.bcprov", + wantBundleName: "", + wantBundleVersion: "1.78.1", + wantImplVersion: "", + }, + { + name: "Automatic-Module-Name extracted", + manifest: "Manifest-Version: 1.0\r\n" + + "Bundle-SymbolicName: bcprov\r\n" + + "Bundle-Name: bcprov\r\n" + + "Bundle-Version: 1.78.1\r\n" + + "Automatic-Module-Name: org.bouncycastle.provider\r\n", + wantBSN: "bcprov", + wantBundleName: "bcprov", + wantBundleVersion: "1.78.1", + wantAMN: "org.bouncycastle.provider", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + reader := createTestJar(t, tt.manifest) + mf, err := parseManifestAttributes(reader) + if (err != nil) != tt.wantErr { + t.Errorf("parseManifestAttributes() error = %v, wantErr %v", err, tt.wantErr) + return + } + if mf.bundleSymbolicName != tt.wantBSN { + t.Errorf("BSN = %q, want %q", mf.bundleSymbolicName, tt.wantBSN) + } + if mf.bundleName != tt.wantBundleName { + t.Errorf("BundleName = %q, want %q", mf.bundleName, tt.wantBundleName) + } + if mf.bundleVersion != tt.wantBundleVersion { + 
t.Errorf("BundleVersion = %q, want %q", mf.bundleVersion, tt.wantBundleVersion) + } + if mf.implVersion != tt.wantImplVersion { + t.Errorf("ImplVersion = %q, want %q", mf.implVersion, tt.wantImplVersion) + } + if mf.automaticModuleName != tt.wantAMN { + t.Errorf("AutomaticModuleName = %q, want %q", mf.automaticModuleName, tt.wantAMN) + } + }) + } +} + +func TestParseManifestAttributes_NoManifest(t *testing.T) { + t.Parallel() + + // Create a JAR with no MANIFEST.MF + buf := new(bytes.Buffer) + w := zip.NewWriter(buf) + + f, err := w.Create("some/other/file.txt") + if err != nil { + t.Fatalf("failed to create file: %v", err) + } + + fmt.Fprintln(f, "hello") + + if err := w.Close(); err != nil { + t.Fatalf("failed to close jar: %v", err) + } + + reader, err := zip.NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len())) + if err != nil { + t.Fatalf("failed to open jar: %v", err) + } + + mf, err := parseManifestAttributes(reader) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + // All should be empty when no MANIFEST.MF exists + if mf != (manifestAttrs{}) { + t.Errorf("expected zero manifestAttrs, got %+v", mf) + } +} + +// ============================================================================ +// T-2: parseGroupID tests +// ============================================================================ + +func TestParseGroupId(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + bundleSymbolicName string + bundleName string + filenameArtifact string + automaticModuleName string + want string + }{ + { + name: "bouncycastle - filename candidate matches BSN suffix", + bundleSymbolicName: "org.bouncycastle.bcprov-jdk18on", + bundleName: "", + filenameArtifact: "bcprov-jdk18on", + want: "org.bouncycastle", + }, + { + name: "google gson - cleaned bundle name matches", + bundleSymbolicName: "com.google.gson", + bundleName: "Gson", + filenameArtifact: "gson", + want: "com.google", + }, + { + name: "eclipse osgi - no candidate match, 
fallback to BSN", + bundleSymbolicName: "org.eclipse.osgi", + bundleName: "", + filenameArtifact: "org.eclipse.osgi", + want: "org.eclipse.osgi", + }, + { + name: "short BSN - len <= 5, fallback to BSN", + bundleSymbolicName: "ab.cd", + bundleName: "", + filenameArtifact: "cd", + want: "ab.cd", + }, + { + name: "BSN with no dots and no AMN - fallback to BSN", + bundleSymbolicName: "mybundle", + bundleName: "", + filenameArtifact: "mybundle", + want: "mybundle", + }, + { + name: "empty BSN", + bundleSymbolicName: "", + bundleName: "", + filenameArtifact: "somelib", + want: "", + }, + { + name: "filename matches but bundleName also matches - filename tried first", + bundleSymbolicName: "org.apache.commons-lang3", + bundleName: "", + filenameArtifact: "commons-lang3", + want: "org.apache", + }, + { + name: "bundleName candidate matches over filename", + bundleSymbolicName: "com.example.mylib", + bundleName: "MyLib", + filenameArtifact: "something-else", + want: "com.example", + }, + { + name: "BSN exactly 6 chars with dot - len > 5 passes", + bundleSymbolicName: "ab.cde", + bundleName: "", + filenameArtifact: "cde", + want: "ab", + }, + { + name: "candidate same as BSN - no stripping", + bundleSymbolicName: "org.example", + bundleName: "", + filenameArtifact: "org.example", + want: "org.example", + }, + // AMN fallback cases (BSN has no dots) + { + // BouncyCastle: BSN="bcprov" has no dots; AMN="org.bouncycastle.provider" + // has 3 segments so last-segment stripping gives "org.bouncycastle". + name: "AMN last-segment strip when BSN has no dots - bouncycastle", + bundleSymbolicName: "bcprov", + bundleName: "bcprov", + filenameArtifact: "bcprov-jdk15to18", + automaticModuleName: "org.bouncycastle.provider", + want: "org.bouncycastle", + }, + { + // AMN "com.mylib" has only 1 dot (2 segments). The dot-prefix heuristic + // can't match because filenameArtifact differs from AMN's suffix, so it + // falls through to the last-segment threshold check (requires >= 2 dots). 
+ // With only 1 dot the strip is skipped and BSN is returned as-is. + name: "AMN with 1 dot - below threshold, fallback to BSN", + bundleSymbolicName: "mylib", + bundleName: "", + filenameArtifact: "unrelated-artifact", + automaticModuleName: "com.mylib", + want: "mylib", + }, + { + // BSN has dots → AMN path is skipped; BSN dot-prefix is used. + name: "BSN with dots - AMN path not taken", + bundleSymbolicName: "org.bouncycastle.bcprov-jdk18on", + bundleName: "", + filenameArtifact: "bcprov-jdk18on", + automaticModuleName: "org.bouncycastle.provider", + want: "org.bouncycastle", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got := parseGroupID(tt.bundleSymbolicName, tt.bundleName, tt.filenameArtifact, tt.automaticModuleName) + if got != tt.want { + t.Errorf("parseGroupID(%q, %q, %q, %q) = %q, want %q", + tt.bundleSymbolicName, tt.bundleName, tt.filenameArtifact, tt.automaticModuleName, got, tt.want) + } + }) + } +} + +// ============================================================================ +// T-3: resolveManifestPackage tests +// ============================================================================ + +func TestResolveManifestPackage(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + filenameArtifact string + filenameVersion string + bsn string + bundleName string + bundleVersion string + implVersion string + automaticModuleName string + wantName string + wantVersion string + }{ + { + // JAR has BSN with dots: dot-prefix heuristic gives groupId=org.bouncycastle. + // Bundle-Name "bcprov" is a display name; filename artifact "bcprov-jdk18on" + // is the correct Maven artifactId and is always preferred. 
+ name: "bouncycastle with full BSN - filename artifact preferred over bundle name", + filenameArtifact: "bcprov-jdk18on", + filenameVersion: "1.78.1", + bsn: "org.bouncycastle.bcprov-jdk18on", + bundleName: "bcprov", + bundleVersion: "1.78.1", + implVersion: "1.78.1", + wantName: "org.bouncycastle:bcprov-jdk18on", + wantVersion: "1.78.1", + }, + { + // Real bcprov-jdk15to18: BSN="bcprov" (no dots), Bundle-Name="bcprov" (same as BSN), + // AMN="org.bouncycastle.provider". Expects filename artifact and AMN-derived groupId. + name: "bouncycastle jdk15to18 - AMN groupId + filename artifactId", + filenameArtifact: "bcprov-jdk15to18", + filenameVersion: "1.78.1", + bsn: "bcprov", + bundleName: "bcprov", + bundleVersion: "1..78.1", + implVersion: "1.78.1.0", + automaticModuleName: "org.bouncycastle.provider", + wantName: "org.bouncycastle:bcprov-jdk15to18", + wantVersion: "1.78.1", + }, + { + name: "version agreement - bundle==impl preferred", + filenameArtifact: "mylib", + filenameVersion: "2.0.0", + bsn: "com.example.mylib", + bundleName: "", + bundleVersion: "1.5.0", + implVersion: "1.5.0", + wantName: "com.example:mylib", + wantVersion: "1.5.0", + }, + { + name: "version disagreement - filename preferred", + filenameArtifact: "mylib", + filenameVersion: "2.0.0", + bsn: "com.example.mylib", + bundleName: "", + bundleVersion: "1.5.0", + implVersion: "1.6.0", + wantName: "com.example:mylib", + wantVersion: "2.0.0", + }, + { + name: "no filename version - bundle version fallback", + filenameArtifact: "mylib", + filenameVersion: "", + bsn: "com.example.mylib", + bundleName: "", + bundleVersion: "1.5.0", + implVersion: "", + wantName: "com.example:mylib", + wantVersion: "1.5.0", + }, + { + name: "no filename version no bundle version - impl version fallback", + filenameArtifact: "mylib", + filenameVersion: "", + bsn: "com.example.mylib", + bundleName: "", + bundleVersion: "", + implVersion: "3.0.0", + wantName: "com.example:mylib", + wantVersion: "3.0.0", + }, + { + // 
Bundle-Name and Implementation-Title are display names; filename artifact + // is always used as the Maven artifactId regardless. + name: "filename artifact used despite bundle name and impl title being present", + filenameArtifact: "filename-art", + filenameVersion: "1.0.0", + bsn: "com.example.filename-art", + bundleName: "My Bundle", + bundleVersion: "1.0.0", + implVersion: "1.0.0", + wantName: "com.example:filename-art", + wantVersion: "1.0.0", + }, + { + name: "filename artifact used when no bundle name or impl title", + filenameArtifact: "filename-art", + filenameVersion: "1.0.0", + bsn: "com.example.filename-art", + bundleName: "", + bundleVersion: "1.0.0", + implVersion: "1.0.0", + wantName: "com.example:filename-art", + wantVersion: "1.0.0", + }, + { + name: "artifactId priority - filename when no bundleName or implTitle", + filenameArtifact: "filename-art", + filenameVersion: "1.0.0", + bsn: "com.example.filename-art", + bundleName: "", + bundleVersion: "1.0.0", + implVersion: "1.0.0", + wantName: "com.example:filename-art", + wantVersion: "1.0.0", + }, + { + name: "empty BSN - no package emitted", + filenameArtifact: "mylib", + filenameVersion: "1.0.0", + bsn: "", + bundleName: "", + bundleVersion: "", + implVersion: "", + wantName: "", + wantVersion: "", + }, + { + name: "no version anywhere - still emits package", + filenameArtifact: "mylib", + filenameVersion: "", + bsn: "com.example.mylib", + bundleName: "", + bundleVersion: "", + implVersion: "", + wantName: "com.example:mylib", + wantVersion: "", + }, + { + name: "BSN with singleton directive - gets cleaned", + filenameArtifact: "runtime", + filenameVersion: "3.26.0", + bsn: "org.eclipse.core.runtime;singleton:=true", + bundleName: "", + bundleVersion: "3.26.0", + implVersion: "", + wantName: "org.eclipse.core:runtime", + wantVersion: "3.26.0", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + gotName, gotVersion := resolveManifestPackage( + 
tt.filenameArtifact, tt.filenameVersion, + tt.bsn, tt.bundleName, tt.bundleVersion, + tt.implVersion, + tt.automaticModuleName, + ) + if gotName != tt.wantName { + t.Errorf("resolveManifestPackage() name = %q, want %q", gotName, tt.wantName) + } + if gotVersion != tt.wantVersion { + t.Errorf("resolveManifestPackage() version = %q, want %q", gotVersion, tt.wantVersion) + } + }) + } +} diff --git a/pkg/lockfile/java/parse-jar-pom-properties.go b/pkg/lockfile/java/parse-jar-pom-properties.go index c90fbdbb..bc935113 100644 --- a/pkg/lockfile/java/parse-jar-pom-properties.go +++ b/pkg/lockfile/java/parse-jar-pom-properties.go @@ -86,6 +86,11 @@ func (e JarPomPropertiesExtractor) Extract(f lockfile.DepFile, context lockfile. }) } + // Fallback: if no pom.properties found, try to infer from MANIFEST.MF + filename + if len(packages) == 0 { + packages = extractFromManifest(f.Path(), zipReader, packages) + } + return packages, nil } diff --git a/pkg/lockfile/java/parse-jar-pom-properties_test.go b/pkg/lockfile/java/parse-jar-pom-properties_test.go index bd6f2957..17ae1569 100644 --- a/pkg/lockfile/java/parse-jar-pom-properties_test.go +++ b/pkg/lockfile/java/parse-jar-pom-properties_test.go @@ -215,3 +215,159 @@ func TestParseJarPomProperties_MalformedPomProperties(t *testing.T) { // Malformed pom.properties (missing groupId) should be skipped gracefully testutil.ExpectPackages(t, packages, []lockfile.PackageDetails{}) } + +// ============================================================================ +// MANIFEST.MF Fallback Integration Tests +// ============================================================================ + +func TestParseJarPomProperties_ManifestFallback_Bouncycastle(t *testing.T) { + t.Parallel() + + dir, err := os.Getwd() + if err != nil { + t.Fatalf("Got unexpected error: %v", err) + } + + path := filepath.Join(dir, "../fixtures/jar/bcprov-jdk18on-1.78.1.jar") + packages, err := java.ParseJarPomProperties(path) + if err != nil { + t.Errorf("Got 
unexpected error: %v", err) + } + + testutil.ExpectPackagesWithoutLocations(t, packages, []lockfile.PackageDetails{ + { + Name: "org.bouncycastle:bcprov-jdk18on", + Version: "1.78.1", + PackageManager: models.Maven, + Ecosystem: models.EcosystemMaven, + Opaque: true, + IsDirect: true, + }, + }) +} + +func TestParseJarPomProperties_ManifestFallback_NoBundleSymbolicName(t *testing.T) { + t.Parallel() + + dir, err := os.Getwd() + if err != nil { + t.Fatalf("Got unexpected error: %v", err) + } + + path := filepath.Join(dir, "../fixtures/jar/manifest-no-bsn-1.0.0.jar") + packages, err := java.ParseJarPomProperties(path) + if err != nil { + t.Errorf("Got unexpected error: %v", err) + } + + // No Bundle-SymbolicName means no groupId can be inferred -> no package emitted + testutil.ExpectPackages(t, packages, []lockfile.PackageDetails{}) +} + +func TestParseJarPomProperties_ManifestFallback_VersionPriority(t *testing.T) { + t.Parallel() + + dir, err := os.Getwd() + if err != nil { + t.Fatalf("Got unexpected error: %v", err) + } + + // Bundle-Version=1.5.0, Implementation-Version=1.6.0, filename=2.0.0 + // Since they disagree, filename version wins + path := filepath.Join(dir, "../fixtures/jar/mylib-2.0.0.jar") + packages, err := java.ParseJarPomProperties(path) + if err != nil { + t.Errorf("Got unexpected error: %v", err) + } + + testutil.ExpectPackagesWithoutLocations(t, packages, []lockfile.PackageDetails{ + { + Name: "com.example:mylib", + Version: "2.0.0", + PackageManager: models.Maven, + Ecosystem: models.EcosystemMaven, + Opaque: true, + IsDirect: true, + }, + }) +} + +func TestParseJarPomProperties_ManifestFallback_SingletonDirective(t *testing.T) { + t.Parallel() + + dir, err := os.Getwd() + if err != nil { + t.Fatalf("Got unexpected error: %v", err) + } + + path := filepath.Join(dir, "../fixtures/jar/runtime-3.26.0.jar") + packages, err := java.ParseJarPomProperties(path) + if err != nil { + t.Errorf("Got unexpected error: %v", err) + } + + 
testutil.ExpectPackagesWithoutLocations(t, packages, []lockfile.PackageDetails{ + { + Name: "org.eclipse.core:runtime", + Version: "3.26.0", + PackageManager: models.Maven, + Ecosystem: models.EcosystemMaven, + Opaque: true, + IsDirect: true, + }, + }) +} + +func TestParseJarPomProperties_PomPropertiesTakesPrecedence(t *testing.T) { + t.Parallel() + + dir, err := os.Getwd() + if err != nil { + t.Fatalf("Got unexpected error: %v", err) + } + + // JAR has both pom.properties and MANIFEST.MF - pom.properties should win + path := filepath.Join(dir, "../fixtures/jar/has-both-1.0.0.jar") + packages, err := java.ParseJarPomProperties(path) + if err != nil { + t.Errorf("Got unexpected error: %v", err) + } + + testutil.ExpectPackagesWithoutLocations(t, packages, []lockfile.PackageDetails{ + { + Name: "com.example:has-both", + Version: "1.0.0", + PackageManager: models.Maven, + Ecosystem: models.EcosystemMaven, + Opaque: true, + IsDirect: true, + }, + }) +} + +func TestParseJarPomProperties_ManifestFallback_VersionAgreement(t *testing.T) { + t.Parallel() + + dir, err := os.Getwd() + if err != nil { + t.Fatalf("Got unexpected error: %v", err) + } + + // Bundle-Version == Implementation-Version -> high confidence, use it + path := filepath.Join(dir, "../fixtures/jar/gson-2.10.1.jar") + packages, err := java.ParseJarPomProperties(path) + if err != nil { + t.Errorf("Got unexpected error: %v", err) + } + + testutil.ExpectPackagesWithoutLocations(t, packages, []lockfile.PackageDetails{ + { + Name: "com.google:gson", + Version: "2.10.1", + PackageManager: models.Maven, + Ecosystem: models.EcosystemMaven, + Opaque: true, + IsDirect: true, + }, + }) +}