Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 53 additions & 53 deletions src/main/java/com/github/packageurl/PackageURL.java
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,6 @@ public PackageURL(final String type, final String name) throws MalformedPackageU
public PackageURL(final String type, final String namespace, final String name, final String version,
final TreeMap<String, String> qualifiers, final String subpath)
throws MalformedPackageURLException {

this.scheme = validateScheme("pkg");
this.type = validateType(type);
this.namespace = validateNamespace(namespace);
this.name = validateName(name);
Expand All @@ -106,9 +104,14 @@ public PackageURL(final String type, final String namespace, final String name,
}

/**
* The PackageURL scheme constant
* The PackageURL scheme constant.
*/
public static final String SCHEME = "pkg";

/**
* The PackageURL scheme ({@code "pkg"}) constant followed by a colon ({@code ':'}).
*/
private String scheme;
private static final String SCHEME_PART = SCHEME + ':';

/**
* The package "type" or package "protocol" such as maven, npm, nuget, gem, pypi, etc.
Expand Down Expand Up @@ -170,7 +173,7 @@ public PackageURLBuilder toBuilder() {
* @since 1.0.0
*/
public String getScheme() {
return scheme;
return SCHEME;
}

/**
Expand Down Expand Up @@ -233,11 +236,10 @@ public String getSubpath() {
return subpath;
}

private String validateScheme(final String value) throws MalformedPackageURLException {
if ("pkg".equals(value)) {
return "pkg";
}
throw new MalformedPackageURLException("The PackageURL scheme is invalid");
private void validateScheme(final String value) throws MalformedPackageURLException {
if (!SCHEME.equals(value)) {
throw new MalformedPackageURLException("The PackageURL scheme '" + value + "' is invalid. It should be '" + SCHEME + "'");
}
}

private String validateType(final String value) throws MalformedPackageURLException {
Expand Down Expand Up @@ -397,7 +399,7 @@ public String canonicalize() {
*/
private String canonicalize(boolean coordinatesOnly) {
final StringBuilder purl = new StringBuilder();
purl.append(scheme).append(":");
purl.append(SCHEME_PART);
if (type != null) {
purl.append(type);
}
Expand Down Expand Up @@ -519,73 +521,73 @@ public static String uriDecode(String source) {
*/
private void parse(final String purl) throws MalformedPackageURLException {
if (purl == null || purl.trim().isEmpty()) {
throw new MalformedPackageURLException("Invalid purl: Contains an empty or null value");
throw new MalformedPackageURLException("Invalid purl: Is empty or null");
}

try {
final URI uri = new URI(purl);
// Check to ensure that none of these parts are parsed. If so, it's an invalid purl.
if (uri.getUserInfo() != null || uri.getPort() != -1) {
throw new MalformedPackageURLException("Invalid purl: Contains parts not supported by the purl spec");
if (!purl.startsWith(SCHEME_PART)) {
throw new MalformedPackageURLException("Invalid purl: " + purl + ". It does not start with '" + SCHEME_PART + "'");
}

this.scheme = validateScheme(uri.getScheme());
final int length = purl.length();
int start = SCHEME_PART.length();

// subpath is optional - check for existence
if (uri.getRawFragment() != null && !uri.getRawFragment().isEmpty()) {
this.subpath = validatePath(parsePath(uri.getRawFragment(), true), true);
while (start < length && '/' == purl.charAt(start)) {
start++;
}
// This is the purl (minus the scheme) that needs parsed.
final StringBuilder remainder = new StringBuilder(uri.getRawSchemeSpecificPart());

// qualifiers are optional - check for existence
int index = remainder.lastIndexOf("?");
if (index >= 0) {
this.qualifiers = parseQualifiers(remainder.substring(index + 1));
remainder.setLength(index);
final URI uri = new URI(String.join("/", SCHEME_PART, purl.substring(start)));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice trick! 💯

This way we get the query parsing for free.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, as I tried to explain in the commit message, the reason that the parsing was not working in the original code is that the canonical purl really should have a single '/', as in "pkg:/", not "pkg:". According to the javadocs for java.net.URI, if it has no '/', then it's not a hierarchical URI, and will not be parsed into its components.

Also according to RFC 2396, having two slashes as in "pkg://" is not allowed unless you have an authority component. In other words, two slashes will make it treat the purl type as the host portion of the URI in this case instead of part if the path.

So the purl spec is not following the RFC is a couple ways. In the past, parsers were more accepting of invalid input, but I am not sure that this is a good idea to keep allowing this.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PURL spec does follow RFC 2396 and 3986 as far as I can tell. It just chooses to use the opaque_part syntax from RFC 2396.

I agree that using the hierarchical pkg:/ syntax would be easier for implementations, especially in languages that don't have an implementation of RFC 3986. Maybe it is worth to open an issue in the spec repo.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But, the spec specifically implies that it is a hierarchical:

The purl components are mapped to these URL components:

purl scheme: this is a URL scheme with a constant value: pkg
purl type, namespace, name and version components: these are collectively mapped to a URL path
purl qualifiers: this maps to a URL query
purl subpath: this is a URL fragment

But, none of this "maps" as written.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't really expect that they will change any of this, but they really ought to stop referencing wikipedia and reference one or both of these RFCs to clarify these things.

RFC 2396 says:

absoluteURI = scheme ":" ( hier_part | opaque_part )
hier_part = ( net_path | abs_path ) [ "?" query ]
net_path = "//" authority [ abs_path ]
abs_path = "/" path_segments

which means: no empty authority and hier_part has to be an abs_path.

So, I am not sure why you say RFC 2396 and 3986 differ as to this.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

RFC 2396 says:

absoluteURI = scheme ":" ( hier_part | opaque_part )
hier_part = ( net_path | abs_path ) [ "?" query ]
net_path = "//" authority [ abs_path ]
abs_path = "/" path_segments

which means: no empty authority and hier_part has to be an abs_path.

Currently the PURLs don't have a solid / after the schema, so it is of the form scheme ":" opaque_part.

So, I am not sure why you say RFC 2396 and 3986 differ as to this.

RFC 3986 improved the grammar, by replacing opaque_part with path-rootless [ "?" query ], so a compliant RFC 3986 parser would split the opaque part into a path and a query. And RFC 2396 parser does not do it.


validateScheme(uri.getScheme());

// Check to ensure that none of these parts are parsed. If so, it's an invalid purl.
if (uri.getRawAuthority() != null) {
throw new MalformedPackageURLException("Invalid purl: A purl must NOT contain a URL Authority ");
}

// subpath is optional - check for existence
final String rawFragment = uri.getRawFragment();
if (rawFragment != null && !rawFragment.isEmpty()) {
this.subpath = validatePath(parsePath(rawFragment, true), true);
}
// qualifiers are optional - check for existence
final String rawQuery = uri.getRawQuery();
if (rawQuery != null && !rawQuery.isEmpty()) {
this.qualifiers = parseQualifiers(rawQuery);

// trim leading and trailing '/'
}
// this is the rest of the purl that needs to be parsed
String remainder = uri.getRawPath();
// trim trailing '/'
int end = remainder.length() - 1;
while (end > 0 && '/' == remainder.charAt(end)) {
end--;
}
if (end < remainder.length() - 1) {
remainder.setLength(end + 1);
}
int start = 0;
while (start < remainder.length() && '/' == remainder.charAt(start)) {
start++;
}
//there is no need for the "expensive" delete operation if the start is tracked and used throughout the rest
// of the parsing.
//if (start > 0) {
// remainder.delete(0, start);
//}

remainder = remainder.substring(0, end + 1);
// there is exactly one leading '/' at this point
start = 1;
// type
index = remainder.indexOf("/", start);
int index = remainder.indexOf('/', start);
if (index <= start) {
throw new MalformedPackageURLException("Invalid purl: does not contain both a type and name");
}
this.type = validateType(remainder.substring(start, index).toLowerCase());
//remainder.delete(0, index + 1);
start = index + 1;

// version is optional - check for existence
index = remainder.lastIndexOf("@");
index = remainder.lastIndexOf('@');
if (index >= start) {
this.version = validateVersion(percentDecode(remainder.substring(index + 1)));
remainder.setLength(index);
remainder = remainder.substring(0, index);
}

// The 'remainder' should now consist of the an optional namespace, and the name
index = remainder.lastIndexOf("/");
// The 'remainder' should now consist of an optional namespace and the name
index = remainder.lastIndexOf('/');
if (index <= start) {
this.name = validateName(percentDecode(remainder.substring(start)));
} else {
this.name = validateName(percentDecode(remainder.substring(index + 1)));
remainder.setLength(index);
remainder = remainder.substring(0, index);
this.namespace = validateNamespace(parsePath(remainder.substring(start), false));
}
verifyTypeConstraints(this.type, this.namespace, this.name);
Expand Down Expand Up @@ -672,8 +674,7 @@ public boolean isBaseEquals(final PackageURL purl) {
* @since 1.4.0
*/
public boolean isCoordinatesEquals(final PackageURL purl) {
return Objects.equals(scheme, purl.scheme) &&
Objects.equals(type, purl.type) &&
return Objects.equals(type, purl.type) &&
Objects.equals(namespace, purl.namespace) &&
Objects.equals(name, purl.name) &&
Objects.equals(version, purl.version);
Expand Down Expand Up @@ -708,8 +709,7 @@ public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
final PackageURL other = (PackageURL) o;
return Objects.equals(scheme, other.scheme) &&
Objects.equals(type, other.type) &&
return Objects.equals(type, other.type) &&
Objects.equals(namespace, other.namespace) &&
Objects.equals(name, other.name) &&
Objects.equals(version, other.version) &&
Expand All @@ -719,7 +719,7 @@ public boolean equals(Object o) {

@Override
public int hashCode() {
return Objects.hash(scheme, type, namespace, name, version, qualifiers, subpath);
return Objects.hash(type, namespace, name, version, qualifiers, subpath);
}

/**
Expand Down