2929import java .nio .ByteBuffer ;
3030import java .nio .charset .StandardCharsets ;
3131import java .util .Arrays ;
32+ import java .util .BitSet ;
3233import java .util .Collections ;
3334import java .util .Map ;
3435import java .util .Objects ;
@@ -59,6 +60,79 @@ public final class PackageURL implements Serializable {
5960
6061 private static final char PERCENT_CHAR = '%' ;
6162
/**
 * Size of every character-class table below: one bit per US-ASCII code point.
 * Values outside {@code [0, NBITS)} are never members of any table.
 */
private static final int NBITS = 128;

/** RFC 3986 {@code DIGIT}: {@code '0'..'9'}. */
private static final BitSet DIGIT = new BitSet(NBITS);
static {
    DIGIT.set('0', '9' + 1); // upper bound of BitSet.set(from, to) is exclusive
}

/** Lower-case ASCII letters {@code 'a'..'z'}. */
private static final BitSet LOWER = new BitSet(NBITS);
static {
    LOWER.set('a', 'z' + 1);
}

/** Upper-case ASCII letters {@code 'A'..'Z'}. */
private static final BitSet UPPER = new BitSet(NBITS);
static {
    UPPER.set('A', 'Z' + 1);
}

/** RFC 3986 {@code ALPHA}: union of {@link #LOWER} and {@link #UPPER}. */
private static final BitSet ALPHA = new BitSet(NBITS);
static {
    ALPHA.or(LOWER);
    ALPHA.or(UPPER);
}

/** Union of {@link #ALPHA} and {@link #DIGIT}. */
private static final BitSet ALPHA_DIGIT = new BitSet(NBITS);
static {
    ALPHA_DIGIT.or(ALPHA);
    ALPHA_DIGIT.or(DIGIT);
}

/** RFC 3986 {@code unreserved}: {@code ALPHA / DIGIT / "-" / "." / "_" / "~"}. */
private static final BitSet UNRESERVED = new BitSet(NBITS);
static {
    UNRESERVED.or(ALPHA_DIGIT);
    UNRESERVED.set('-');
    UNRESERVED.set('.');
    UNRESERVED.set('_');
    UNRESERVED.set('~');
}

/** RFC 3986 {@code sub-delims}. */
private static final BitSet SUB_DELIMS = new BitSet(NBITS);
static {
    SUB_DELIMS.set('!');
    SUB_DELIMS.set('$');
    SUB_DELIMS.set('&');
    SUB_DELIMS.set('\'');
    SUB_DELIMS.set('(');
    SUB_DELIMS.set(')');
    SUB_DELIMS.set('*');
    SUB_DELIMS.set('+');
    SUB_DELIMS.set(',');
    SUB_DELIMS.set(';');
    SUB_DELIMS.set('=');
}

/**
 * Characters written verbatim inside a path segment: RFC 3986 {@code pchar}
 * without {@code '@'}.
 */
private static final BitSet PCHAR = new BitSet(NBITS);
static {
    PCHAR.or(UNRESERVED);
    PCHAR.or(SUB_DELIMS);
    PCHAR.set(':');
    // '@' is deliberately left out: always encode '@' in the path due to version
}

/**
 * Characters written verbatim inside a subpath (the URI fragment):
 * {@link #PCHAR} plus {@code '/'} and {@code '?'}.
 */
private static final BitSet FRAGMENT = new BitSet(NBITS);
static {
    FRAGMENT.or(PCHAR);
    FRAGMENT.set('/');
    FRAGMENT.set('?');
}

/**
 * Characters written verbatim inside a qualifier value: the same set as
 * {@link #FRAGMENT} except {@code '&'}.
 *
 * <p>Qualifier pairs are joined with {@code '&'} when the purl is canonicalized, so a
 * literal {@code '&'} inside a value has to be escaped as {@code %26}; otherwise it
 * would be indistinguishable from the separator between two pairs.
 */
private static final BitSet QUERY = new BitSet(NBITS);
static {
    QUERY.or(FRAGMENT);
    QUERY.clear('&');
}

62136 /**
63137 * Constructs a new PackageURL object by parsing the specified string.
64138 *
@@ -472,37 +546,42 @@ private String canonicalize(boolean coordinatesOnly) {
472546 final StringBuilder purl = new StringBuilder ();
473547 purl .append (SCHEME_PART ).append (type ).append ("/" );
474548 if (namespace != null ) {
475- purl .append (encodePath (namespace ));
549+ purl .append (encodePath (namespace , PCHAR ));
476550 purl .append ("/" );
477551 }
478- purl .append (percentEncode (name ));
552+ purl .append (percentEncode (name , PCHAR ));
479553 if (version != null ) {
480- purl .append ("@" ).append (percentEncode (version ));
554+ purl .append ("@" ).append (percentEncode (version , PCHAR ));
481555 }
482556 if (! coordinatesOnly ) {
483557 if (qualifiers != null ) {
484558 purl .append ("?" );
485559 qualifiers .forEach ((key , value ) -> {
486560 purl .append (toLowerCase (key ));
487561 purl .append ("=" );
488- purl .append (percentEncode (value ));
562+ purl .append (percentEncode (value , QUERY ));
489563 purl .append ("&" );
490564 });
491565 purl .setLength (purl .length () - 1 );
492566 }
493567 if (subpath != null ) {
494- purl .append ("#" ).append (encodePath (subpath ));
568+ purl .append ("#" ).append (encodePath (subpath , FRAGMENT ));
495569 }
496570 }
497571 return purl .toString ();
498572 }
499573
500- private static boolean isUnreserved (int c ) {
501- return (isValidCharForKey (c ) || c == '~' );
574+ private static boolean isUnreserved (int c , BitSet safe ) {
575+ if (c < 0 || c >= NBITS ) {
576+ return false ;
577+ }
578+
579+ return safe .get (c );
580+
502581 }
503582
504- private static boolean shouldEncode (int c ) {
505- return !isUnreserved (c );
583+ private static boolean shouldEncode (int c , BitSet safe ) {
584+ return !isUnreserved (c , safe );
506585 }
507586
508587 private static boolean isAlpha (int c ) {
@@ -564,11 +643,11 @@ private static int indexOfPercentChar(final byte[] bytes, final int start) {
564643 return IntStream .range (start , bytes .length ).filter (i -> isPercent (bytes [i ])).findFirst ().orElse (-1 );
565644 }
566645
567- private static int indexOfUnsafeChar (final byte [] bytes , final int start ) {
568- return IntStream .range (start , bytes .length ).filter (i -> shouldEncode (bytes [i ])).findFirst ().orElse (-1 );
646+ private static int indexOfUnsafeChar (final byte [] bytes , final int start , BitSet safe ) {
647+ return IntStream .range (start , bytes .length ).filter (i -> shouldEncode (bytes [i ], safe )).findFirst ().orElse (-1 );
569648 }
570649
571- private static byte percentDecode (final byte [] bytes , final int start ) {
650+ static byte percentDecode (final byte [] bytes , final int start ) {
572651 if (start + 2 >= bytes .length ) {
573652 throw new ValidationException ("Incomplete percent encoding at offset " + start + " with value '" + new String (bytes , start , bytes .length - start , StandardCharsets .UTF_8 ) + "'" );
574653 }
@@ -598,15 +677,15 @@ public static String percentDecode(final String source) {
598677 }
599678
600679 byte [] bytes = source .getBytes (StandardCharsets .UTF_8 );
601-
602- int off = 0 ;
603- int idx = indexOfPercentChar (bytes , off );
680+ int idx = indexOfPercentChar (bytes , 0 );
604681
605682 if (idx == -1 ) {
606683 return source ;
607684 }
608685
686+ int off = idx ;
609687 ByteBuffer buffer = ByteBuffer .wrap (bytes );
688+ buffer .position (off );
610689
611690 while (true ) {
612691 int len = idx - off ;
@@ -650,14 +729,18 @@ private static byte[] percentEncode(byte b) {
650729 }
651730
652731 public static String percentEncode (final String source ) {
732+ return percentEncode (source , new BitSet (0 ));
733+ }
734+
735+ private static String percentEncode (final String source , final BitSet safe ) {
653736 if (source .isEmpty ()) {
654737 return source ;
655738 }
656739
657740 byte [] bytes = source .getBytes (StandardCharsets .UTF_8 );
658741
659742 int off = 0 ;
660- int idx = indexOfUnsafeChar (bytes , off );
743+ int idx = indexOfUnsafeChar (bytes , off , safe );
661744
662745 if (idx == -1 ) {
663746 return source ;
@@ -674,7 +757,7 @@ public static String percentEncode(final String source) {
674757 }
675758
676759 buffer .put (percentEncode (bytes [off ++]));
677- idx = indexOfUnsafeChar (bytes , off );
760+ idx = indexOfUnsafeChar (bytes , off , safe );
678761
679762 if (idx == -1 ) {
680763 int rem = bytes .length - off ;
@@ -733,7 +816,6 @@ private void parse(final String purl) throws MalformedPackageURLException {
733816 final String rawQuery = uri .getRawQuery ();
734817 if (rawQuery != null && !rawQuery .isEmpty ()) {
735818 this .qualifiers = parseQualifiers (rawQuery );
736-
737819 }
738820 // this is the rest of the purl that needs to be parsed
739821 String remainder = uri .getRawPath ();
@@ -835,8 +917,8 @@ private String[] parsePath(final String path, final boolean isSubpath) {
835917 .toArray (String []::new );
836918 }
837919
838- private String encodePath (final String path ) {
839- return Arrays .stream (path .split ("/" )).map (PackageURL :: percentEncode ).collect (Collectors .joining ("/" ));
920+ private String encodePath (final String path , BitSet safe ) {
921+ return Arrays .stream (path .split ("/" )).map (source -> percentEncode ( source , safe ) ).collect (Collectors .joining ("/" ));
840922 }
841923
842924 /**
0 commit comments