Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 74 additions & 63 deletions src/main/java/com/github/packageurl/utils/StringUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@
*/
package com.github.packageurl.utils;

import static java.lang.Byte.toUnsignedInt;

import com.github.packageurl.ValidationException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.stream.IntStream;

/**
* String utility for validation and encoding.
Expand All @@ -35,6 +35,24 @@ public final class StringUtil {

private static final byte PERCENT_CHAR = '%';

/**
 * Lookup table indexed by character code (0-127). An entry is {@code true} for
 * the ASCII digits, the ASCII upper- and lower-case letters, and the four
 * punctuation characters {@code - . _ ~}; every other entry stays {@code false}.
 */
private static final boolean[] UNRESERVED_CHARS = new boolean[128];

static {
    // Every character that is flagged in the table, listed once.
    final String flagged = "0123456789"
            + "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            + "abcdefghijklmnopqrstuvwxyz"
            + "-._~";
    for (int i = 0; i < flagged.length(); i++) {
        UNRESERVED_CHARS[flagged.charAt(i)] = true;
    }
}

/**
 * Private constructor that always throws, so no instance of this class can be created.
 *
 * @throws AssertionError on every call
 */
private StringUtil() {
throw new AssertionError("Cannot instantiate StringUtil");
}
Expand All @@ -48,21 +66,16 @@ private StringUtil() {
* @since 2.0.0
*/
public static String toLowerCase(String s) {
if (s == null) {
return null;
}

int pos = indexOfFirstUpperCaseChar(s);

if (pos == -1) {
return s;
}

char[] chars = s.toCharArray();
int length = chars.length;

for (int i = pos; i < length; i++) {
chars[i] = (char) toLowerCase(chars[i]);
for (int length = chars.length; pos < length; pos++) {
chars[pos] = (char) toLowerCase(chars[pos]);
}

return new String(chars);
Expand All @@ -77,26 +90,22 @@ public static String toLowerCase(String s) {
* @since 2.0.0
*/
public static String percentDecode(final String source) {
if (source == null || source.isEmpty()) {
if (source.indexOf(PERCENT_CHAR) == -1) {
return source;
}

byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
int i = indexOfFirstPercentChar(bytes);

if (i == -1) {
return source;
}

int readPos = indexOfFirstPercentChar(bytes);
int writePos = readPos;
int length = bytes.length;
int writePos = i;
while (i < length) {
byte b = bytes[i];
while (readPos < length) {
byte b = bytes[readPos];
if (b == PERCENT_CHAR) {
bytes[writePos++] = percentDecode(bytes, i++);
i += 2;
bytes[writePos++] = percentDecode(bytes, readPos++);
readPos += 2;
} else {
bytes[writePos++] = bytes[i++];
bytes[writePos++] = bytes[readPos++];
}
}

Expand All @@ -112,34 +121,29 @@ public static String percentDecode(final String source) {
* @since 2.0.0
*/
public static String percentEncode(final String source) {
if (source == null || source.isEmpty()) {
return source;
}
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
int start = indexOfFirstNonAsciiChar(bytes);
if (start == -1) {
if (!shouldEncode(source)) {
return source;
}
int length = bytes.length;
ByteBuffer buffer = ByteBuffer.allocate(start + ((length - start) * 3));
if (start != 0) {
buffer.put(bytes, 0, start);
}

for (int i = start; i < length; i++) {
byte b = bytes[i];
if (shouldEncode(b)) {
byte b1 = (byte) Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
byte b2 = (byte) Character.toUpperCase(Character.forDigit(b & 0xF, 16));
buffer.put(PERCENT_CHAR);
buffer.put(b1);
buffer.put(b2);
byte[] src = source.getBytes(StandardCharsets.UTF_8);
byte[] dest = new byte[3 * src.length];

int writePos = 0;
for (byte b : src) {
if (shouldEncode(toUnsignedInt(b))) {
dest[writePos++] = PERCENT_CHAR;
dest[writePos++] = toHexDigit(b >> 4);
dest[writePos++] = toHexDigit(b);
} else {
buffer.put(b);
dest[writePos++] = b;
}
}

return new String(buffer.array(), 0, buffer.position(), StandardCharsets.UTF_8);
return new String(dest, 0, writePos, StandardCharsets.UTF_8);
}

/**
 * Maps the low four bits of {@code b} to the matching upper-case hexadecimal digit.
 *
 * @param b any integer; all bits above the lowest nibble are ignored
 * @return the ASCII code of one of the characters {@code 0-9} or {@code A-F}
 */
private static byte toHexDigit(int b) {
    final int nibble = b & 0xF;
    return (byte) "0123456789ABCDEF".charAt(nibble);
}

/**
Expand Down Expand Up @@ -178,14 +182,34 @@ public static boolean isValidCharForKey(int c) {
return (isAlphaNumeric(c) || c == '.' || c == '_' || c == '-');
}

/**
* Returns {@code true} if the character is in the unreserved RFC 3986 set.
* <p>
* <strong>Warning</strong>: Profiling shows that the performance of {@link #percentEncode} relies heavily on this method.
* Modify with care.
* </p>
* @param c non-negative integer.
*/
private static boolean isUnreserved(int c) {
return (isValidCharForKey(c) || c == '~');
return c < 128 && UNRESERVED_CHARS[c];
}

/**
 * Negation of {@link #isUnreserved(int)}: reports whether {@code c} is outside
 * the unreserved set.
 *
 * @param c non-negative integer
 * @return {@code true} if {@code isUnreserved(c)} is {@code false}
 */
private static boolean shouldEncode(int c) {
    final boolean unreserved = isUnreserved(c);
    return !unreserved;
}

/**
 * Scans {@code s} from the start and stops at the first character for which
 * {@link #shouldEncode(int)} returns {@code true}.
 *
 * @param s string to inspect
 * @return {@code true} if at least one character of {@code s} needs encoding,
 *         {@code false} if none does (including when {@code s} is empty)
 */
private static boolean shouldEncode(String s) {
    final int end = s.length();
    int index = 0;
    // Advance while the current character can be kept as-is.
    while (index < end && !shouldEncode(s.charAt(index))) {
        index++;
    }
    return index < end;
}

/**
 * Reports whether {@code c} is accepted by {@code isLowerCase} or by {@code isUpperCase}.
 *
 * @param c character code to test
 * @return {@code true} if either check succeeds
 */
private static boolean isAlpha(int c) {
    if (isLowerCase(c)) {
        return true;
    }
    return isUpperCase(c);
}
Expand All @@ -195,7 +219,7 @@ private static boolean isAlphaNumeric(int c) {
}

private static boolean isUpperCase(int c) {
return (c >= 'A' && c <= 'Z');
return 'A' <= c && c <= 'Z';
}

private static boolean isLowerCase(int c) {
Expand All @@ -207,34 +231,21 @@ private static int toLowerCase(int c) {
}

private static int indexOfFirstUpperCaseChar(String s) {
int length = s.length();

for (int i = 0; i < length; i++) {
for (int i = 0, length = s.length(); i < length; i++) {
if (isUpperCase(s.charAt(i))) {
return i;
}
}

return -1;
}

private static int indexOfFirstNonAsciiChar(byte[] bytes) {
int length = bytes.length;
int start = -1;
for (int i = 0; i < length; i++) {
if (shouldEncode(bytes[i])) {
start = i;
break;
private static int indexOfFirstPercentChar(final byte[] bytes) {
for (int i = 0, length = bytes.length; i < length; i++) {
if (bytes[i] == PERCENT_CHAR) {
return i;
}
}
return start;
}

private static int indexOfFirstPercentChar(final byte[] bytes) {
return IntStream.range(0, bytes.length)
.filter(i -> bytes[i] == PERCENT_CHAR)
.findFirst()
.orElse(-1);
return -1;
}

private static byte percentDecode(final byte[] bytes, final int start) {
Expand Down
34 changes: 26 additions & 8 deletions src/test/java/com/github/packageurl/utils/StringUtilBenchmark.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
package com.github.packageurl.utils;

import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import org.openjdk.jmh.annotations.Benchmark;
Expand Down Expand Up @@ -62,13 +63,13 @@ public class StringUtilBenchmark {
@Param({"0", "0.1", "0.5"})
private double nonAsciiProb;

private String[] decodedData = createDecodedData();
private String[] encodedData = encodeData(decodedData);
private String[] decodedData;
private String[] encodedData;

@Setup
public void setup() {
decodedData = createDecodedData();
encodedData = encodeData(encodedData);
encodedData = encodeData(decodedData);
}

private String[] createDecodedData() {
Expand All @@ -92,6 +93,12 @@ private static String[] encodeData(String[] decodedData) {
String[] encodedData = new String[decodedData.length];
for (int i = 0; i < decodedData.length; i++) {
encodedData[i] = StringUtil.percentEncode(decodedData[i]);
if (!StringUtil.percentDecode(encodedData[i]).equals(decodedData[i])) {
throw new RuntimeException(
"Invalid implementation of `percentEncode` and `percentDecode`.\nOriginal data: "
+ encodedData[i] + "\nEncoded and decoded data: "
+ StringUtil.percentDecode(encodedData[i]));
}
}
return encodedData;
}
Expand All @@ -100,17 +107,28 @@ private static String[] encodeData(String[] decodedData) {
public void baseline(Blackhole blackhole) {
for (int i = 0; i < DATA_COUNT; i++) {
byte[] buffer = decodedData[i].getBytes(StandardCharsets.UTF_8);
// Change the String a little bit
// Prevent JIT compiler from assuming the buffer was not modified
for (int idx = 0; idx < buffer.length; idx++) {
byte b = buffer[idx];
if ('a' <= b && b <= 'z') {
buffer[idx] = (byte) (b & 0x20);
}
buffer[idx] ^= 0x20;
}
blackhole.consume(new String(buffer, StandardCharsets.UTF_8));
}
}

/**
 * Benchmark that lower-cases the first {@code DATA_COUNT} entries of
 * {@code decodedData} with {@link String#toLowerCase(Locale)} and {@link Locale#ROOT}.
 *
 * @param blackhole receives every result
 */
@Benchmark
public void toLowerCaseJre(Blackhole blackhole) {
    for (int i = 0; i < DATA_COUNT; i++) {
        final String input = decodedData[i];
        final String lowered = input.toLowerCase(Locale.ROOT);
        blackhole.consume(lowered);
    }
}

/**
 * Benchmark that passes the first {@code DATA_COUNT} entries of
 * {@code decodedData} through {@code StringUtil.toLowerCase}.
 *
 * @param blackhole receives every result
 */
@Benchmark
public void toLowerCase(Blackhole blackhole) {
    for (int i = 0; i < DATA_COUNT; i++) {
        final String input = decodedData[i];
        final String lowered = StringUtil.toLowerCase(input);
        blackhole.consume(lowered);
    }
}

@Benchmark
public void percentDecode(final Blackhole blackhole) {
for (int i = 0; i < DATA_COUNT; i++) {
Expand Down
Loading