# Runs the JMH benchmark selected by BENCHMARK_MODULE / BENCHMARK_CLASSES twice -- once on the
# pull request code and once on main -- and uploads both result files so they can be compared.
# The job only runs for pull requests carrying the "run benchmarks" label.
name: Benchmark PR

on:
  pull_request:
    types: [opened, synchronize, reopened, labeled]

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
  cancel-in-progress: true

permissions:
  contents: read

env:
  BENCHMARK_MODULE: exporters/otlp/common
  BENCHMARK_CLASSES: StringMarshalBenchmark

jobs:
  sdk-benchmark:
    name: Benchmark SDK (Java ${{ matrix.test-java-version }})
    if: contains(github.event.pull_request.labels.*.name, 'run benchmarks')
    strategy:
      fail-fast: false
      matrix:
        test-java-version:
          - 17
          - 24
    runs-on: oracle-bare-metal-64cpu-512gb-x86-64
    container:
      image: ubuntu:24.04@sha256:353675e2a41babd526e2b837d7ec780c2a05bca0164f7ea5dbbd433d21d166fc
    timeout-minutes: 20 # since there is only a single bare metal runner across all repos
    steps:
      - name: Install Git
        run: |
          apt-get update
          apt-get install -y git

      - name: Configure Git safe directory
        run: git config --global --add safe.directory "$GITHUB_WORKSPACE"

      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

      - id: setup-java-test
        name: Set up Java ${{ matrix.test-java-version }} for tests
        uses: actions/setup-java@dded0888837ed1f317902acf8a20df0ad188d165 # v5.0.0
        with:
          distribution: temurin
          java-version: ${{ matrix.test-java-version }}

      # Set up last so that this JDK is the one Gradle builds with.
      - id: setup-java
        name: Set up Java for build
        uses: actions/setup-java@dded0888837ed1f317902acf8a20df0ad188d165 # v5.0.0
        with:
          distribution: temurin
          java-version: 17

      - name: Set up gradle
        uses: gradle/actions/setup-gradle@ed408507eac070d1f99cc633dbcf757c94c7933a # v4.4.3

      - name: Build Benchmark
        run: ./gradlew jmhJar

      # --add-opens lets the forked benchmark JVMs reach String internals through VarHandles.
      - name: Run Benchmark
        run: >
          ${{ steps.setup-java-test.outputs.path }}/bin/java
          -jar ${{ env.BENCHMARK_MODULE }}/build/libs/opentelemetry-*-jmh.jar
          -jvmArgs="--add-opens=java.base/java.lang=ALL-UNNAMED"
          -rf json
          ${{ env.BENCHMARK_CLASSES }}

      - name: Rename results
        run: mv jmh-result.json jmh-result-pr.json

      # actions/checkout only fetches the single commit that triggered the workflow, so the
      # origin/main remote-tracking ref does not exist yet; fetch it before checking it out.
      - name: Switch to main branch
        run: |
          git fetch --no-tags --depth=1 origin +refs/heads/main:refs/remotes/origin/main
          git checkout origin/main

      - name: Build Benchmark on main branch
        run: ./gradlew jmhJar

      - name: Run Benchmark on main branch
        run: >
          ${{ steps.setup-java-test.outputs.path }}/bin/java
          -jar ${{ env.BENCHMARK_MODULE }}/build/libs/opentelemetry-*-jmh.jar
          -rf json
          ${{ env.BENCHMARK_CLASSES }}

      - name: Rename results
        run: mv jmh-result.json jmh-result-main.json

      - name: Upload benchmark results
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        with:
          name: benchmark-results-java-${{ matrix.test-java-version }}
          path: |
            jmh-result-pr.json
            jmh-result-main.json
otelJava.moduleName.set("io.opentelemetry.exporter.internal") +java { + sourceSets { + create("java9") { + java { + srcDir("src/main/java9") + } + // Make java9 source set depend on main source set + // since VarHandleStringEncoder implements StringEncoder from the main source set + compileClasspath += sourceSets.main.get().output + sourceSets.main.get().compileClasspath + } + } +} + +// Configure java9 compilation to see main source classes +sourceSets.named("java9") { + compileClasspath += sourceSets.main.get().output +} + +tasks.named("compileJava9Java") { + options.release.set(9) +} + +tasks.named("jar") { + manifest { + attributes["Multi-Release"] = "true" + } + from(sourceSets.named("java9").get().output) { + into("META-INF/versions/9") + } +} + +// Configure test to include java9 classes when running on Java 9+ +// so that StringEncoderHolder.createUnsafeEncoder() can instantiate the Java 9 version +val javaVersion = JavaVersion.current() +if (javaVersion >= JavaVersion.VERSION_1_9) { + sourceSets.named("test") { + runtimeClasspath += sourceSets.named("java9").get().output + } +} + val versions: Map by project dependencies { api(project(":api:all")) @@ -79,6 +119,15 @@ tasks { check { dependsOn(testing.suites) } + + withType { + // Allow VarHandle access to String internals + // generally users won't do this and so won't get the VarHandle implementation + // but the Java agent is able to automatically open these modules + // (see ModuleOpener.java in that repository) + jvmArgs("--add-opens=java.base/java.lang=ALL-UNNAMED") + jvmArgs("-XX:+IgnoreUnrecognizedVMOptions") // needed for Java 8 + } } afterEvaluate { diff --git a/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/AbstractStringEncoder.java b/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/AbstractStringEncoder.java new file mode 100644 index 00000000000..0a0405ce11f --- /dev/null +++ 
b/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/AbstractStringEncoder.java @@ -0,0 +1,93 @@ +/* + * Copyright The OpenTelemetry Authors + * SPDX-License-Identifier: Apache-2.0 + */ + +package io.opentelemetry.exporter.internal.marshal; + +import java.io.IOException; + +/** + * This class contains shared logic for UTF-8 encoding operations while allowing subclasses to + * implement different mechanisms for accessing String internal byte arrays (e.g., Unsafe vs + * VarHandle). + * + *

This class is internal and is hence not for public use. Its APIs are unstable and can change + * at any time. + */ +abstract class AbstractStringEncoder implements StringEncoder { + + private final FallbackStringEncoder fallback = new FallbackStringEncoder(); + + @Override + public final void writeUtf8(CodedOutputStream output, String string, int utf8Length) + throws IOException { + // if the length of the latin1 string and the utf8 output are the same then the string must be + // composed of only 7bit characters and can be directly copied to the output + if (string.length() == utf8Length && isLatin1(string)) { + byte[] bytes = getStringBytes(string); + output.write(bytes, 0, bytes.length); + } else { + fallback.writeUtf8(output, string, utf8Length); + } + } + + @Override + public final int getUtf8Size(String string) { + if (isLatin1(string)) { + byte[] bytes = getStringBytes(string); + // latin1 bytes with negative value (most significant bit set) are encoded as 2 bytes in utf8 + return string.length() + countNegative(bytes); + } + + return fallback.getUtf8Size(string); + } + + protected abstract byte[] getStringBytes(String string); + + protected abstract boolean isLatin1(String string); + + protected abstract long getLong(byte[] bytes, int offset); + + // Inner loop can process at most 8 * 255 bytes without overflowing counter. To process more bytes + // inner loop has to be run multiple times. + private static final int MAX_INNER_LOOP_SIZE = 8 * 255; + // mask that selects only the most significant bit in every byte of the long + private static final long MOST_SIGNIFICANT_BIT_MASK = 0x8080808080808080L; + + /** Returns the count of bytes with negative value. */ + private int countNegative(byte[] bytes) { + int count = 0; + int offset = 0; + // We are processing one long (8 bytes) at a time. In the inner loop we are keeping counts in a + // long where each byte in the long is a separate counter. 
Due to this the inner loop can + // process a maximum of 8*255 bytes at a time without overflow. + for (int i = 1; i <= bytes.length / MAX_INNER_LOOP_SIZE + 1; i++) { + long tmp = 0; // each byte in this long is a separate counter + int limit = Math.min(i * MAX_INNER_LOOP_SIZE, bytes.length & ~7); + for (; offset < limit; offset += 8) { + long value = getLong(bytes, offset); + // Mask the value keeping only the most significant bit in each byte and then shift this bit + // to the position of the least significant bit in each byte. If the input byte was not + // negative then after this transformation it will be zero, if it was negative then it will + // be one. + tmp += (value & MOST_SIGNIFICANT_BIT_MASK) >>> 7; + } + // sum up counts + if (tmp != 0) { + for (int j = 0; j < 8; j++) { + count += (int) (tmp & 0xff); + tmp = tmp >>> 8; + } + } + } + + // Handle remaining bytes. Previous loop processes 8 bytes a time, if the input size is not + // divisible with 8 the remaining bytes are handled here. + for (int i = offset; i < bytes.length; i++) { + // same as if (bytes[i] < 0) count++; + count += bytes[i] >>> 31; + } + return count; + } +} diff --git a/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/FallbackStringEncoder.java b/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/FallbackStringEncoder.java new file mode 100644 index 00000000000..33c688bd4de --- /dev/null +++ b/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/FallbackStringEncoder.java @@ -0,0 +1,140 @@ +/* + * Copyright The OpenTelemetry Authors + * SPDX-License-Identifier: Apache-2.0 + */ + +package io.opentelemetry.exporter.internal.marshal; + +import java.io.IOException; + +/** + * Fallback StringEncoder implementation using standard Java string operations. + * + *

This implementation works on all Java versions and provides correct UTF-8 handling. + * + *

This class is internal and is hence not for public use. Its APIs are unstable and can change + * at any time. + */ +final class FallbackStringEncoder implements StringEncoder { + + FallbackStringEncoder() {} + + @Override + public int getUtf8Size(String string) { + return encodedUtf8Length(string); + } + + @Override + public void writeUtf8(CodedOutputStream output, String string, int utf8Length) + throws IOException { + encodeUtf8(output, string); + } + + // adapted from + // https://github.com/protocolbuffers/protobuf/blob/b618f6750aed641a23d5f26fbbaf654668846d24/java/core/src/main/java/com/google/protobuf/Utf8.java#L217 + private static int encodedUtf8Length(String string) { + // Warning to maintainers: this implementation is highly optimized. + int utf16Length = string.length(); + int utf8Length = utf16Length; + int i = 0; + + // This loop optimizes for pure ASCII. + while (i < utf16Length && string.charAt(i) < 0x80) { + i++; + } + + // This loop optimizes for chars less than 0x800. + for (; i < utf16Length; i++) { + char c = string.charAt(i); + if (c < 0x800) { + utf8Length += ((0x7f - c) >>> 31); // branch free! + } else { + utf8Length += encodedUtf8LengthGeneral(string, i); + break; + } + } + + if (utf8Length < utf16Length) { + // Necessary and sufficient condition for overflow because of maximum 3x expansion + throw new IllegalArgumentException( + "UTF-8 length does not fit in int: " + (utf8Length + (1L << 32))); + } + + return utf8Length; + } + + // adapted from + // https://github.com/protocolbuffers/protobuf/blob/b618f6750aed641a23d5f26fbbaf654668846d24/java/core/src/main/java/com/google/protobuf/Utf8.java#L247 + private static int encodedUtf8LengthGeneral(String string, int start) { + int utf16Length = string.length(); + int utf8Length = 0; + for (int i = start; i < utf16Length; i++) { + char c = string.charAt(i); + if (c < 0x800) { + utf8Length += (0x7f - c) >>> 31; // branch free! 
+ } else { + utf8Length += 2; + if (Character.isSurrogate(c)) { + // Check that we have a well-formed surrogate pair. + if (Character.codePointAt(string, i) != c) { + i++; + } else { + // invalid sequence + // At this point we have accumulated 3 byes of length (2 in this method and 1 in caller) + // for current character, reduce the length to 1 bytes as we are going to encode the + // invalid character as ? + utf8Length -= 2; + } + } + } + } + + return utf8Length; + } + + // encode utf8 the same way as length is computed in encodedUtf8Length + // adapted from + // https://github.com/protocolbuffers/protobuf/blob/b618f6750aed641a23d5f26fbbaf654668846d24/java/core/src/main/java/com/google/protobuf/Utf8.java#L1016 + private static void encodeUtf8(CodedOutputStream output, String in) throws IOException { + int utf16Length = in.length(); + int i = 0; + // Designed to take advantage of + // https://wiki.openjdk.java.net/display/HotSpotInternals/RangeCheckElimination + for (char c; i < utf16Length && (c = in.charAt(i)) < 0x80; i++) { + output.write((byte) c); + } + if (i == utf16Length) { + return; + } + + for (char c; i < utf16Length; i++) { + c = in.charAt(i); + if (c < 0x80) { + // 1 byte, 7 bits + output.write((byte) c); + } else if (c < 0x800) { // 11 bits, two UTF-8 bytes + output.write((byte) ((0xF << 6) | (c >>> 6))); + output.write((byte) (0x80 | (0x3F & c))); + } else if (!Character.isSurrogate(c)) { + // Maximum single-char code point is 0xFFFF, 16 bits, three UTF-8 bytes + output.write((byte) ((0xF << 5) | (c >>> 12))); + output.write((byte) (0x80 | (0x3F & (c >>> 6)))); + output.write((byte) (0x80 | (0x3F & c))); + } else { + // Minimum code point represented by a surrogate pair is 0x10000, 17 bits, + // four UTF-8 bytes + int codePoint = Character.codePointAt(in, i); + if (codePoint != c) { + output.write((byte) ((0xF << 4) | (codePoint >>> 18))); + output.write((byte) (0x80 | (0x3F & (codePoint >>> 12)))); + output.write((byte) (0x80 | (0x3F & (codePoint 
>>> 6)))); + output.write((byte) (0x80 | (0x3F & codePoint))); + i++; + } else { + // invalid sequence + output.write((byte) '?'); + } + } + } + } +} diff --git a/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/MarshalerContext.java b/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/MarshalerContext.java index 80d01e84392..62177877ccc 100644 --- a/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/MarshalerContext.java +++ b/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/MarshalerContext.java @@ -27,7 +27,7 @@ */ public final class MarshalerContext { private final boolean marshalStringNoAllocation; - private final boolean marshalStringUnsafe; + private final StringEncoder stringEncoder; private int[] sizes = new int[16]; private int sizeReadIndex; @@ -37,20 +37,25 @@ public final class MarshalerContext { private int dataWriteIndex; public MarshalerContext() { - this(/* marshalStringNoAllocation= */ true, /* marshalStringUnsafe= */ true); + this(/* marshalStringNoAllocation= */ true); } - public MarshalerContext(boolean marshalStringNoAllocation, boolean marshalStringUnsafe) { + public MarshalerContext(boolean marshalStringNoAllocation) { this.marshalStringNoAllocation = marshalStringNoAllocation; - this.marshalStringUnsafe = marshalStringUnsafe; + this.stringEncoder = StringEncoder.getInstance(); + } + + public MarshalerContext(boolean marshalStringNoAllocation, StringEncoder stringEncoder) { + this.marshalStringNoAllocation = marshalStringNoAllocation; + this.stringEncoder = stringEncoder; } public boolean marshalStringNoAllocation() { return marshalStringNoAllocation; } - public boolean marshalStringUnsafe() { - return marshalStringUnsafe; + public StringEncoder getStringEncoder() { + return stringEncoder; } public void addSize(int size) { diff --git a/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/ProtoSerializer.java 
b/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/ProtoSerializer.java index 4094f8b01ac..79bcd861ddd 100644 --- a/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/ProtoSerializer.java +++ b/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/ProtoSerializer.java @@ -160,7 +160,7 @@ public void writeString( output.writeUInt32NoTag(field.getTag()); output.writeUInt32NoTag(utf8Length); - StatelessMarshalerUtil.writeUtf8(output, string, utf8Length, context); + context.getStringEncoder().writeUtf8(output, string, utf8Length); } @Override diff --git a/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/StatelessMarshalerUtil.java b/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/StatelessMarshalerUtil.java index 793f90cc9ae..715bcf9f649 100644 --- a/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/StatelessMarshalerUtil.java +++ b/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/StatelessMarshalerUtil.java @@ -9,7 +9,6 @@ import io.opentelemetry.api.common.Attributes; import io.opentelemetry.sdk.common.InstrumentationScopeInfo; import io.opentelemetry.sdk.resources.Resource; -import java.io.IOException; import java.util.Collection; import java.util.List; import java.util.Map; @@ -95,7 +94,7 @@ public static int sizeStringWithContext( return sizeBytes(field, 0); } if (context.marshalStringNoAllocation()) { - int utf8Size = getUtf8Size(value, context); + int utf8Size = context.getStringEncoder().getUtf8Size(value); context.addSize(utf8Size); return sizeBytes(field, utf8Size); } else { @@ -298,196 +297,5 @@ public static int sizeMessageWithContext( return size; } - /** Returns the size of utf8 encoded string in bytes. 
*/ - private static int getUtf8Size(String string, MarshalerContext context) { - return getUtf8Size(string, context.marshalStringUnsafe()); - } - - // Visible for testing - static int getUtf8Size(String string, boolean useUnsafe) { - if (useUnsafe && UnsafeString.isAvailable() && UnsafeString.isLatin1(string)) { - byte[] bytes = UnsafeString.getBytes(string); - // latin1 bytes with negative value (most significant bit set) are encoded as 2 bytes in utf8 - return string.length() + countNegative(bytes); - } - - return encodedUtf8Length(string); - } - - // Inner loop can process at most 8 * 255 bytes without overflowing counter. To process more bytes - // inner loop has to be run multiple times. - private static final int MAX_INNER_LOOP_SIZE = 8 * 255; - // mask that selects only the most significant bit in every byte of the long - private static final long MOST_SIGNIFICANT_BIT_MASK = 0x8080808080808080L; - - /** Returns the count of bytes with negative value. */ - private static int countNegative(byte[] bytes) { - int count = 0; - int offset = 0; - // We are processing one long (8 bytes) at a time. In the inner loop we are keeping counts in a - // long where each byte in the long is a separate counter. Due to this the inner loop can - // process a maximum of 8*255 bytes at a time without overflow. - for (int i = 1; i <= bytes.length / MAX_INNER_LOOP_SIZE + 1; i++) { - long tmp = 0; // each byte in this long is a separate counter - int limit = Math.min(i * MAX_INNER_LOOP_SIZE, bytes.length & ~7); - for (; offset < limit; offset += 8) { - long value = UnsafeString.getLong(bytes, offset); - // Mask the value keeping only the most significant bit in each byte and then shift this bit - // to the position of the least significant bit in each byte. If the input byte was not - // negative then after this transformation it will be zero, if it was negative then it will - // be one. 
- tmp += (value & MOST_SIGNIFICANT_BIT_MASK) >>> 7; - } - // sum up counts - if (tmp != 0) { - for (int j = 0; j < 8; j++) { - count += (int) (tmp & 0xff); - tmp = tmp >>> 8; - } - } - } - - // Handle remaining bytes. Previous loop processes 8 bytes a time, if the input size is not - // divisible with 8 the remaining bytes are handled here. - for (int i = offset; i < bytes.length; i++) { - // same as if (bytes[i] < 0) count++; - count += bytes[i] >>> 31; - } - return count; - } - - // adapted from - // https://github.com/protocolbuffers/protobuf/blob/b618f6750aed641a23d5f26fbbaf654668846d24/java/core/src/main/java/com/google/protobuf/Utf8.java#L217 - private static int encodedUtf8Length(String string) { - // Warning to maintainers: this implementation is highly optimized. - int utf16Length = string.length(); - int utf8Length = utf16Length; - int i = 0; - - // This loop optimizes for pure ASCII. - while (i < utf16Length && string.charAt(i) < 0x80) { - i++; - } - - // This loop optimizes for chars less than 0x800. - for (; i < utf16Length; i++) { - char c = string.charAt(i); - if (c < 0x800) { - utf8Length += ((0x7f - c) >>> 31); // branch free! - } else { - utf8Length += encodedUtf8LengthGeneral(string, i); - break; - } - } - - if (utf8Length < utf16Length) { - // Necessary and sufficient condition for overflow because of maximum 3x expansion - throw new IllegalArgumentException( - "UTF-8 length does not fit in int: " + (utf8Length + (1L << 32))); - } - - return utf8Length; - } - - // adapted from - // https://github.com/protocolbuffers/protobuf/blob/b618f6750aed641a23d5f26fbbaf654668846d24/java/core/src/main/java/com/google/protobuf/Utf8.java#L247 - private static int encodedUtf8LengthGeneral(String string, int start) { - int utf16Length = string.length(); - int utf8Length = 0; - for (int i = start; i < utf16Length; i++) { - char c = string.charAt(i); - if (c < 0x800) { - utf8Length += (0x7f - c) >>> 31; // branch free! 
- } else { - utf8Length += 2; - if (Character.isSurrogate(c)) { - // Check that we have a well-formed surrogate pair. - if (Character.codePointAt(string, i) != c) { - i++; - } else { - // invalid sequence - // At this point we have accumulated 3 byes of length (2 in this method and 1 in caller) - // for current character, reduce the length to 1 bytes as we are going to encode the - // invalid character as ? - utf8Length -= 2; - } - } - } - } - - return utf8Length; - } - - /** Write utf8 encoded string to output stream. */ - @SuppressWarnings("UnusedVariable") // context argument is added for future use - static void writeUtf8( - CodedOutputStream output, String string, int utf8Length, MarshalerContext context) - throws IOException { - writeUtf8(output, string, utf8Length, context.marshalStringUnsafe()); - } - - // Visible for testing - @SuppressWarnings("UnusedVariable") // utf8Length argument is added for future use - static void writeUtf8(CodedOutputStream output, String string, int utf8Length, boolean useUnsafe) - throws IOException { - // if the length of the latin1 string and the utf8 output are the same then the string must be - // composed of only 7bit characters and can be directly copied to the output - if (useUnsafe - && UnsafeString.isAvailable() - && string.length() == utf8Length - && UnsafeString.isLatin1(string)) { - byte[] bytes = UnsafeString.getBytes(string); - output.write(bytes, 0, bytes.length); - } else { - encodeUtf8(output, string); - } - } - - // encode utf8 the same way as length is computed in encodedUtf8Length - // adapted from - // https://github.com/protocolbuffers/protobuf/blob/b618f6750aed641a23d5f26fbbaf654668846d24/java/core/src/main/java/com/google/protobuf/Utf8.java#L1016 - private static void encodeUtf8(CodedOutputStream output, String in) throws IOException { - int utf16Length = in.length(); - int i = 0; - // Designed to take advantage of - // https://wiki.openjdk.java.net/display/HotSpotInternals/RangeCheckElimination - for 
(char c; i < utf16Length && (c = in.charAt(i)) < 0x80; i++) { - output.write((byte) c); - } - if (i == utf16Length) { - return; - } - - for (char c; i < utf16Length; i++) { - c = in.charAt(i); - if (c < 0x80) { - // 1 byte, 7 bits - output.write((byte) c); - } else if (c < 0x800) { // 11 bits, two UTF-8 bytes - output.write((byte) ((0xF << 6) | (c >>> 6))); - output.write((byte) (0x80 | (0x3F & c))); - } else if (!Character.isSurrogate(c)) { - // Maximum single-char code point is 0xFFFF, 16 bits, three UTF-8 bytes - output.write((byte) ((0xF << 5) | (c >>> 12))); - output.write((byte) (0x80 | (0x3F & (c >>> 6)))); - output.write((byte) (0x80 | (0x3F & c))); - } else { - // Minimum code point represented by a surrogate pair is 0x10000, 17 bits, - // four UTF-8 bytes - int codePoint = Character.codePointAt(in, i); - if (codePoint != c) { - output.write((byte) ((0xF << 4) | (codePoint >>> 18))); - output.write((byte) (0x80 | (0x3F & (codePoint >>> 12)))); - output.write((byte) (0x80 | (0x3F & (codePoint >>> 6)))); - output.write((byte) (0x80 | (0x3F & codePoint))); - i++; - } else { - // invalid sequence - output.write((byte) '?'); - } - } - } - } - private StatelessMarshalerUtil() {} } diff --git a/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/StringEncoder.java b/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/StringEncoder.java new file mode 100644 index 00000000000..5a4df5bfbf7 --- /dev/null +++ b/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/StringEncoder.java @@ -0,0 +1,44 @@ +/* + * Copyright The OpenTelemetry Authors + * SPDX-License-Identifier: Apache-2.0 + */ + +package io.opentelemetry.exporter.internal.marshal; + +import java.io.IOException; + +/** + * Interface for efficient UTF-8 string encoding operations. + * + *

This interface provides optimized string-to-UTF-8 conversion with multiple implementations + * based on available platform capabilities. + * + *

The optimal implementation is automatically selected at runtime via {@link #getInstance()}, in + * this priority order: + * + *

    + *
  • {@code VarHandleStringEncoder} - High-performance Java 9+ implementation using VarHandle + *
  • {@code UnsafeStringEncoder} - High-performance Java 8+ implementation using sun.misc.Unsafe + *
  • {@code FallbackStringEncoder} - Implementation using standard Java operations + *
+ * + *

This class is internal and is hence not for public use. Its APIs are unstable and can change + * at any time. + */ +// public visibility only needed for benchmarking purposes +public interface StringEncoder { + + /** Returns the number of bytes required to encode the string as UTF-8. */ + int getUtf8Size(String string); + + /** + * Write a string as UTF-8 bytes to the output stream using the pre-calculated UTF-8 length from + * {@link #getUtf8Size(String)}. + */ + void writeUtf8(CodedOutputStream output, String string, int utf8Length) throws IOException; + + /** Returns the best available StringEncoder implementation. */ + static StringEncoder getInstance() { + return StringEncoderHolder.INSTANCE; + } +} diff --git a/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/StringEncoderHolder.java b/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/StringEncoderHolder.java new file mode 100644 index 00000000000..b9d9794217a --- /dev/null +++ b/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/StringEncoderHolder.java @@ -0,0 +1,109 @@ +/* + * Copyright The OpenTelemetry Authors + * SPDX-License-Identifier: Apache-2.0 + */ + +package io.opentelemetry.exporter.internal.marshal; + +import java.util.Optional; +import java.util.logging.Level; +import java.util.logging.Logger; +import javax.annotation.Nullable; + +/** + * Factory and holder class for StringEncoder implementations. + * + *

This class is internal and is hence not for public use. Its APIs are unstable and can change + * at any time. + */ +// public visibility only needed for benchmarking purposes +public final class StringEncoderHolder { + private static final Logger logger = Logger.getLogger(StringEncoderHolder.class.getName()); + + static final StringEncoder INSTANCE = createInstance(); + + /** + * Creates a FallbackStringEncoder instance. + * + * @return a new FallbackStringEncoder instance + */ + public static StringEncoder createFallbackEncoder() { + return new FallbackStringEncoder(); + } + + /** + * Creates an UnsafeStringEncoder instance if available. + * + * @return an UnsafeStringEncoder instance if available, or null if not available + */ + @Nullable + public static StringEncoder createUnsafeEncoder() { + return UnsafeStringEncoder.createIfAvailable(); + } + + /** + * Creates a VarHandleStringEncoder instance if available. + * + * @return a VarHandleStringEncoder instance if available, or null if not available + */ + @Nullable + public static StringEncoder createVarHandleEncoder() { + try { + Class varHandleClass = + Class.forName("io.opentelemetry.exporter.internal.marshal.VarHandleStringEncoder"); + java.lang.reflect.Method createMethod = varHandleClass.getMethod("createIfAvailable"); + return (StringEncoder) createMethod.invoke(null); + } catch (Throwable t) { + return null; + } + } + + private static StringEncoder createInstance() { + // UnsafeStringEncoder has slightly better performance than VarHandleStringEncoder + // so try it first + if (!proactivelyAvoidUnsafe()) { + StringEncoder unsafeImpl = createUnsafeEncoder(); + if (unsafeImpl != null) { + logger.log(Level.FINE, "Using UnsafeStringEncoder for optimized Java 8+ performance"); + return unsafeImpl; + } + } + + // the VarHandle implementation requires --add-opens=java.base/java.lang=ALL-UNNAMED + // for VarHandles to access String internals + // + // generally users won't do this and so won't get the VarHandle 
implementation + // but the Java agent is able to automatically open these modules + // (see ModuleOpener.java in that repository) + StringEncoder varHandleImpl = createVarHandleEncoder(); + if (varHandleImpl != null) { + logger.log(Level.FINE, "Using VarHandleStringEncoder for optimal Java 9+ performance"); + return varHandleImpl; + } + + // Use fallback implementation + logger.log(Level.FINE, "Using FallbackStringEncoder"); + return createFallbackEncoder(); + } + + private static boolean proactivelyAvoidUnsafe() { + Optional javaVersion = getJavaVersion(); + // Avoid Unsafe on Java 23+ due to JEP-498 deprecation warnings: + // "WARNING: A terminally deprecated method in sun.misc.Unsafe has been called" + return javaVersion.map(version -> version >= 23).orElse(true); + } + + private static Optional getJavaVersion() { + String specVersion = System.getProperty("java.specification.version"); + if (specVersion != null) { + try { + return Optional.of(Double.parseDouble(specVersion)); + } catch (NumberFormatException exception) { + // ignore + } + } + return Optional.empty(); + } + + private StringEncoderHolder() {} +} diff --git a/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/UnsafeAccess.java b/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/UnsafeAccess.java deleted file mode 100644 index 3851e05d23b..00000000000 --- a/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/UnsafeAccess.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright The OpenTelemetry Authors - * SPDX-License-Identifier: Apache-2.0 - */ - -package io.opentelemetry.exporter.internal.marshal; - -import io.opentelemetry.api.internal.ConfigUtil; -import java.lang.reflect.Field; -import sun.misc.Unsafe; - -class UnsafeAccess { - private static final int MAX_ENABLED_JAVA_VERSION = 22; - private static final boolean available = checkUnsafe(); - - static boolean isAvailable() { - return available; - } - - private static boolean 
checkUnsafe() { - double javaVersion = getJavaVersion(); - boolean unsafeEnabled = - Boolean.parseBoolean( - ConfigUtil.getString( - "otel.java.experimental.exporter.unsafe.enabled", - javaVersion != -1 && javaVersion <= MAX_ENABLED_JAVA_VERSION ? "true" : "false")); - if (!unsafeEnabled) { - return false; - } - - try { - Class.forName("sun.misc.Unsafe", false, UnsafeAccess.class.getClassLoader()); - return UnsafeHolder.UNSAFE != null; - } catch (ClassNotFoundException e) { - return false; - } - } - - private static double getJavaVersion() { - String specVersion = System.getProperty("java.specification.version"); - if (specVersion != null) { - try { - return Double.parseDouble(specVersion); - } catch (NumberFormatException exception) { - // ignore - } - } - return -1; - } - - static long objectFieldOffset(Field field) { - return UnsafeHolder.UNSAFE.objectFieldOffset(field); - } - - static Object getObject(Object object, long offset) { - return UnsafeHolder.UNSAFE.getObject(object, offset); - } - - static byte getByte(Object object, long offset) { - return UnsafeHolder.UNSAFE.getByte(object, offset); - } - - static int arrayBaseOffset(Class arrayClass) { - return UnsafeHolder.UNSAFE.arrayBaseOffset(arrayClass); - } - - static long getLong(Object o, long offset) { - return UnsafeHolder.UNSAFE.getLong(o, offset); - } - - private UnsafeAccess() {} - - private static class UnsafeHolder { - private static final Unsafe UNSAFE; - - static { - UNSAFE = getUnsafe(); - } - - private UnsafeHolder() {} - - @SuppressWarnings("NullAway") - private static Unsafe getUnsafe() { - try { - Field field = Unsafe.class.getDeclaredField("theUnsafe"); - field.setAccessible(true); - return (Unsafe) field.get(null); - } catch (Exception ignored) { - return null; - } - } - } -} diff --git a/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/UnsafeString.java b/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/UnsafeString.java deleted file mode 
100644 index 309b005fd49..00000000000 --- a/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/UnsafeString.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright The OpenTelemetry Authors - * SPDX-License-Identifier: Apache-2.0 - */ - -package io.opentelemetry.exporter.internal.marshal; - -import java.lang.reflect.Field; - -class UnsafeString { - private static final long valueOffset = getStringFieldOffset("value", byte[].class); - private static final long coderOffset = getStringFieldOffset("coder", byte.class); - private static final int byteArrayBaseOffset = - UnsafeAccess.isAvailable() ? UnsafeAccess.arrayBaseOffset(byte[].class) : -1; - private static final boolean available = valueOffset != -1 && coderOffset != -1; - - static boolean isAvailable() { - return available; - } - - static boolean isLatin1(String string) { - // 0 represents latin1, 1 utf16 - return UnsafeAccess.getByte(string, coderOffset) == 0; - } - - static byte[] getBytes(String string) { - return (byte[]) UnsafeAccess.getObject(string, valueOffset); - } - - static long getLong(byte[] bytes, int index) { - return UnsafeAccess.getLong(bytes, byteArrayBaseOffset + index); - } - - private static long getStringFieldOffset(String fieldName, Class expectedType) { - if (!UnsafeAccess.isAvailable()) { - return -1; - } - - try { - Field field = String.class.getDeclaredField(fieldName); - if (field.getType() != expectedType) { - return -1; - } - return UnsafeAccess.objectFieldOffset(field); - } catch (Exception exception) { - return -1; - } - } - - private UnsafeString() {} -} diff --git a/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/UnsafeStringEncoder.java b/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/UnsafeStringEncoder.java new file mode 100644 index 00000000000..792d78a530b --- /dev/null +++ b/exporters/common/src/main/java/io/opentelemetry/exporter/internal/marshal/UnsafeStringEncoder.java @@ -0,0 +1,105 @@ +/* + * 
Copyright The OpenTelemetry Authors + * SPDX-License-Identifier: Apache-2.0 + */ + +package io.opentelemetry.exporter.internal.marshal; + +import java.lang.reflect.Field; +import javax.annotation.Nullable; +import sun.misc.Unsafe; + +/** + * StringEncoder implementation using sun.misc.Unsafe for high performance on Java 9+. + * + *

<p>This implementation provides optimized string operations by directly accessing String internal + * fields using Unsafe operations. It's only created if Unsafe is available and all required field + * offsets can be resolved. + * + * <p>

This class is internal and is hence not for public use. Its APIs are unstable and can change + * at any time. + */ +final class UnsafeStringEncoder extends AbstractStringEncoder { + + // Field offsets for direct memory access + private final long valueOffset; + private final long coderOffset; + private final long byteArrayBaseOffset; + + private UnsafeStringEncoder(long valueOffset, long coderOffset, long byteArrayBaseOffset) { + this.valueOffset = valueOffset; + this.coderOffset = coderOffset; + this.byteArrayBaseOffset = byteArrayBaseOffset; + } + + @Nullable + public static UnsafeStringEncoder createIfAvailable() { + if (UnsafeHolder.UNSAFE == null) { + return null; + } + + long valueOffset = getStringFieldOffset("value", byte[].class); + long coderOffset = getStringFieldOffset("coder", byte.class); // this only exists in Java 9+ + + if (valueOffset == -1 || coderOffset == -1) { + return null; + } + + long byteArrayBaseOffset = UnsafeHolder.UNSAFE.arrayBaseOffset(byte[].class); + + return new UnsafeStringEncoder(valueOffset, coderOffset, byteArrayBaseOffset); + } + + @Override + protected byte[] getStringBytes(String string) { + return (byte[]) UnsafeHolder.UNSAFE.getObject(string, valueOffset); + } + + @Override + protected boolean isLatin1(String string) { + return UnsafeHolder.UNSAFE.getByte(string, coderOffset) == 0; + } + + @Override + protected long getLong(byte[] bytes, int offset) { + return UnsafeHolder.UNSAFE.getLong(bytes, byteArrayBaseOffset + offset); + } + + private static long getStringFieldOffset(String fieldName, Class expectedType) { + try { + if (UnsafeHolder.UNSAFE == null) { + return -1; + } + + Field field = String.class.getDeclaredField(fieldName); + if (!expectedType.isAssignableFrom(field.getType())) { + return -1; + } + return UnsafeHolder.UNSAFE.objectFieldOffset(field); + } catch (Exception exception) { + return -1; + } + } + + /** Holder class for lazy initialization of Unsafe. 
*/ + private static final class UnsafeHolder { + private static final Unsafe UNSAFE; + + static { + UNSAFE = getUnsafe(); + } + + private UnsafeHolder() {} + + @SuppressWarnings("NullAway") + private static Unsafe getUnsafe() { + try { + Field field = Unsafe.class.getDeclaredField("theUnsafe"); + field.setAccessible(true); + return (Unsafe) field.get(null); + } catch (Throwable t) { + return null; + } + } + } +} diff --git a/exporters/common/src/main/java9/io/opentelemetry/exporter/internal/marshal/VarHandleStringEncoder.java b/exporters/common/src/main/java9/io/opentelemetry/exporter/internal/marshal/VarHandleStringEncoder.java new file mode 100644 index 00000000000..c2f27aa3628 --- /dev/null +++ b/exporters/common/src/main/java9/io/opentelemetry/exporter/internal/marshal/VarHandleStringEncoder.java @@ -0,0 +1,78 @@ +/* + * Copyright The OpenTelemetry Authors + * SPDX-License-Identifier: Apache-2.0 + */ + +package io.opentelemetry.exporter.internal.marshal; + +import java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; +import java.lang.reflect.Field; +import java.nio.ByteOrder; +import javax.annotation.Nullable; + +/** + * StringEncoder implementation using VarHandle for high performance on Java 9+. + * + *

<p>This implementation provides optimized string operations by directly accessing String internal + * fields using VarHandle operations. It's only created if VarHandle is available and all required + * handles can be resolved. + * + * <p>

This class is internal and is hence not for public use. Its APIs are unstable and can change + * at any time. + */ +final class VarHandleStringEncoder extends AbstractStringEncoder { + + private final VarHandle valueHandle; + private final VarHandle coderHandle; + private static final VarHandle LONG_ARRAY_HANDLE = + MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.LITTLE_ENDIAN); + + private VarHandleStringEncoder(VarHandle valueHandle, VarHandle coderHandle) { + this.valueHandle = valueHandle; + this.coderHandle = coderHandle; + } + + @Nullable + public static VarHandleStringEncoder createIfAvailable() { + VarHandle valueHandle1 = getStringFieldHandle("value", byte[].class); + VarHandle coderHandle1 = getStringFieldHandle("coder", byte.class); + + if (valueHandle1 == null || coderHandle1 == null) { + return null; + } + + return new VarHandleStringEncoder(valueHandle1, coderHandle1); + } + + @Override + protected byte[] getStringBytes(String string) { + return (byte[]) valueHandle.get(string); + } + + @Override + protected boolean isLatin1(String string) { + return ((byte) coderHandle.get(string)) == 0; + } + + @Override + protected long getLong(byte[] bytes, int offset) { + return (long) LONG_ARRAY_HANDLE.get(bytes, offset); + } + + @Nullable + private static VarHandle getStringFieldHandle(String fieldName, Class expectedType) { + try { + Field field = String.class.getDeclaredField(fieldName); + if (!expectedType.isAssignableFrom(field.getType())) { + return null; + } + + MethodHandles.Lookup lookup = + MethodHandles.privateLookupIn(String.class, MethodHandles.lookup()); + return lookup.findVarHandle(String.class, fieldName, expectedType); + } catch (Exception exception) { + return null; + } + } +} diff --git a/exporters/common/src/test/java/io/opentelemetry/exporter/internal/marshal/StatelessMarshalerUtilFuzzTest.java b/exporters/common/src/test/java/io/opentelemetry/exporter/internal/marshal/StatelessMarshalerUtilFuzzTest.java deleted file mode 
100644 index d8db6d2ed6c..00000000000 --- a/exporters/common/src/test/java/io/opentelemetry/exporter/internal/marshal/StatelessMarshalerUtilFuzzTest.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright The OpenTelemetry Authors - * SPDX-License-Identifier: Apache-2.0 - */ - -package io.opentelemetry.exporter.internal.marshal; - -import static io.opentelemetry.exporter.internal.marshal.StatelessMarshalerUtil.getUtf8Size; -import static io.opentelemetry.exporter.internal.marshal.StatelessMarshalerUtilTest.testUtf8; -import static org.assertj.core.api.Assertions.assertThat; - -import edu.berkeley.cs.jqf.fuzz.Fuzz; -import edu.berkeley.cs.jqf.fuzz.JQF; -import edu.berkeley.cs.jqf.fuzz.junit.GuidedFuzzing; -import edu.berkeley.cs.jqf.fuzz.random.NoGuidance; -import java.nio.charset.StandardCharsets; -import org.junit.jupiter.api.Test; -import org.junit.runner.Result; -import org.junit.runner.RunWith; - -@SuppressWarnings("SystemOut") -class StatelessMarshalerUtilFuzzTest { - - @RunWith(JQF.class) - public static class EncodeUf8 { - - @Fuzz - public void encodeRandomString(String value) { - int utf8Size = value.getBytes(StandardCharsets.UTF_8).length; - assertThat(getUtf8Size(value, false)).isEqualTo(utf8Size); - assertThat(getUtf8Size(value, true)).isEqualTo(utf8Size); - assertThat(testUtf8(value, utf8Size, /* useUnsafe= */ false)).isEqualTo(value); - assertThat(testUtf8(value, utf8Size, /* useUnsafe= */ true)).isEqualTo(value); - } - } - - // driver methods to avoid having to use the vintage junit engine, and to enable increasing the - // number of iterations: - - @Test - void encodeUf8WithFuzzing() { - Result result = - GuidedFuzzing.run( - EncodeUf8.class, "encodeRandomString", new NoGuidance(10000, System.out), System.out); - assertThat(result.wasSuccessful()).isTrue(); - } -} diff --git a/exporters/common/src/test/java/io/opentelemetry/exporter/internal/marshal/StatelessMarshalerUtilTest.java 
b/exporters/common/src/test/java/io/opentelemetry/exporter/internal/marshal/StatelessMarshalerUtilTest.java deleted file mode 100644 index c4eec43572d..00000000000 --- a/exporters/common/src/test/java/io/opentelemetry/exporter/internal/marshal/StatelessMarshalerUtilTest.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright The OpenTelemetry Authors - * SPDX-License-Identifier: Apache-2.0 - */ - -package io.opentelemetry.exporter.internal.marshal; - -import static io.opentelemetry.exporter.internal.marshal.StatelessMarshalerUtil.getUtf8Size; -import static io.opentelemetry.exporter.internal.marshal.StatelessMarshalerUtil.writeUtf8; -import static org.assertj.core.api.Assertions.assertThat; - -import java.io.ByteArrayOutputStream; -import java.nio.charset.StandardCharsets; -import java.util.Random; -import org.junit.jupiter.api.RepeatedTest; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; - -class StatelessMarshalerUtilTest { - - @ParameterizedTest - @ValueSource(strings = {"true", "false"}) - @SuppressWarnings("AvoidEscapedUnicodeCharacters") - void encodeUtf8(boolean useUnsafe) { - assertThat(getUtf8Size("", useUnsafe)).isEqualTo(0); - assertThat(testUtf8("", 0, useUnsafe)).isEqualTo(""); - - assertThat(getUtf8Size("a", useUnsafe)).isEqualTo(1); - assertThat(testUtf8("a", 1, useUnsafe)).isEqualTo("a"); - - assertThat(getUtf8Size("©", useUnsafe)).isEqualTo(2); - assertThat(testUtf8("©", 2, useUnsafe)).isEqualTo("©"); - - assertThat(getUtf8Size("∆", useUnsafe)).isEqualTo(3); - assertThat(testUtf8("∆", 3, useUnsafe)).isEqualTo("∆"); - - assertThat(getUtf8Size("😀", useUnsafe)).isEqualTo(4); - assertThat(testUtf8("😀", 4, useUnsafe)).isEqualTo("😀"); - - // test that invalid characters are replaced with ? 
- assertThat(getUtf8Size("\uD83D😀\uDE00", useUnsafe)).isEqualTo(6); - assertThat(testUtf8("\uD83D😀\uDE00", 6, useUnsafe)).isEqualTo("?😀?"); - - // the same invalid sequence as encoded by the jdk - byte[] bytes = "\uD83D😀\uDE00".getBytes(StandardCharsets.UTF_8); - assertThat(bytes.length).isEqualTo(6); - assertThat(new String(bytes, StandardCharsets.UTF_8)).isEqualTo("?😀?"); - } - - @RepeatedTest(1000) - void testUtf8SizeLatin1() { - Random random = new Random(); - byte[] bytes = new byte[15001]; - random.nextBytes(bytes); - String string = new String(bytes, StandardCharsets.ISO_8859_1); - int utf8Size = string.getBytes(StandardCharsets.UTF_8).length; - assertThat(getUtf8Size(string, true)).isEqualTo(utf8Size); - } - - static String testUtf8(String string, int utf8Length, boolean useUnsafe) { - try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) { - CodedOutputStream codedOutputStream = CodedOutputStream.newInstance(outputStream); - writeUtf8(codedOutputStream, string, utf8Length, useUnsafe); - codedOutputStream.flush(); - return new String(outputStream.toByteArray(), StandardCharsets.UTF_8); - } catch (Exception exception) { - throw new IllegalArgumentException(exception); - } - } -} diff --git a/exporters/common/src/test/java/io/opentelemetry/exporter/internal/marshal/StringEncoderFuzzTest.java b/exporters/common/src/test/java/io/opentelemetry/exporter/internal/marshal/StringEncoderFuzzTest.java new file mode 100644 index 00000000000..3da35b6b3b8 --- /dev/null +++ b/exporters/common/src/test/java/io/opentelemetry/exporter/internal/marshal/StringEncoderFuzzTest.java @@ -0,0 +1,94 @@ +/* + * Copyright The OpenTelemetry Authors + * SPDX-License-Identifier: Apache-2.0 + */ + +package io.opentelemetry.exporter.internal.marshal; + +import static io.opentelemetry.exporter.internal.marshal.StringEncoderTest.testUtf8; +import static org.assertj.core.api.Assertions.assertThat; + +import edu.berkeley.cs.jqf.fuzz.Fuzz; +import edu.berkeley.cs.jqf.fuzz.JQF; 
+import edu.berkeley.cs.jqf.fuzz.junit.GuidedFuzzing; +import edu.berkeley.cs.jqf.fuzz.random.NoGuidance; +import java.nio.charset.StandardCharsets; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.DisabledOnJre; +import org.junit.jupiter.api.condition.JRE; +import org.junit.runner.Result; +import org.junit.runner.RunWith; + +@SuppressWarnings("SystemOut") +class StringEncoderFuzzTest { + + private static final StringEncoder fallbackStringEncoder = + StringEncoderHolder.createFallbackEncoder(); + private static final StringEncoder unsafeStringEncoder = + StringEncoderHolder.createUnsafeEncoder(); + private static final StringEncoder varHandleStringEncoder = + StringEncoderHolder.createVarHandleEncoder(); + + @RunWith(JQF.class) + public static class EncodeUf8 { + + @Fuzz + public void encodeRandomString_Fallback(String value) { + assertThat(fallbackStringEncoder).isNotNull(); + int utf8Size = value.getBytes(StandardCharsets.UTF_8).length; + assertThat(fallbackStringEncoder.getUtf8Size(value)).isEqualTo(utf8Size); + assertThat(testUtf8(value, utf8Size, fallbackStringEncoder)).isEqualTo(value); + } + + @Fuzz + public void encodeRandomString_Unsafe(String value) { + assertThat(unsafeStringEncoder).isNotNull(); + int utf8Size = value.getBytes(StandardCharsets.UTF_8).length; + assertThat(unsafeStringEncoder.getUtf8Size(value)).isEqualTo(utf8Size); + assertThat(testUtf8(value, utf8Size, unsafeStringEncoder)).isEqualTo(value); + } + + @Fuzz + public void encodeRandomString_VarHandle(String value) { + assertThat(varHandleStringEncoder).isNotNull(); + int utf8Size = value.getBytes(StandardCharsets.UTF_8).length; + assertThat(varHandleStringEncoder.getUtf8Size(value)).isEqualTo(utf8Size); + assertThat(testUtf8(value, utf8Size, varHandleStringEncoder)).isEqualTo(value); + } + } + + @Test + void encodeUf8WithFuzzing_Fallback() { + Result result = + GuidedFuzzing.run( + EncodeUf8.class, + "encodeRandomString_Fallback", + new NoGuidance(10000, 
System.out), + System.out); + assertThat(result.wasSuccessful()).isTrue(); + } + + @Test + @DisabledOnJre(JRE.JAVA_8) + void encodeUf8WithFuzzing_Unsafe() { + Result result = + GuidedFuzzing.run( + EncodeUf8.class, + "encodeRandomString_Unsafe", + new NoGuidance(10000, System.out), + System.out); + assertThat(result.wasSuccessful()).isTrue(); + } + + @Test + @DisabledOnJre(JRE.JAVA_8) + void encodeUf8WithFuzzing_VarHandle() { + Result result = + GuidedFuzzing.run( + EncodeUf8.class, + "encodeRandomString_VarHandle", + new NoGuidance(10000, System.out), + System.out); + assertThat(result.wasSuccessful()).isTrue(); + } +} diff --git a/exporters/common/src/test/java/io/opentelemetry/exporter/internal/marshal/StringEncoderTest.java b/exporters/common/src/test/java/io/opentelemetry/exporter/internal/marshal/StringEncoderTest.java new file mode 100644 index 00000000000..64fcf8f853e --- /dev/null +++ b/exporters/common/src/test/java/io/opentelemetry/exporter/internal/marshal/StringEncoderTest.java @@ -0,0 +1,115 @@ +/* + * Copyright The OpenTelemetry Authors + * SPDX-License-Identifier: Apache-2.0 + */ + +package io.opentelemetry.exporter.internal.marshal; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.io.ByteArrayOutputStream; +import java.nio.charset.StandardCharsets; +import java.util.Random; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.DisabledOnJre; +import org.junit.jupiter.api.condition.JRE; + +class StringEncoderTest { + + private static final StringEncoder fallbackStringEncoder = + StringEncoderHolder.createFallbackEncoder(); + private static final StringEncoder unsafeStringEncoder = + StringEncoderHolder.createUnsafeEncoder(); + private static final StringEncoder varHandleStringEncoder = + StringEncoderHolder.createVarHandleEncoder(); + + @Test + void testUtf8Encoding_Fallback() { + testUtf8Encoding(fallbackStringEncoder); + } + + @Test + void testUtf8SizeLatin1_Fallback() { + 
testUtf8SizeLatin1(fallbackStringEncoder); + } + + @Test + @DisabledOnJre(JRE.JAVA_8) + void testUtf8Encoding_Unsafe() { + testUtf8Encoding(unsafeStringEncoder); + } + + @Test + @DisabledOnJre(JRE.JAVA_8) + void testUtf8SizeLatin1_Unsafe() { + testUtf8SizeLatin1(unsafeStringEncoder); + } + + @Test + @DisabledOnJre(JRE.JAVA_8) + void testUtf8Encoding_VarHandle() { + testUtf8Encoding(varHandleStringEncoder); + } + + @Test + @DisabledOnJre(JRE.JAVA_8) + void testUtf8SizeLatin1_VarHandle() { + testUtf8SizeLatin1(varHandleStringEncoder); + } + + @SuppressWarnings("AvoidEscapedUnicodeCharacters") + private static void testUtf8Encoding(StringEncoder stringEncoder) { + assertThat(stringEncoder).isNotNull(); + + assertThat(stringEncoder.getUtf8Size("")).isEqualTo(0); + assertThat(testUtf8("", 0, stringEncoder)).isEqualTo(""); + + assertThat(stringEncoder.getUtf8Size("a")).isEqualTo(1); + assertThat(testUtf8("a", 1, stringEncoder)).isEqualTo("a"); + + assertThat(stringEncoder.getUtf8Size("©")).isEqualTo(2); + assertThat(testUtf8("©", 2, stringEncoder)).isEqualTo("©"); + + assertThat(stringEncoder.getUtf8Size("∆")).isEqualTo(3); + assertThat(testUtf8("∆", 3, stringEncoder)).isEqualTo("∆"); + + assertThat(stringEncoder.getUtf8Size("😀")).isEqualTo(4); + assertThat(testUtf8("😀", 4, stringEncoder)).isEqualTo("😀"); + + // test that invalid characters are replaced with ? 
+ assertThat(stringEncoder.getUtf8Size("\uD83D😀\uDE00")).isEqualTo(6); + assertThat(testUtf8("\uD83D😀\uDE00", 6, stringEncoder)).isEqualTo("?😀?"); + + // the same invalid sequence as encoded by the jdk + byte[] bytes = "\uD83D😀\uDE00".getBytes(StandardCharsets.UTF_8); + assertThat(bytes.length).isEqualTo(6); + assertThat(new String(bytes, StandardCharsets.UTF_8)).isEqualTo("?😀?"); + } + + private static void testUtf8SizeLatin1(StringEncoder stringEncoder) { + // Run repeated test logic for each encoder + Random random = new Random(); + for (int i = 0; i < 1000; i++) { + byte[] bytes = new byte[15001]; + random.nextBytes(bytes); + String string = new String(bytes, StandardCharsets.ISO_8859_1); + int utf8Size = string.getBytes(StandardCharsets.UTF_8).length; + assertThat(stringEncoder.getUtf8Size(string)).isEqualTo(utf8Size); + } + } + + static String testUtf8(String string, int utf8Length) { + return testUtf8(string, utf8Length, StringEncoder.getInstance()); + } + + static String testUtf8(String string, int utf8Length, StringEncoder stringEncoder) { + try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) { + CodedOutputStream codedOutputStream = CodedOutputStream.newInstance(outputStream); + stringEncoder.writeUtf8(codedOutputStream, string, utf8Length); + codedOutputStream.flush(); + return new String(outputStream.toByteArray(), StandardCharsets.UTF_8); + } catch (Exception exception) { + throw new IllegalArgumentException(exception); + } + } +} diff --git a/exporters/common/src/testWithoutUnsafe/java/io/opentelemetry/exporter/internal/marshal/StatelessMarshalerUtilTest.java b/exporters/common/src/testWithoutUnsafe/java/io/opentelemetry/exporter/internal/marshal/StringEncoderWithoutUnsafeTest.java similarity index 90% rename from exporters/common/src/testWithoutUnsafe/java/io/opentelemetry/exporter/internal/marshal/StatelessMarshalerUtilTest.java rename to 
exporters/common/src/testWithoutUnsafe/java/io/opentelemetry/exporter/internal/marshal/StringEncoderWithoutUnsafeTest.java index 8ff3ec2e04d..f93cad83cb9 100644 --- a/exporters/common/src/testWithoutUnsafe/java/io/opentelemetry/exporter/internal/marshal/StatelessMarshalerUtilTest.java +++ b/exporters/common/src/testWithoutUnsafe/java/io/opentelemetry/exporter/internal/marshal/StringEncoderWithoutUnsafeTest.java @@ -5,8 +5,6 @@ package io.opentelemetry.exporter.internal.marshal; -import static io.opentelemetry.exporter.internal.marshal.StatelessMarshalerUtil.getUtf8Size; -import static io.opentelemetry.exporter.internal.marshal.StatelessMarshalerUtil.writeUtf8; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; @@ -16,7 +14,7 @@ import java.nio.charset.StandardCharsets; import org.junit.jupiter.api.Test; -class StatelessMarshalerUtilTest { +class StringEncoderWithoutUnsafeTest { // Simulate running in an environment without sun.misc.Unsafe e.g. when running a modular // application. To use sun.misc.Unsafe in modular application user would need to add dependency to @@ -24,7 +22,7 @@ class StatelessMarshalerUtilTest { // class loader that does not delegate loading sun.misc classes to make sun.misc.Unsafe // unavailable. 
@Test - void encodeUtf8WithoutUnsafe() throws Exception { + void utf8EncodingWithoutUnsafe() throws Exception { ClassLoader testClassLoader = new ClassLoader(this.getClass().getClassLoader()) { @Override @@ -83,14 +81,14 @@ public void run() { assertThatThrownBy(() -> Class.forName("sun.misc.Unsafe")) .isInstanceOf(ClassNotFoundException.class); // test the methods that use unsafe - assertThat(getUtf8Size("a", true)).isEqualTo(1); + assertThat(StringEncoder.getInstance().getUtf8Size("a")).isEqualTo(1); assertThat(testUtf8("a", 0)).isEqualTo("a"); } static String testUtf8(String string, int utf8Length) { try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) { CodedOutputStream codedOutputStream = CodedOutputStream.newInstance(outputStream); - writeUtf8(codedOutputStream, string, utf8Length, true); + StringEncoder.getInstance().writeUtf8(codedOutputStream, string, utf8Length); codedOutputStream.flush(); return new String(outputStream.toByteArray(), StandardCharsets.UTF_8); } catch (Exception exception) { diff --git a/exporters/otlp/common/build.gradle.kts b/exporters/otlp/common/build.gradle.kts index 5ad623561b2..9f97c602ca9 100644 --- a/exporters/otlp/common/build.gradle.kts +++ b/exporters/otlp/common/build.gradle.kts @@ -69,3 +69,11 @@ wire { schemaHandlerFactoryClass = "io.opentelemetry.gradle.ProtoFieldsWireHandlerFactory" } } + +// Configure JMH jar to preserve multi-release jar attribute +// so StringMarshalBenchmark can use VarHandleStringEncoder +tasks.named("jmhJar") { + manifest { + attributes["Multi-Release"] = "true" + } +} diff --git a/exporters/otlp/common/src/jmh/java/io/opentelemetry/exporter/internal/otlp/StringMarshalBenchmark.java b/exporters/otlp/common/src/jmh/java/io/opentelemetry/exporter/internal/otlp/StringMarshalBenchmark.java index 8bf2e2cdd09..9dfa8467638 100644 --- a/exporters/otlp/common/src/jmh/java/io/opentelemetry/exporter/internal/otlp/StringMarshalBenchmark.java +++ 
b/exporters/otlp/common/src/jmh/java/io/opentelemetry/exporter/internal/otlp/StringMarshalBenchmark.java @@ -8,6 +8,8 @@ import io.opentelemetry.exporter.internal.marshal.Marshaler; import io.opentelemetry.exporter.internal.marshal.MarshalerContext; import io.opentelemetry.exporter.internal.marshal.Serializer; +import io.opentelemetry.exporter.internal.marshal.StringEncoder; +import io.opentelemetry.exporter.internal.marshal.StringEncoderHolder; import java.io.IOException; import java.util.concurrent.TimeUnit; import org.openjdk.jmh.annotations.Benchmark; @@ -16,6 +18,10 @@ import org.openjdk.jmh.annotations.Measurement; import org.openjdk.jmh.annotations.Mode; import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.annotations.Threads; import org.openjdk.jmh.annotations.Warmup; @@ -24,11 +30,43 @@ @Warmup(iterations = 5, time = 1) @Measurement(iterations = 10, time = 1) @Fork(1) +@State(Scope.Benchmark) public class StringMarshalBenchmark { - private static final TestMarshaler MARSHALER_SAFE = new TestMarshaler(/* useUnsafe= */ false); - private static final TestMarshaler MARSHALER_UNSAFE = new TestMarshaler(/* useUnsafe= */ true); + + @Param({"FallbackStringEncoder", "UnsafeStringEncoder", "VarHandleStringEncoder"}) + private String encoderImplementation; + + private TestMarshaler marshaler; private static final TestOutputStream OUTPUT = new TestOutputStream(); + @Setup + public void setup() { + StringEncoder encoder; + switch (encoderImplementation) { + case "FallbackStringEncoder": + encoder = StringEncoderHolder.createFallbackEncoder(); + break; + case "UnsafeStringEncoder": + encoder = StringEncoderHolder.createUnsafeEncoder(); + if (encoder == null) { + throw new IllegalStateException( + "UnsafeStringEncoder is not available (requires Java 9+)"); + } + break; + case 
"VarHandleStringEncoder": + encoder = StringEncoderHolder.createVarHandleEncoder(); + if (encoder == null) { + throw new IllegalStateException( + "VarHandleStringEncoder is not available (requires Java 9+" + + " and -jvmArgs=\"--add-opens=java.base/java.lang=ALL-UNNAMED\""); + } + break; + default: + throw new IllegalStateException("Unknown encoder implementation: " + encoderImplementation); + } + marshaler = new TestMarshaler(encoder); + } + @Benchmark @Threads(1) public int marshalAsciiStringStateful(StringMarshalState state) throws IOException { @@ -56,38 +94,20 @@ private static int marshalStateful(String string) throws IOException { @Benchmark @Threads(1) - public int marshalAsciiStringStatelessSafe(StringMarshalState state) throws IOException { - return marshalStateless(MARSHALER_SAFE, state.asciiString); - } - - @Benchmark - @Threads(1) - public int marshalAsciiStringStatelessUnsafe(StringMarshalState state) throws IOException { - return marshalStateless(MARSHALER_UNSAFE, state.asciiString); - } - - @Benchmark - @Threads(1) - public int marshalLatin1StringStatelessSafe(StringMarshalState state) throws IOException { - return marshalStateless(MARSHALER_SAFE, state.latin1String); - } - - @Benchmark - @Threads(1) - public int marshalLatin1StringStatelessUnsafe(StringMarshalState state) throws IOException { - return marshalStateless(MARSHALER_UNSAFE, state.latin1String); + public int marshalAsciiStringStateless(StringMarshalState state) throws IOException { + return marshalStateless(marshaler, state.asciiString); } @Benchmark @Threads(1) - public int marshalUnicodeStringStatelessSafe(StringMarshalState state) throws IOException { - return marshalStateless(MARSHALER_SAFE, state.unicodeString); + public int marshalLatin1StringStateless(StringMarshalState state) throws IOException { + return marshalStateless(marshaler, state.latin1String); } @Benchmark @Threads(1) - public int marshalUnicodeStringStatelessUnsafe(StringMarshalState state) throws IOException { - 
return marshalStateless(MARSHALER_UNSAFE, state.unicodeString); + public int marshalUnicodeStringStateless(StringMarshalState state) throws IOException { + return marshalStateless(marshaler, state.unicodeString); } private static int marshalStateless(TestMarshaler marshaler, String string) throws IOException { @@ -106,8 +126,8 @@ private static class TestMarshaler extends Marshaler { private int size; private String value; - TestMarshaler(boolean useUnsafe) { - context = new MarshalerContext(/* marshalStringNoAllocation= */ true, useUnsafe); + TestMarshaler(StringEncoder encoder) { + context = new MarshalerContext(/* marshalStringNoAllocation= */ true, encoder); } private void initialize(String string) {