From 89738a6169b71384648c46db8dc1f9d302bb575a Mon Sep 17 00:00:00 2001 From: Otmar Ertl Date: Mon, 25 Nov 2024 07:26:24 +0100 Subject: [PATCH 1/3] prepare for release 0.19.0, updated README --- README.md | 8 +++---- build.gradle | 24 ++++++++++++++++--- .../hash4j/consistent/ConsistentHashing.java | 3 ++- .../hash4j/hashing/AbstractHasherTest.java | 2 -- 4 files changed, 27 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 7c4385ed..9287daea 100644 --- a/README.md +++ b/README.md @@ -26,12 +26,12 @@ To add a dependency on hash4j using Maven, use the following: com.dynatrace.hash4j hash4j - 0.18.0 + 0.19.0 ``` To add a dependency using Gradle: ```gradle -implementation 'com.dynatrace.hash4j:hash4j:0.18.0' +implementation 'com.dynatrace.hash4j:hash4j:0.19.0' ``` ## Hash algorithms @@ -134,7 +134,7 @@ In case of non-distributed data streams, the [martingale estimator](src/main/jav can be used, which gives slightly better estimation results as the asymptotic storage factor is $6\ln 2 = 4.159$. This gives a relative standard error of $\sqrt{\frac{6\ln 2}{6m}} = \frac{0.833}{\sqrt{m}}$. The theoretically predicted estimation errors have been empirically confirmed by [simulation results](doc/hyperloglog-estimation-error.md). -* UltraLogLog: This algorithm is described in detail in this [paper](https://arxiv.org/abs/2308.16862). +* UltraLogLog: This algorithm is described in detail in this [paper](https://doi.org/10.14778/3654621.3654632). Like for HyperLogLog, a precision parameter $p$ defines the number of registers $m = 2^p$. However, since UltraLogLog uses 8-bit registers to enable fast random accesses and updates of the registers, $m$ is also the state size in bytes. 
@@ -211,7 +211,7 @@ The following consistent hashing algorithms are available: * [Improved Consistent Weighted Sampling](https://doi.org/10.1109/ICDM.2010.80): This algorithm is based on improved consistent weighted sampling with a constant computation time independent of the number of buckets. This algorithm is faster than JumpHash for a large number of buckets. -* [JumpBackHash](https://arxiv.org/abs/2403.18682): In contrast to JumpHash, which traverses "active indices" (see [here](https://doi.org/10.1109/ICDM.2010.80) for a definition) +* [JumpBackHash](https://doi.org/10.1002/spe.3385): In contrast to JumpHash, which traverses "active indices" (see [here](https://doi.org/10.1109/ICDM.2010.80) for a definition) in ascending order, JumpBackHash does this in the opposite direction. In this way, floating-point operations can be completely avoided. Further optimizations minimize the number of random values that need to be generated to reach the largest "active index" within the given bucket range in amortized constant time. 
The largest "active index", diff --git a/build.gradle b/build.gradle index 4996d76d..5b4f98cb 100644 --- a/build.gradle +++ b/build.gradle @@ -38,8 +38,8 @@ java { toolchain { languageVersion = JavaLanguageVersion.of(21) } - withJavadocJar() withSourcesJar() + withJavadocJar() } @@ -103,7 +103,7 @@ tasks.test { } tasks.register("java21Test", Test) { - // compare https://github.com/melix/mrjar-gradle-plugin/blob/dac99aadd451e3c2176aa6e13af7ad39e20c2cb9/plugin/src/main/java/me/champeau/mrjar/MultiReleaseExtension.java group=LifecycleBasePlugin.VERIFICATION_GROUP + group = LifecycleBasePlugin.VERIFICATION_GROUP javaLauncher = javaToolchains.launcherFor { languageVersion = JavaLanguageVersion.of(21) } @@ -128,7 +128,7 @@ tasks.withType(JavaCompile).configureEach { } group = 'com.dynatrace.hash4j' -version = '0.18.0' +version = '0.19.0' static def readJavaLicense(licenseName) { @@ -192,7 +192,9 @@ spotless { java { importOrder() removeUnusedImports() + cleanthat() googleJavaFormat(googleJavaFormatVersion) + formatAnnotations() licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') targetExclude \ 'src/main/java/com/dynatrace/hash4j/consistent/ConsistentJumpBucketHasher.java',\ @@ -210,56 +212,72 @@ spotless { format 'javaImohash', JavaExtension, { importOrder() removeUnusedImports() + cleanthat() googleJavaFormat(googleJavaFormatVersion) + formatAnnotations() licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('MIT_IMOHASH') target 'src/main/java/com/dynatrace/hash4j/file/Imohash1_0_2.java' } format 'javaKomihash', JavaExtension, { importOrder() removeUnusedImports() + cleanthat() googleJavaFormat(googleJavaFormatVersion) + formatAnnotations() licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('MIT_KOMIHASH') target 'src/main/java/com/dynatrace/hash4j/hashing/Komihash4_3.java', 'src/main/java/com/dynatrace/hash4j/hashing/Komihash5_0.java', 'src/main/java/com/dynatrace/hash4j/hashing/AbstractKomihash.java' } 
format 'javaFarmHash', JavaExtension, { importOrder() removeUnusedImports() + cleanthat() googleJavaFormat(googleJavaFormatVersion) + formatAnnotations() licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('MIT_APACHE_2_0_FARMHASH') target 'src/main/java/com/dynatrace/hash4j/hashing/FarmHashNa.java','src/main/java/com/dynatrace/hash4j/hashing/FarmHashUo.java' } format 'javaPolymurHash', JavaExtension, { importOrder() removeUnusedImports() + cleanthat() googleJavaFormat(googleJavaFormatVersion) + formatAnnotations() licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('ZLIB_POLYMURHASH') target 'src/main/java/com/dynatrace/hash4j/hashing/PolymurHash2_0.java' } format 'javaSplitMix64', JavaExtension, { importOrder() removeUnusedImports() + cleanthat() googleJavaFormat(googleJavaFormatVersion) + formatAnnotations() licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('CREATIVE_COMMONS_SPLITMIX64') target 'src/main/java/com/dynatrace/hash4j/random/SplitMix64V1.java' } format 'javaExponential', JavaExtension, { importOrder() removeUnusedImports() + cleanthat() googleJavaFormat(googleJavaFormatVersion) + formatAnnotations() licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('BOOST_EXPONENTIAL_RANDOM_GENERATION') target 'src/main/java/com/dynatrace/hash4j/random/RandomExponentialUtil.java' } format 'javaConsistentJumpHash', JavaExtension, { importOrder() removeUnusedImports() + cleanthat() googleJavaFormat(googleJavaFormatVersion) + formatAnnotations() licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('APACHE_2_0_GUAVA') target 'src/main/java/com/dynatrace/hash4j/consistent/ConsistentJumpBucketHasher.java' } format 'javaXXH', JavaExtension, { importOrder() removeUnusedImports() + cleanthat() googleJavaFormat(googleJavaFormatVersion) + formatAnnotations() licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + 
readJavaLicense('APACHE_2_0_XXH') target 'src/main/java/com/dynatrace/hash4j/hashing/XXH3_64.java' } diff --git a/src/main/java/com/dynatrace/hash4j/consistent/ConsistentHashing.java b/src/main/java/com/dynatrace/hash4j/consistent/ConsistentHashing.java index b71f09e1..13302270 100644 --- a/src/main/java/com/dynatrace/hash4j/consistent/ConsistentHashing.java +++ b/src/main/java/com/dynatrace/hash4j/consistent/ConsistentHashing.java @@ -63,7 +63,8 @@ public static ConsistentBucketHasher improvedConsistentWeightedSampling( * *

In contrast to other algorithms, JumpBackHash runs in constant time and does not require * floating-point operations. On some machines it may achieve similar performance as a modulo - * operation. + * operation. See Otmar Ertl, "JumpBackHash: Say Goodbye to the Modulo Operation to Distribute + * Keys Uniformly to Buckets", https://doi.org/10.1002/spe.3385. * * @param pseudoRandomGeneratorProvider a {@link PseudoRandomGeneratorProvider} * @return a {@link ConsistentBucketHasher} diff --git a/src/test/java/com/dynatrace/hash4j/hashing/AbstractHasherTest.java b/src/test/java/com/dynatrace/hash4j/hashing/AbstractHasherTest.java index 3e353d70..d8563f5d 100644 --- a/src/test/java/com/dynatrace/hash4j/hashing/AbstractHasherTest.java +++ b/src/test/java/com/dynatrace/hash4j/hashing/AbstractHasherTest.java @@ -36,7 +36,6 @@ import java.util.function.Consumer; import java.util.function.Function; import java.util.function.Supplier; -import org.jetbrains.annotations.NotNull; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; @@ -949,7 +948,6 @@ public char charAt(int index) { return AbstractHasher.getChar(buffer, (index & (NUM_CHARS_IN_BUFFER - 1)) << 1); } - @NotNull @Override public CharSequence subSequence(int start, int end) { throw new UnsupportedOperationException(); From e9a08a520d152482fa869e469836a4fd5633e1a0 Mon Sep 17 00:00:00 2001 From: Otmar Ertl Date: Mon, 25 Nov 2024 10:22:10 +0100 Subject: [PATCH 2/3] fixed line endings, simplified special license handling in build.gradle --- .gitattributes | 13 ++--- build.gradle | 126 +++++++++++++++++-------------------------------- 2 files changed, 48 insertions(+), 91 deletions(-) diff --git a/.gitattributes b/.gitattributes index 36b780ad..5947b126 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,13 +1,10 @@ * text=auto - -*.java text -*.py text -*.md text +*.java text eol=lf +*.py text eol=lf +*.md text eol=lf *.csv text - -*.bat text eol=crlf - +gradlew.bat text 
eol=crlf gradlew text eol=lf +*.gradle eol=lf *.sh text eol=lf - *.jar binary \ No newline at end of file diff --git a/build.gradle b/build.gradle index 5b4f98cb..dcb3d7bb 100644 --- a/build.gradle +++ b/build.gradle @@ -172,6 +172,35 @@ spotless { def eclipseCdtVersion = '11.6' def blackVersion = '24.10.0' def greclipseVersion = '4.32' + def specialLicenseHeaders = [ + new Tuple3('javaImohash', 'MIT_IMOHASH', [ + 'src/main/java/com/dynatrace/hash4j/file/Imohash1_0_2.java' + ]), + new Tuple3('javaKomihash', 'MIT_KOMIHASH' , [ + 'src/main/java/com/dynatrace/hash4j/hashing/Komihash4_3.java', + 'src/main/java/com/dynatrace/hash4j/hashing/Komihash5_0.java', + 'src/main/java/com/dynatrace/hash4j/hashing/AbstractKomihash.java' + ]), + new Tuple3('javaFarmHash', 'MIT_APACHE_2_0_FARMHASH',[ + 'src/main/java/com/dynatrace/hash4j/hashing/FarmHashNa.java', + 'src/main/java/com/dynatrace/hash4j/hashing/FarmHashUo.java' + ]), + new Tuple3('javaPolymurHash', 'ZLIB_POLYMURHASH',[ + 'src/main/java/com/dynatrace/hash4j/hashing/PolymurHash2_0.java' + ]), + new Tuple3('javaSplitMix64', 'CREATIVE_COMMONS_SPLITMIX64',[ + 'src/main/java/com/dynatrace/hash4j/random/SplitMix64V1.java' + ]), + new Tuple3('javaExponential', 'BOOST_EXPONENTIAL_RANDOM_GENERATION',[ + 'src/main/java/com/dynatrace/hash4j/random/RandomExponentialUtil.java' + ]), + new Tuple3('javaConsistentJumpHash', 'APACHE_2_0_GUAVA',[ + 'src/main/java/com/dynatrace/hash4j/consistent/ConsistentJumpBucketHasher.java' + ]), + new Tuple3('javaXXH', 'APACHE_2_0_XXH',[ + 'src/main/java/com/dynatrace/hash4j/hashing/XXH3_64.java' + ]) + ] ratchetFrom 'origin/main' apply plugin: 'groovy' @@ -196,90 +225,21 @@ spotless { googleJavaFormat(googleJavaFormatVersion) formatAnnotations() licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') - targetExclude \ - 'src/main/java/com/dynatrace/hash4j/consistent/ConsistentJumpBucketHasher.java',\ - 'src/main/java/com/dynatrace/hash4j/file/Imohash1_0_2.java',\ - 
'src/main/java/com/dynatrace/hash4j/hashing/Komihash4_3.java',\ - 'src/main/java/com/dynatrace/hash4j/hashing/Komihash5_0.java',\ - 'src/main/java/com/dynatrace/hash4j/hashing/PolymurHash2_0.java',\ - 'src/main/java/com/dynatrace/hash4j/hashing/AbstractKomihash.java',\ - 'src/main/java/com/dynatrace/hash4j/hashing/FarmHashNa.java',\ - 'src/main/java/com/dynatrace/hash4j/hashing/FarmHashUo.java',\ - 'src/main/java/com/dynatrace/hash4j/random/SplitMix64V1.java',\ - 'src/main/java/com/dynatrace/hash4j/random/RandomExponentialUtil.java',\ - 'src/main/java/com/dynatrace/hash4j/hashing/XXH3_64.java' - } - format 'javaImohash', JavaExtension, { - importOrder() - removeUnusedImports() - cleanthat() - googleJavaFormat(googleJavaFormatVersion) - formatAnnotations() - licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('MIT_IMOHASH') - target 'src/main/java/com/dynatrace/hash4j/file/Imohash1_0_2.java' - } - format 'javaKomihash', JavaExtension, { - importOrder() - removeUnusedImports() - cleanthat() - googleJavaFormat(googleJavaFormatVersion) - formatAnnotations() - licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('MIT_KOMIHASH') - target 'src/main/java/com/dynatrace/hash4j/hashing/Komihash4_3.java', 'src/main/java/com/dynatrace/hash4j/hashing/Komihash5_0.java', 'src/main/java/com/dynatrace/hash4j/hashing/AbstractKomihash.java' - } - format 'javaFarmHash', JavaExtension, { - importOrder() - removeUnusedImports() - cleanthat() - googleJavaFormat(googleJavaFormatVersion) - formatAnnotations() - licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('MIT_APACHE_2_0_FARMHASH') - target 'src/main/java/com/dynatrace/hash4j/hashing/FarmHashNa.java','src/main/java/com/dynatrace/hash4j/hashing/FarmHashUo.java' - } - format 'javaPolymurHash', JavaExtension, { - importOrder() - removeUnusedImports() - cleanthat() - googleJavaFormat(googleJavaFormatVersion) - formatAnnotations() - licenseHeader 
readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('ZLIB_POLYMURHASH') - target 'src/main/java/com/dynatrace/hash4j/hashing/PolymurHash2_0.java' - } - format 'javaSplitMix64', JavaExtension, { - importOrder() - removeUnusedImports() - cleanthat() - googleJavaFormat(googleJavaFormatVersion) - formatAnnotations() - licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('CREATIVE_COMMONS_SPLITMIX64') - target 'src/main/java/com/dynatrace/hash4j/random/SplitMix64V1.java' - } - format 'javaExponential', JavaExtension, { - importOrder() - removeUnusedImports() - cleanthat() - googleJavaFormat(googleJavaFormatVersion) - formatAnnotations() - licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('BOOST_EXPONENTIAL_RANDOM_GENERATION') - target 'src/main/java/com/dynatrace/hash4j/random/RandomExponentialUtil.java' + targetExclude specialLicenseHeaders.collect {it.get(2)}.flatten() } - format 'javaConsistentJumpHash', JavaExtension, { - importOrder() - removeUnusedImports() - cleanthat() - googleJavaFormat(googleJavaFormatVersion) - formatAnnotations() - licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('APACHE_2_0_GUAVA') - target 'src/main/java/com/dynatrace/hash4j/consistent/ConsistentJumpBucketHasher.java' - } - format 'javaXXH', JavaExtension, { - importOrder() - removeUnusedImports() - cleanthat() - googleJavaFormat(googleJavaFormatVersion) - formatAnnotations() - licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('APACHE_2_0_XXH') - target 'src/main/java/com/dynatrace/hash4j/hashing/XXH3_64.java' + specialLicenseHeaders.forEach { + def formatName = it.get(0) + def licenseName = it.get(1) + def files = it.get(2) + format formatName, JavaExtension, { + importOrder() + removeUnusedImports() + cleanthat() + googleJavaFormat(googleJavaFormatVersion) + formatAnnotations() + licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + 
readJavaLicense(licenseName) + target files + } } } From 222941359759f17334b4eb0e09f48e02721bf9e6 Mon Sep 17 00:00:00 2001 From: Otmar Ertl Date: Mon, 25 Nov 2024 11:11:22 +0100 Subject: [PATCH 3/3] added api changes from 0.17.0 -> 0.18.0 --- .palantir/revapi.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml index 349fe3a1..84ecff6b 100644 --- a/.palantir/revapi.yml +++ b/.palantir/revapi.yml @@ -55,6 +55,17 @@ acceptedBreaks: \ com.dynatrace.hash4j.distinctcount.DistinctCounter.Estimator>>>::reconstructHash(int)\ \ @ com.dynatrace.hash4j.distinctcount.UltraLogLog" justification: "removed non-public method" + "0.17.0": + com.dynatrace.hash4j:hash4j: + - code: "java.method.addedToInterface" + new: "method com.dynatrace.hash4j.hashing.HashStream128 com.dynatrace.hash4j.hashing.HashStream128::copy()" + justification: "added copy method to HashStream" + - code: "java.method.addedToInterface" + new: "method com.dynatrace.hash4j.hashing.HashStream32 com.dynatrace.hash4j.hashing.HashStream32::copy()" + justification: "added copy method to HashStream" + - code: "java.method.addedToInterface" + new: "method com.dynatrace.hash4j.hashing.HashStream64 com.dynatrace.hash4j.hashing.HashStream64::copy()" + justification: "added copy method to HashStream" "0.18.0": com.dynatrace.hash4j:hash4j: - code: "java.class.visibilityReduced"