diff --git a/.gitattributes b/.gitattributes index 36b780a..5947b12 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,13 +1,10 @@ * text=auto - -*.java text -*.py text -*.md text +*.java text eol=lf +*.py text eol=lf +*.md text eol=lf *.csv text - -*.bat text eol=crlf - +gradlew.bat text eol=crlf gradlew text eol=lf +*.gradle eol=lf *.sh text eol=lf - *.jar binary \ No newline at end of file diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml index 349fe3a..84ecff6 100644 --- a/.palantir/revapi.yml +++ b/.palantir/revapi.yml @@ -55,6 +55,17 @@ acceptedBreaks: \ com.dynatrace.hash4j.distinctcount.DistinctCounter.Estimator>>>::reconstructHash(int)\ \ @ com.dynatrace.hash4j.distinctcount.UltraLogLog" justification: "removed non-public method" + "0.17.0": + com.dynatrace.hash4j:hash4j: + - code: "java.method.addedToInterface" + new: "method com.dynatrace.hash4j.hashing.HashStream128 com.dynatrace.hash4j.hashing.HashStream128::copy()" + justification: "{added copy method to HashStream}" + - code: "java.method.addedToInterface" + new: "method com.dynatrace.hash4j.hashing.HashStream32 com.dynatrace.hash4j.hashing.HashStream32::copy()" + justification: "{added copy method to HashStream}" + - code: "java.method.addedToInterface" + new: "method com.dynatrace.hash4j.hashing.HashStream64 com.dynatrace.hash4j.hashing.HashStream64::copy()" + justification: "{added copy method to HashStream}" "0.18.0": com.dynatrace.hash4j:hash4j: - code: "java.class.visibilityReduced" diff --git a/README.md b/README.md index 7c4385e..9287dae 100644 --- a/README.md +++ b/README.md @@ -26,12 +26,12 @@ To add a dependency on hash4j using Maven, use the following: com.dynatrace.hash4j hash4j - 0.18.0 + 0.19.0 ``` To add a dependency using Gradle: ```gradle -implementation 'com.dynatrace.hash4j:hash4j:0.18.0' +implementation 'com.dynatrace.hash4j:hash4j:0.19.0' ``` ## Hash algorithms @@ -134,7 +134,7 @@ In case of non-distributed data streams, the [martingale estimator](src/main/jav can be used, which gives slightly better estimation results as the asymptotic storage factor is $6\ln 2 = 4.159$. This gives a relative standard error of $\sqrt{\frac{6\ln 2}{6m}} = \frac{0.833}{\sqrt{m}}$. The theoretically predicted estimation errors have been empirically confirmed by [simulation results](doc/hyperloglog-estimation-error.md). -* UltraLogLog: This algorithm is described in detail in this [paper](https://arxiv.org/abs/2308.16862). +* UltraLogLog: This algorithm is described in detail in this [paper](https://doi.org/10.14778/3654621.3654632). Like for HyperLogLog, a precision parameter $p$ defines the number of registers $m = 2^p$. However, since UltraLogLog uses 8-bit registers to enable fast random accesses and updates of the registers, $m$ is also the state size in bytes. @@ -211,7 +211,7 @@ The following consistent hashing algorithms are available: * [Improved Consistent Weighted Sampling](https://doi.org/10.1109/ICDM.2010.80): This algorithm is based on improved consistent weighted sampling with a constant computation time independent of the number of buckets. This algorithm is faster than JumpHash for a large number of buckets. -* [JumpBackHash](https://arxiv.org/abs/2403.18682): In contrast to JumpHash, which traverses "active indices" (see [here](https://doi.org/10.1109/ICDM.2010.80) for a definition) +* [JumpBackHash](https://doi.org/10.1002/spe.3385): In contrast to JumpHash, which traverses "active indices" (see [here](https://doi.org/10.1109/ICDM.2010.80) for a definition) in ascending order, JumpBackHash does this in the opposite direction. In this way, floating-point operations can be completely avoided. Further optimizations minimize the number of random values that need to be generated to reach the largest "active index" within the given bucket range in amortized constant time. The largest "active index", diff --git a/build.gradle b/build.gradle index 4996d76..dcb3d7b 100644 --- a/build.gradle +++ b/build.gradle @@ -38,8 +38,8 @@ java { toolchain { languageVersion = JavaLanguageVersion.of(21) } - withJavadocJar() withSourcesJar() + withJavadocJar() } @@ -103,7 +103,7 @@ tasks.test { } tasks.register("java21Test", Test) { - // compare https://github.com/melix/mrjar-gradle-plugin/blob/dac99aadd451e3c2176aa6e13af7ad39e20c2cb9/plugin/src/main/java/me/champeau/mrjar/MultiReleaseExtension.java group=LifecycleBasePlugin.VERIFICATION_GROUP + group = LifecycleBasePlugin.VERIFICATION_GROUP javaLauncher = javaToolchains.launcherFor { languageVersion = JavaLanguageVersion.of(21) } @@ -128,7 +128,7 @@ tasks.withType(JavaCompile).configureEach { } group = 'com.dynatrace.hash4j' -version = '0.18.0' +version = '0.19.0' static def readJavaLicense(licenseName) { @@ -172,6 +172,35 @@ spotless { def eclipseCdtVersion = '11.6' def blackVersion = '24.10.0' def greclipseVersion = '4.32' + def specialLicenseHeaders = [ + new Tuple3('javaImohash', 'MIT_IMOHASH', [ + 'src/main/java/com/dynatrace/hash4j/file/Imohash1_0_2.java' + ]), + new Tuple3('javaKomihash', 'MIT_KOMIHASH' , [ + 'src/main/java/com/dynatrace/hash4j/hashing/Komihash4_3.java', + 'src/main/java/com/dynatrace/hash4j/hashing/Komihash5_0.java', + 'src/main/java/com/dynatrace/hash4j/hashing/AbstractKomihash.java' + ]), + new Tuple3('javaFarmHash', 'MIT_APACHE_2_0_FARMHASH',[ + 'src/main/java/com/dynatrace/hash4j/hashing/FarmHashNa.java', + 'src/main/java/com/dynatrace/hash4j/hashing/FarmHashUo.java' + ]), + new Tuple3('javaPolymurHash', 'ZLIB_POLYMURHASH',[ + 'src/main/java/com/dynatrace/hash4j/hashing/PolymurHash2_0.java' + ]), + new Tuple3('javaSplitMix64', 'CREATIVE_COMMONS_SPLITMIX64',[ + 'src/main/java/com/dynatrace/hash4j/random/SplitMix64V1.java' + ]), + new Tuple3('javaExponential', 'BOOST_EXPONENTIAL_RANDOM_GENERATION',[ + 'src/main/java/com/dynatrace/hash4j/random/RandomExponentialUtil.java' + ]), + new Tuple3('javaConsistentJumpHash', 'APACHE_2_0_GUAVA',[ + 'src/main/java/com/dynatrace/hash4j/consistent/ConsistentJumpBucketHasher.java' + ]), + new Tuple3('javaXXH', 'APACHE_2_0_XXH',[ + 'src/main/java/com/dynatrace/hash4j/hashing/XXH3_64.java' + ]) + ] ratchetFrom 'origin/main' apply plugin: 'groovy' @@ -192,76 +221,25 @@ spotless { java { importOrder() removeUnusedImports() + cleanthat() googleJavaFormat(googleJavaFormatVersion) + formatAnnotations() licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') - targetExclude \ - 'src/main/java/com/dynatrace/hash4j/consistent/ConsistentJumpBucketHasher.java',\ - 'src/main/java/com/dynatrace/hash4j/file/Imohash1_0_2.java',\ - 'src/main/java/com/dynatrace/hash4j/hashing/Komihash4_3.java',\ - 'src/main/java/com/dynatrace/hash4j/hashing/Komihash5_0.java',\ - 'src/main/java/com/dynatrace/hash4j/hashing/PolymurHash2_0.java',\ - 'src/main/java/com/dynatrace/hash4j/hashing/AbstractKomihash.java',\ - 'src/main/java/com/dynatrace/hash4j/hashing/FarmHashNa.java',\ - 'src/main/java/com/dynatrace/hash4j/hashing/FarmHashUo.java',\ - 'src/main/java/com/dynatrace/hash4j/random/SplitMix64V1.java',\ - 'src/main/java/com/dynatrace/hash4j/random/RandomExponentialUtil.java',\ - 'src/main/java/com/dynatrace/hash4j/hashing/XXH3_64.java' - } - format 'javaImohash', JavaExtension, { - importOrder() - removeUnusedImports() - googleJavaFormat(googleJavaFormatVersion) - licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('MIT_IMOHASH') - target 'src/main/java/com/dynatrace/hash4j/file/Imohash1_0_2.java' - } - format 'javaKomihash', JavaExtension, { - importOrder() - removeUnusedImports() - googleJavaFormat(googleJavaFormatVersion) - licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('MIT_KOMIHASH') - target 'src/main/java/com/dynatrace/hash4j/hashing/Komihash4_3.java', 'src/main/java/com/dynatrace/hash4j/hashing/Komihash5_0.java', 'src/main/java/com/dynatrace/hash4j/hashing/AbstractKomihash.java' - } - format 'javaFarmHash', JavaExtension, { - importOrder() - removeUnusedImports() - googleJavaFormat(googleJavaFormatVersion) - licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('MIT_APACHE_2_0_FARMHASH') - target 'src/main/java/com/dynatrace/hash4j/hashing/FarmHashNa.java','src/main/java/com/dynatrace/hash4j/hashing/FarmHashUo.java' - } - format 'javaPolymurHash', JavaExtension, { - importOrder() - removeUnusedImports() - googleJavaFormat(googleJavaFormatVersion) - licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('ZLIB_POLYMURHASH') - target 'src/main/java/com/dynatrace/hash4j/hashing/PolymurHash2_0.java' - } - format 'javaSplitMix64', JavaExtension, { - importOrder() - removeUnusedImports() - googleJavaFormat(googleJavaFormatVersion) - licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('CREATIVE_COMMONS_SPLITMIX64') - target 'src/main/java/com/dynatrace/hash4j/random/SplitMix64V1.java' - } - format 'javaExponential', JavaExtension, { - importOrder() - removeUnusedImports() - googleJavaFormat(googleJavaFormatVersion) - licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('BOOST_EXPONENTIAL_RANDOM_GENERATION') - target 'src/main/java/com/dynatrace/hash4j/random/RandomExponentialUtil.java' - } - format 'javaConsistentJumpHash', JavaExtension, { - importOrder() - removeUnusedImports() - googleJavaFormat(googleJavaFormatVersion) - licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('APACHE_2_0_GUAVA') - target 'src/main/java/com/dynatrace/hash4j/consistent/ConsistentJumpBucketHasher.java' + targetExclude specialLicenseHeaders.collect {it.get(2)}.flatten() } - format 'javaXXH', JavaExtension, { - importOrder() - removeUnusedImports() - googleJavaFormat(googleJavaFormatVersion) - licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('APACHE_2_0_XXH') - target 'src/main/java/com/dynatrace/hash4j/hashing/XXH3_64.java' + specialLicenseHeaders.forEach { + def formatName = it.get(0) + def licenseName = it.get(1) + def files = it.get(2) + format formatName, JavaExtension, { + importOrder() + removeUnusedImports() + cleanthat() + googleJavaFormat(googleJavaFormatVersion) + formatAnnotations() + licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense(licenseName) + target files + } } } diff --git a/src/main/java/com/dynatrace/hash4j/consistent/ConsistentHashing.java b/src/main/java/com/dynatrace/hash4j/consistent/ConsistentHashing.java index b71f09e..1330227 100644 --- a/src/main/java/com/dynatrace/hash4j/consistent/ConsistentHashing.java +++ b/src/main/java/com/dynatrace/hash4j/consistent/ConsistentHashing.java @@ -63,7 +63,8 @@ public static ConsistentBucketHasher improvedConsistentWeightedSampling( * *

In contrast to other algorithms, JumpBackHash runs in constant time and does not require * floating-point operations. On some machines it may achieve similar performance as a modulo - * operation. + * operation. See Otmar Ertl, "JumpBackHash: Say Goodbye to the Modulo Operation to Distribute + * Keys Uniformly to Buckets", 10.1002/spe.3385. * * @param pseudoRandomGeneratorProvider a {@link PseudoRandomGeneratorProvider} * @return a {@link ConsistentBucketHasher} diff --git a/src/test/java/com/dynatrace/hash4j/hashing/AbstractHasherTest.java b/src/test/java/com/dynatrace/hash4j/hashing/AbstractHasherTest.java index 3e353d7..d8563f5 100644 --- a/src/test/java/com/dynatrace/hash4j/hashing/AbstractHasherTest.java +++ b/src/test/java/com/dynatrace/hash4j/hashing/AbstractHasherTest.java @@ -36,7 +36,6 @@ import java.util.function.Consumer; import java.util.function.Function; import java.util.function.Supplier; -import org.jetbrains.annotations.NotNull; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; @@ -949,7 +948,6 @@ public char charAt(int index) { return AbstractHasher.getChar(buffer, (index & (NUM_CHARS_IN_BUFFER - 1)) << 1); } - @NotNull @Override public CharSequence subSequence(int start, int end) { throw new UnsupportedOperationException();