Skip to content

Commit

Permalink
Merge pull request #302 from dynatrace-oss/reset-and-hash-method
Browse files Browse the repository at this point in the history
Reset and hash method
  • Loading branch information
oertl authored Nov 25, 2024
2 parents 5621b0b + 2229413 commit fdd1f6f
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 85 deletions.
13 changes: 5 additions & 8 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
* text=auto

*.java text
*.py text
*.md text
*.java text eol=lf
*.py text eol=lf
*.md text eol=lf
*.csv text

*.bat text eol=crlf

gradlew.bat text eol=crlf
gradlew text eol=lf
*.gradle eol=lf
*.sh text eol=lf

*.jar binary
11 changes: 11 additions & 0 deletions .palantir/revapi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,17 @@ acceptedBreaks:
\ com.dynatrace.hash4j.distinctcount.DistinctCounter.Estimator<T>>>>::reconstructHash(int)\
\ @ com.dynatrace.hash4j.distinctcount.UltraLogLog"
justification: "removed non-public method"
"0.17.0":
com.dynatrace.hash4j:hash4j:
- code: "java.method.addedToInterface"
new: "method com.dynatrace.hash4j.hashing.HashStream128 com.dynatrace.hash4j.hashing.HashStream128::copy()"
justification: "added copy method to HashStream"
- code: "java.method.addedToInterface"
new: "method com.dynatrace.hash4j.hashing.HashStream32 com.dynatrace.hash4j.hashing.HashStream32::copy()"
justification: "added copy method to HashStream"
- code: "java.method.addedToInterface"
new: "method com.dynatrace.hash4j.hashing.HashStream64 com.dynatrace.hash4j.hashing.HashStream64::copy()"
justification: "added copy method to HashStream"
"0.18.0":
com.dynatrace.hash4j:hash4j:
- code: "java.class.visibilityReduced"
Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@ To add a dependency on hash4j using Maven, use the following:
<dependency>
<groupId>com.dynatrace.hash4j</groupId>
<artifactId>hash4j</artifactId>
<version>0.18.0</version>
<version>0.19.0</version>
</dependency>
```
To add a dependency using Gradle:
```gradle
implementation 'com.dynatrace.hash4j:hash4j:0.18.0'
implementation 'com.dynatrace.hash4j:hash4j:0.19.0'
```

## Hash algorithms
Expand Down Expand Up @@ -134,7 +134,7 @@ In case of non-distributed data streams, the [martingale estimator](src/main/jav
can be used, which gives slightly better estimation results as the asymptotic storage factor is $6\ln 2 = 4.159$.
This gives a relative standard error of $\sqrt{\frac{6\ln 2}{6m}} = \frac{0.833}{\sqrt{m}}$.
The theoretically predicted estimation errors have been empirically confirmed by [simulation results](doc/hyperloglog-estimation-error.md).
* UltraLogLog: This algorithm is described in detail in this [paper](https://arxiv.org/abs/2308.16862).
* UltraLogLog: This algorithm is described in detail in this [paper](https://doi.org/10.14778/3654621.3654632).
As with HyperLogLog, a precision parameter $p$ defines the number of registers $m = 2^p$.
However, since UltraLogLog uses 8-bit registers to enable fast random accesses and updates of the registers,
$m$ is also the state size in bytes.
Expand Down Expand Up @@ -211,7 +211,7 @@ The following consistent hashing algorithms are available:
* [Improved Consistent Weighted Sampling](https://doi.org/10.1109/ICDM.2010.80): This algorithm is based on improved
consistent weighted sampling with a constant computation time independent of the number of buckets. This algorithm is faster than
JumpHash for a large number of buckets.
* [JumpBackHash](https://arxiv.org/abs/2403.18682): In contrast to JumpHash, which traverses "active indices" (see [here](https://doi.org/10.1109/ICDM.2010.80) for a definition)
* [JumpBackHash](https://doi.org/10.1002/spe.3385): In contrast to JumpHash, which traverses "active indices" (see [here](https://doi.org/10.1109/ICDM.2010.80) for a definition)
in ascending order, JumpBackHash does this in the opposite direction. In this way, floating-point operations can be completely avoided.
Further optimizations minimize the number of random values that need to be generated to reach
the largest "active index" within the given bucket range in amortized constant time. The largest "active index",
Expand Down
118 changes: 48 additions & 70 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ java {
toolchain {
languageVersion = JavaLanguageVersion.of(21)
}
withJavadocJar()
withSourcesJar()
withJavadocJar()
}


Expand Down Expand Up @@ -103,7 +103,7 @@ tasks.test {
}

tasks.register("java21Test", Test) {
// compare https://github.com/melix/mrjar-gradle-plugin/blob/dac99aadd451e3c2176aa6e13af7ad39e20c2cb9/plugin/src/main/java/me/champeau/mrjar/MultiReleaseExtension.java group=LifecycleBasePlugin.VERIFICATION_GROUP
group = LifecycleBasePlugin.VERIFICATION_GROUP
javaLauncher = javaToolchains.launcherFor {
languageVersion = JavaLanguageVersion.of(21)
}
Expand All @@ -128,7 +128,7 @@ tasks.withType(JavaCompile).configureEach {
}

group = 'com.dynatrace.hash4j'
version = '0.18.0'
version = '0.19.0'


static def readJavaLicense(licenseName) {
Expand Down Expand Up @@ -172,6 +172,35 @@ spotless {
def eclipseCdtVersion = '11.6'
def blackVersion = '24.10.0'
def greclipseVersion = '4.32'
// Source files that need an additional license header besides the default one.
// Each Tuple3 entry is (Spotless format name, license name passed to readJavaLicense,
// list of source file paths the format applies to).
def specialLicenseHeaders = [
new Tuple3('javaImohash', 'MIT_IMOHASH', [
'src/main/java/com/dynatrace/hash4j/file/Imohash1_0_2.java'
]),
new Tuple3('javaKomihash', 'MIT_KOMIHASH' , [
'src/main/java/com/dynatrace/hash4j/hashing/Komihash4_3.java',
'src/main/java/com/dynatrace/hash4j/hashing/Komihash5_0.java',
'src/main/java/com/dynatrace/hash4j/hashing/AbstractKomihash.java'
]),
new Tuple3('javaFarmHash', 'MIT_APACHE_2_0_FARMHASH',[
'src/main/java/com/dynatrace/hash4j/hashing/FarmHashNa.java',
'src/main/java/com/dynatrace/hash4j/hashing/FarmHashUo.java'
]),
new Tuple3('javaPolymurHash', 'ZLIB_POLYMURHASH',[
'src/main/java/com/dynatrace/hash4j/hashing/PolymurHash2_0.java'
]),
new Tuple3('javaSplitMix64', 'CREATIVE_COMMONS_SPLITMIX64',[
'src/main/java/com/dynatrace/hash4j/random/SplitMix64V1.java'
]),
new Tuple3('javaExponential', 'BOOST_EXPONENTIAL_RANDOM_GENERATION',[
'src/main/java/com/dynatrace/hash4j/random/RandomExponentialUtil.java'
]),
new Tuple3('javaConsistentJumpHash', 'APACHE_2_0_GUAVA',[
'src/main/java/com/dynatrace/hash4j/consistent/ConsistentJumpBucketHasher.java'
]),
new Tuple3('javaXXH', 'APACHE_2_0_XXH',[
'src/main/java/com/dynatrace/hash4j/hashing/XXH3_64.java'
])
]

ratchetFrom 'origin/main'
apply plugin: 'groovy'
Expand All @@ -192,76 +221,25 @@ spotless {
java {
importOrder()
removeUnusedImports()
cleanthat()
googleJavaFormat(googleJavaFormatVersion)
formatAnnotations()
licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE')
targetExclude \
'src/main/java/com/dynatrace/hash4j/consistent/ConsistentJumpBucketHasher.java',\
'src/main/java/com/dynatrace/hash4j/file/Imohash1_0_2.java',\
'src/main/java/com/dynatrace/hash4j/hashing/Komihash4_3.java',\
'src/main/java/com/dynatrace/hash4j/hashing/Komihash5_0.java',\
'src/main/java/com/dynatrace/hash4j/hashing/PolymurHash2_0.java',\
'src/main/java/com/dynatrace/hash4j/hashing/AbstractKomihash.java',\
'src/main/java/com/dynatrace/hash4j/hashing/FarmHashNa.java',\
'src/main/java/com/dynatrace/hash4j/hashing/FarmHashUo.java',\
'src/main/java/com/dynatrace/hash4j/random/SplitMix64V1.java',\
'src/main/java/com/dynatrace/hash4j/random/RandomExponentialUtil.java',\
'src/main/java/com/dynatrace/hash4j/hashing/XXH3_64.java'
}
format 'javaImohash', JavaExtension, {
importOrder()
removeUnusedImports()
googleJavaFormat(googleJavaFormatVersion)
licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('MIT_IMOHASH')
target 'src/main/java/com/dynatrace/hash4j/file/Imohash1_0_2.java'
}
format 'javaKomihash', JavaExtension, {
importOrder()
removeUnusedImports()
googleJavaFormat(googleJavaFormatVersion)
licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('MIT_KOMIHASH')
target 'src/main/java/com/dynatrace/hash4j/hashing/Komihash4_3.java', 'src/main/java/com/dynatrace/hash4j/hashing/Komihash5_0.java', 'src/main/java/com/dynatrace/hash4j/hashing/AbstractKomihash.java'
}
format 'javaFarmHash', JavaExtension, {
importOrder()
removeUnusedImports()
googleJavaFormat(googleJavaFormatVersion)
licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('MIT_APACHE_2_0_FARMHASH')
target 'src/main/java/com/dynatrace/hash4j/hashing/FarmHashNa.java','src/main/java/com/dynatrace/hash4j/hashing/FarmHashUo.java'
}
format 'javaPolymurHash', JavaExtension, {
importOrder()
removeUnusedImports()
googleJavaFormat(googleJavaFormatVersion)
licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('ZLIB_POLYMURHASH')
target 'src/main/java/com/dynatrace/hash4j/hashing/PolymurHash2_0.java'
}
format 'javaSplitMix64', JavaExtension, {
importOrder()
removeUnusedImports()
googleJavaFormat(googleJavaFormatVersion)
licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('CREATIVE_COMMONS_SPLITMIX64')
target 'src/main/java/com/dynatrace/hash4j/random/SplitMix64V1.java'
}
format 'javaExponential', JavaExtension, {
importOrder()
removeUnusedImports()
googleJavaFormat(googleJavaFormatVersion)
licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('BOOST_EXPONENTIAL_RANDOM_GENERATION')
target 'src/main/java/com/dynatrace/hash4j/random/RandomExponentialUtil.java'
}
format 'javaConsistentJumpHash', JavaExtension, {
importOrder()
removeUnusedImports()
googleJavaFormat(googleJavaFormatVersion)
licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('APACHE_2_0_GUAVA')
target 'src/main/java/com/dynatrace/hash4j/consistent/ConsistentJumpBucketHasher.java'
targetExclude specialLicenseHeaders.collect {it.get(2)}.flatten()
}
format 'javaXXH', JavaExtension, {
importOrder()
removeUnusedImports()
googleJavaFormat(googleJavaFormatVersion)
licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('APACHE_2_0_XXH')
target 'src/main/java/com/dynatrace/hash4j/hashing/XXH3_64.java'
// Register one dedicated Spotless Java format per entry of specialLicenseHeaders.
// Each format runs the steps listed below and targets only the entry's files.
specialLicenseHeaders.forEach {
// Unpack the tuple: (format name, additional license name, target files).
def formatName = it.get(0)
def licenseName = it.get(1)
def files = it.get(2)
format formatName, JavaExtension, {
importOrder()
removeUnusedImports()
cleanthat()
googleJavaFormat(googleJavaFormatVersion)
formatAnnotations()
// Header = APACHE_2_0_DYNATRACE license text, two newlines, then the entry's own license text.
licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense(licenseName)
// Restrict this format to the files listed for the entry.
target files
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ public static ConsistentBucketHasher improvedConsistentWeightedSampling(
*
* <p>In contrast to other algorithms, JumpBackHash runs in constant time and does not require
* floating-point operations. On some machines it may achieve performance similar to a modulo
* operation.
* operation. See Otmar Ertl, "JumpBackHash: Say Goodbye to the Modulo Operation to Distribute
* Keys Uniformly to Buckets", <a href="https://doi.org/10.1002/spe.3385">10.1002/spe.3385</a>.
*
* @param pseudoRandomGeneratorProvider a {@link PseudoRandomGeneratorProvider}
* @return a {@link ConsistentBucketHasher}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Supplier;
import org.jetbrains.annotations.NotNull;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInstance;
Expand Down Expand Up @@ -949,7 +948,6 @@ public char charAt(int index) {
return AbstractHasher.getChar(buffer, (index & (NUM_CHARS_IN_BUFFER - 1)) << 1);
}

@NotNull
@Override
public CharSequence subSequence(int start, int end) {
throw new UnsupportedOperationException();
Expand Down

0 comments on commit fdd1f6f

Please sign in to comment.