Skip to content

Commit

Permalink
improved checksum tests
Browse files Browse the repository at this point in the history
  • Loading branch information
oertl committed Jun 16, 2024
1 parent 1c15b20 commit b88a783
Show file tree
Hide file tree
Showing 38 changed files with 26,845 additions and 99 deletions.
1,025 changes: 1,025 additions & 0 deletions reference-implementations/FarmHash NA.txt

Large diffs are not rendered by default.

1,025 changes: 1,025 additions & 0 deletions reference-implementations/FarmHash UO.txt

Large diffs are not rendered by default.

1,025 changes: 1,025 additions & 0 deletions reference-implementations/Komihash 4.3.txt

Large diffs are not rendered by default.

1,025 changes: 1,025 additions & 0 deletions reference-implementations/Komihash 4.5.txt

Large diffs are not rendered by default.

1,025 changes: 1,025 additions & 0 deletions reference-implementations/Komihash 4.7.txt

Large diffs are not rendered by default.

1,025 changes: 1,025 additions & 0 deletions reference-implementations/Komihash 5.0.txt

Large diffs are not rendered by default.

1,025 changes: 1,025 additions & 0 deletions reference-implementations/Komihash 5.10.txt

Large diffs are not rendered by default.

1,025 changes: 1,025 additions & 0 deletions reference-implementations/Murmur3 128.txt

Large diffs are not rendered by default.

1,025 changes: 1,025 additions & 0 deletions reference-implementations/Murmur3 32.txt

Large diffs are not rendered by default.

1,025 changes: 1,025 additions & 0 deletions reference-implementations/PolymurHash 2.0.txt

Large diffs are not rendered by default.

1,025 changes: 1,025 additions & 0 deletions reference-implementations/Wyhash final 3.txt

Large diffs are not rendered by default.

1,025 changes: 1,025 additions & 0 deletions reference-implementations/Wyhash final 4.txt

Large diffs are not rendered by default.

1,025 changes: 1,025 additions & 0 deletions reference-implementations/XXH3.txt

Large diffs are not rendered by default.

46 changes: 28 additions & 18 deletions reference-implementations/calculate_checksums.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include <iomanip>
#include <random>
#include <fstream>

#include "openssl/sha.h"

Expand Down Expand Up @@ -46,21 +46,28 @@ uint64_t splitmix_v1_update(uint64_t &state) {
template<typename T>
void computeAndPrintChecksum(const T &hashFunctionConfig = T()) {

uint64_t maxDataLength = 200;
uint64_t numCycles = 10000;
mt19937_64 rng(0);

uint8_t checkSum[SHA256_DIGEST_LENGTH];
SHA256_CTX sha256;
uint64_t maxDataLength = 1024;
uint64_t numCycles = 1000;

SHA256_Init(&sha256);
ofstream outputFile(hashFunctionConfig.getName() + ".txt");

uint64_t rngState = 0;
uint64_t effectiveSeedLength = (hashFunctionConfig.getSeedSize() + 7) >> 3;
for (uint64_t dataLength = 0; dataLength <= maxDataLength; ++dataLength) {

std::vector < uint64_t > seedBytesTemp(effectiveSeedLength);
std::vector < uint8_t > hashBytes(hashFunctionConfig.getHashSize());
uint8_t checkSum[SHA256_DIGEST_LENGTH];
SHA256_CTX sha256;

SHA256_Init(&sha256);

uint64_t seed = rng();
uint64_t rngState = seed;
uint64_t effectiveSeedLength = (hashFunctionConfig.getSeedSize() + 7)
>> 3;

std::vector < uint64_t > seedBytesTemp(effectiveSeedLength);
std::vector < uint8_t > hashBytes(hashFunctionConfig.getHashSize());

for (uint64_t dataLength = 0; dataLength <= maxDataLength; ++dataLength) {
uint64_t effectiveDataLength = (dataLength + 7) >> 3;

std::vector < uint64_t > dataBytesTemp(effectiveDataLength);
Expand All @@ -83,15 +90,18 @@ void computeAndPrintChecksum(const T &hashFunctionConfig = T()) {

}

}
SHA256_Final(checkSum, &sha256);
SHA256_Final(checkSum, &sha256);

cout << hashFunctionConfig.getName() << ": ";
for (uint64_t k = 0; k < SHA256_DIGEST_LENGTH; ++k)
cout << hex << setfill('0') << setw(2)
<< static_cast<uint64_t>(checkSum[k]);
cout << endl;
outputFile << dec << dataLength << ",";
outputFile << dec << numCycles << ",";
outputFile << hex << setfill('0') << setw(16) << seed << ",";
for (uint64_t k = 0; k < SHA256_DIGEST_LENGTH; ++k)
outputFile << hex << setfill('0') << setw(2)
<< static_cast<uint64_t>(checkSum[k]);

outputFile << endl;
}
outputFile.close();
}

int main(int argc, char *argv[]) {
Expand Down
128 changes: 99 additions & 29 deletions src/test/java/com/dynatrace/hash4j/hashing/AbstractHasherTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,23 @@
package com.dynatrace.hash4j.hashing;

import static com.dynatrace.hash4j.testutils.TestUtils.byteArrayToHexString;
import static com.dynatrace.hash4j.testutils.TestUtils.hexStringToByteArray;
import static java.util.stream.Collectors.toList;
import static org.assertj.core.api.Assertions.*;
import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS;

import com.dynatrace.hash4j.random.PseudoRandomGenerator;
import com.dynatrace.hash4j.random.PseudoRandomGeneratorProvider;
import com.dynatrace.hash4j.testutils.TestUtils;
import com.google.common.base.MoreObjects;
import com.google.common.base.Splitter;
import java.io.File;
import java.io.FileNotFoundException;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.VarHandle;
import java.net.URISyntaxException;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.*;
Expand Down Expand Up @@ -78,12 +85,80 @@ public String toString() {

protected abstract List<? extends Hasher> getHashers();

/**
 * Immutable holder for one entry of a checksum reference file: the parameters of a checksum run
 * together with the checksum that run is expected to produce.
 */
public static final class ChecksumRecord {
// Length in bytes of the data hashed in every cycle of the run.
private final long dataSize;
// Number of hash computations whose results are folded into the checksum.
private final int numCycles;
// Value used to reset the pseudo-random generator before the run.
private final long seed;
// Expected checksum as a string; the test compares it with the hex-encoded SHA-256 digest.
private final String checksum;

/**
 * Creates a record from already-parsed values.
 *
 * @param dataSize length in bytes of the hashed data
 * @param numCycles number of hash computations contributing to the checksum
 * @param seed seed for the pseudo-random generator
 * @param checksumString expected checksum string
 */
public ChecksumRecord(long dataSize, int numCycles, long seed, String checksumString) {
this.dataSize = dataSize;
this.numCycles = numCycles;
this.seed = seed;
this.checksum = checksumString;
}

/** Returns the expected checksum string. */
public String getChecksum() {
return checksum;
}

/** Returns the length in bytes of the hashed data. */
public long getDataSize() {
return dataSize;
}

/** Returns the number of hash computations contributing to the checksum. */
public int getNumCycles() {
return numCycles;
}

/** Returns the seed for the pseudo-random generator. */
public long getSeed() {
return seed;
}

/**
 * Returns a short description containing only {@code dataSize} and {@code numCycles}; the seed
 * and the checksum are left out.
 */
// NOTE(review): presumably kept short because it becomes the parameterized-test display name —
// confirm before adding further fields.
@Override
public String toString() {
return MoreObjects.toStringHelper(this)
.add("dataSize", dataSize)
.add("numCycles", numCycles)
.toString();
}
}

/**
 * Loads the checksum reference records for this test class from the classpath resource named by
 * {@link #getChecksumResourceFileName()}.
 *
 * <p>Every non-blank line is expected to hold at least four comma-separated fields: the data size
 * (decimal), the number of cycles (decimal), the seed (parsed by {@code
 * TestUtils.hexStringToLong}) and the expected checksum string. Blank lines are skipped.
 *
 * @return the parsed records in file order
 * @throws NullPointerException if the resource cannot be found on the classpath
 * @throws IllegalStateException if a line has fewer than four fields
 * @throws NumberFormatException if the data size or the number of cycles is not a valid integer
 * @throws RuntimeException if the resource URL cannot be converted to a file or the file cannot
 *     be opened
 */
private List<ChecksumRecord> getChecksumRecords() {

  String resourceName = getChecksumResourceFileName();
  ClassLoader classLoader = getClass().getClassLoader();
  File file;
  try {
    // getResource returns null for a missing resource; fail with a message naming the resource
    // instead of a bare NullPointerException from the chained toURI() call.
    file =
        new File(
            Objects.requireNonNull(
                    classLoader.getResource(resourceName),
                    () -> "checksum resource not found on classpath: " + resourceName)
                .toURI());
  } catch (URISyntaxException e) {
    throw new RuntimeException(e);
  }
  List<ChecksumRecord> checksumRecords = new ArrayList<>();

  try (Scanner scanner = new Scanner(file, StandardCharsets.UTF_8.name())) {
    int lineNumber = 0;
    while (scanner.hasNextLine()) {
      String line = scanner.nextLine();
      lineNumber += 1;
      if (line.trim().isEmpty()) {
        // tolerate empty lines, e.g. trailing ones added by an editor
        continue;
      }
      List<String> s = Splitter.on(',').splitToList(line);
      if (s.size() < 4) {
        throw new IllegalStateException(
            "malformed checksum record in "
                + resourceName
                + " at line "
                + lineNumber
                + ": expected 4 comma-separated fields but got "
                + s.size());
      }

      // parsed as int on purpose: the checksum test narrows the data size to int anyway, so an
      // out-of-range value should fail here rather than be truncated silently
      long dataSize = Integer.parseInt(s.get(0));
      int numCycles = Integer.parseInt(s.get(1));
      long seed = TestUtils.hexStringToLong(s.get(2));
      String checksumString = s.get(3);
      checksumRecords.add(new ChecksumRecord(dataSize, numCycles, seed, checksumString));
    }

  } catch (FileNotFoundException e) {
    throw new RuntimeException(e);
  }
  return checksumRecords;
}

protected abstract String getChecksumResourceFileName();

private static final class TestCase {
private final byte[] expected;
private final Consumer<HashSink> sinkConsumer;

public TestCase(Consumer<HashSink> sinkConsumer, String expected) {
this.expected = TestUtils.hexStringToByteArray(expected);
this.expected = hexStringToByteArray(expected);
this.sinkConsumer = sinkConsumer;
}

Expand Down Expand Up @@ -133,11 +208,9 @@ public Consumer<HashSink> getSinkConsumer() {
new TestCase(
h -> h.putDouble(Double.longBitsToDouble(0x6b3ea4d75d3f4dbbL)), "bb4d3f5dd7a43e6b"),
new TestCase(h -> h.putBytes(new byte[] {}), ""),
new TestCase(
h -> h.putBytes(TestUtils.hexStringToByteArray("6143f28b2b11d8")), "6143f28b2b11d8"),
new TestCase(h -> h.putBytes(hexStringToByteArray("6143f28b2b11d8")), "6143f28b2b11d8"),
new TestCase(h -> h.putBytes(BYTE_SEQ_199), byteArrayToHexString(BYTE_SEQ_199)),
new TestCase(
h -> h.putBytes(TestUtils.hexStringToByteArray("c1ce762d62"), 1, 3), "ce762d"),
new TestCase(h -> h.putBytes(hexStringToByteArray("c1ce762d62"), 1, 3), "ce762d"),
new TestCase(h -> h.putChar((char) 0x1466), "6614"),
new TestCase(h -> h.putNullable(null, (o, sink) -> {}), "00"),
new TestCase(
Expand Down Expand Up @@ -1022,21 +1095,21 @@ protected abstract void calculateHashForChecksum(

abstract int getHashSizeForChecksum();

abstract String getExpectedChecksum();

protected static final VarHandle LONG_HANDLE =
MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.LITTLE_ENDIAN);
protected static final VarHandle INT_HANDLE =
MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.LITTLE_ENDIAN);

@Test
void testCheckSum() throws NoSuchAlgorithmException {
@ParameterizedTest
@MethodSource("getChecksumRecords")
void testCheckSum(ChecksumRecord checksumRecord) throws NoSuchAlgorithmException {

long maxDataLength = 200;
long numCycles = 10000;
int dataLength = (int) checksumRecord.getDataSize();
long numCycles = checksumRecord.getNumCycles();

PseudoRandomGenerator pseudoRandomGenerator =
PseudoRandomGeneratorProvider.splitMix64_V1().create();
pseudoRandomGenerator.reset(checksumRecord.getSeed());

MessageDigest md = MessageDigest.getInstance("SHA-256");

Expand All @@ -1046,31 +1119,28 @@ void testCheckSum() throws NoSuchAlgorithmException {
byte[] seedBytes = new byte[getSeedSizeForChecksum()];
byte[] hashBytes = new byte[getHashSizeForChecksum()];

for (int dataLength = 0; dataLength <= maxDataLength; ++dataLength) {
int effectiveDataLength = (dataLength + 7) >> 3;
int effectiveDataLength = (dataLength + 7) >>> 3;

byte[] dataBytesTemp = new byte[effectiveDataLength * 8];
byte[] dataBytes = new byte[dataLength];
byte[] dataBytesTemp = new byte[effectiveDataLength * 8];
byte[] dataBytes = new byte[dataLength];

for (long cycle = 0; cycle < numCycles; ++cycle) {
for (int i = 0; i < effectiveDataLength; ++i) {
LONG_HANDLE.set(dataBytesTemp, 8 * i, pseudoRandomGenerator.nextLong());
}
for (int i = 0; i < effectiveSeedLength; ++i) {
LONG_HANDLE.set(seedBytesTemp, 8 * i, pseudoRandomGenerator.nextLong());
}
for (long cycle = 0; cycle < numCycles; ++cycle) {
for (int i = 0; i < effectiveDataLength; ++i) {
LONG_HANDLE.set(dataBytesTemp, 8 * i, pseudoRandomGenerator.nextLong());
}
for (int i = 0; i < effectiveSeedLength; ++i) {
LONG_HANDLE.set(seedBytesTemp, 8 * i, pseudoRandomGenerator.nextLong());
}

System.arraycopy(dataBytesTemp, 0, dataBytes, 0, dataLength);
System.arraycopy(seedBytesTemp, 0, seedBytes, 0, getSeedSizeForChecksum());
System.arraycopy(dataBytesTemp, 0, dataBytes, 0, dataLength);
System.arraycopy(seedBytesTemp, 0, seedBytes, 0, getSeedSizeForChecksum());

calculateHashForChecksum(seedBytes, hashBytes, dataBytes);
calculateHashForChecksum(seedBytes, hashBytes, dataBytes);

md.update(hashBytes);
}
md.update(hashBytes);
}

String checksum = byteArrayToHexString(md.digest());
assertThat(checksum).isEqualTo(getExpectedChecksum());
assertThat(checksum).isEqualTo(checksumRecord.getChecksum());
}

private static Hasher64 getHasherUsingDefaultImplementations(Hasher64 referenceHasher) {
Expand Down
12 changes: 6 additions & 6 deletions src/test/java/com/dynatrace/hash4j/hashing/FarmHashNaTest.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2022-2023 Dynatrace LLC
* Copyright 2022-2024 Dynatrace LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -32,6 +32,11 @@ protected List<Hasher64> getHashers() {
return HASHERS;
}

/**
 * Returns the file name of the checksum reference resource for FarmHash NA, which the base test
 * class resolves through the class loader.
 */
@Override
protected String getChecksumResourceFileName() {
return "FarmHash NA.txt";
}

@Override
protected void calculateHashForChecksum(byte[] seedBytes, byte[] hashBytes, byte[] dataBytes) {
long seed = (long) LONG_HANDLE.get(seedBytes, 0);
Expand All @@ -57,11 +62,6 @@ int getHashSizeForChecksum() {
return 24;
}

@Override
String getExpectedChecksum() {
return "e2e8114ba370dcb23148a82c445bcba37c40a73546d7f0c45ed67fbc6ab365d9";
}

@Override
protected List<ReferenceTestRecord64> getReferenceTestRecords() {

Expand Down
10 changes: 5 additions & 5 deletions src/test/java/com/dynatrace/hash4j/hashing/FarmHashUoTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ protected List<Hasher64> getHashers() {
return HASHERS;
}

/**
 * Returns the file name of the checksum reference resource for FarmHash UO, which the base test
 * class resolves through the class loader.
 */
@Override
protected String getChecksumResourceFileName() {
return "FarmHash UO.txt";
}

@Override
protected void calculateHashForChecksum(byte[] seedBytes, byte[] hashBytes, byte[] dataBytes) {
long seed = (long) LONG_HANDLE.get(seedBytes, 0);
Expand All @@ -57,11 +62,6 @@ int getHashSizeForChecksum() {
return 24;
}

@Override
String getExpectedChecksum() {
return "50f5d48e00fda3fadcacaa5fec1944d90c33539e19ca2dfcb6a97173a3754682";
}

@Override
protected List<ReferenceTestRecord64> getReferenceTestRecords() {

Expand Down
12 changes: 6 additions & 6 deletions src/test/java/com/dynatrace/hash4j/hashing/Komihash4_3Test.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2022-2023 Dynatrace LLC
* Copyright 2022-2024 Dynatrace LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -30,6 +30,11 @@ protected List<Hasher64> getHashers() {
return HASHERS;
}

/**
 * Returns the file name of the checksum reference resource for Komihash 4.3, which the base test
 * class resolves through the class loader.
 */
@Override
protected String getChecksumResourceFileName() {
return "Komihash 4.3.txt";
}

@Override
protected void calculateHashForChecksum(byte[] seedBytes, byte[] hashBytes, byte[] dataBytes) {

Expand All @@ -51,11 +56,6 @@ int getHashSizeForChecksum() {
return 16;
}

@Override
String getExpectedChecksum() {
return "b83dc90ff8c0ad72989f5150a6f7dba41adfe0a70b9112da93040f3882ce16f6";
}

@Override
protected List<ReferenceTestRecord64> getReferenceTestRecords() {
List<ReferenceTestRecord64> referenceTestRecords = new ArrayList<>();
Expand Down
12 changes: 6 additions & 6 deletions src/test/java/com/dynatrace/hash4j/hashing/Komihash5_0Test.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2022-2023 Dynatrace LLC
* Copyright 2022-2024 Dynatrace LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -30,6 +30,11 @@ protected List<Hasher64> getHashers() {
return HASHERS;
}

/**
 * Returns the file name of the checksum reference resource for Komihash 5.0, which the base test
 * class resolves through the class loader.
 */
@Override
protected String getChecksumResourceFileName() {
return "Komihash 5.0.txt";
}

@Override
protected void calculateHashForChecksum(byte[] seedBytes, byte[] hashBytes, byte[] dataBytes) {

Expand All @@ -51,11 +56,6 @@ int getHashSizeForChecksum() {
return 16;
}

@Override
String getExpectedChecksum() {
return "556023b1a3e83983188c75f4f2f4dfbfbc45775d18dd723395d8a3e322833b70";
}

@Override
protected List<ReferenceTestRecord64> getReferenceTestRecords() {
List<ReferenceTestRecord64> referenceTestRecords = new ArrayList<>();
Expand Down
Loading

0 comments on commit b88a783

Please sign in to comment.