Skip to content

Commit

Permalink
Merge pull request #1093 from haileyajohnson/port-filters
Browse files Browse the repository at this point in the history
fix implementation of fletcher32
  • Loading branch information
tdrwenski authored Oct 3, 2022
2 parents 2106e48 + ca18de7 commit 6fa510a
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 34 deletions.
83 changes: 52 additions & 31 deletions cdm/core/src/main/java/ucar/nc2/filter/Checksum32.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

import com.google.common.primitives.Ints;

import java.nio.ByteOrder;
import java.util.Map;
import java.util.zip.Adler32;
import java.util.zip.CRC32;
Expand Down Expand Up @@ -35,17 +34,8 @@ public enum CType {

private final CType type; // type of checksum

private final ByteOrder byteOrder;

public Checksum32(CType type, ByteOrder bo) {
this.type = type;
this.byteOrder = bo;
}


public Checksum32(CType type) {
// TODO: can we do this better?
this(type, ByteOrder.LITTLE_ENDIAN);
this.type = type;
}

@Override
Expand All @@ -61,32 +51,41 @@ public int getId() {
@Override
public byte[] encode(byte[] dataIn) {
// create a checksum
int checksum = getChecksum(dataIn);
// append checksum in front of data
int checksum = (int) getChecksum(dataIn);
// append checksum in front or behind data
// Adler and CRC are supported by Zarr, which follows the Numcodecs spec with the checksum before the data
// Fletcher is supported by HDF5, which places the checksum after the data
byte[] dataOut = new byte[dataIn.length + nbytes];
System.arraycopy(dataIn, 0, dataOut, nbytes, dataIn.length);
System.arraycopy(Ints.toByteArray(checksum), 0, dataOut, 0, nbytes);;
int dataStart = this.type == CType.FLETCHER ? 0 : nbytes;
System.arraycopy(dataIn, 0, dataOut, dataStart, dataIn.length);
int checksumStart = this.type == CType.FLETCHER ? dataOut.length - nbytes : 0;
// encode as little endian by default
System.arraycopy(Ints.toByteArray(Integer.reverseBytes(checksum)), 0, dataOut, checksumStart, nbytes);;
return dataOut;
}

@Override
public byte[] decode(byte[] dataIn) {
// strip the checksum
byte[] dataOut = new byte[dataIn.length - nbytes];
System.arraycopy(dataIn, nbytes, dataOut, 0, dataOut.length);
// Adler and CRC are supported by Zarr, which follows the Numcodecs spec with the checksum before the data
// Fletcher is supported by HDF5, which places the checksum after the data
int dataStart = this.type == CType.FLETCHER ? 0 : nbytes;
System.arraycopy(dataIn, dataStart, dataOut, 0, dataOut.length);
// verify checksum
int checksum = getChecksum(dataOut);
int checksum = (int) getChecksum(dataOut);
byte[] bytes = new byte[nbytes];
System.arraycopy(dataIn, 0, bytes, 0, nbytes);
int i = Ints.fromByteArray(bytes);
int checksumStart = this.type == CType.FLETCHER ? dataIn.length - nbytes : 0;
System.arraycopy(dataIn, checksumStart, bytes, 0, nbytes);
int i = Integer.reverseBytes(Ints.fromByteArray(bytes)); // convert from little endian
if (i != checksum) {
throw new RuntimeException("Checksum invalid");
}
// return data
return dataOut;
}

private int getChecksum(byte[] data) {
private long getChecksum(byte[] data) {
Checksum checksum;
switch (type) {
case ADLER:
Expand All @@ -101,18 +100,13 @@ private int getChecksum(byte[] data) {
break;
}
checksum.update(data, 0, data.length);
int val = (int) checksum.getValue();
// reverse bytes for little endian
if (this.byteOrder == ByteOrder.LITTLE_ENDIAN) {
val = Integer.reverseBytes(val);
}
return val;
return checksum.getValue();
}

private class Fletcher32 extends Adler32 {

private int sum1 = 0;
private int sum2 = 0;
private long sum1 = 0;
private long sum2 = 0;

@Override
public void update(byte[] b, int off, int len) {
Expand All @@ -122,10 +116,37 @@ public void update(byte[] b, int off, int len) {
if (off < 0 || len < 0 || off > b.length - len) {
throw new ArrayIndexOutOfBoundsException();
}
for (int i = off; i < len; i++) {
sum1 = (sum1 + (b[i] & 0xff)) % 65535;
sum2 = (sum2 + sum1) % 65535;

int i = 0;
int end = len / 2;
while (end > 0) {
int blocklen = end > 360 ? 360 : end;
end -= blocklen;
do {
sum1 += (b[i] & 0xff) << 8 | b[i + 1] & 0xff;
sum2 += sum1;
i += 2;
blocklen--;
} while (blocklen > 0);
sum1 = (sum1 & 0xffff) + (sum1 >>> 16);
sum2 = (sum2 & 0xffff) + (sum2 >>> 16);
}

// handle odd # of bytes
if (len % 2 > 0) {
sum1 += (b[len - 1] & 0xff) << 8;
sum2 += sum1;
sum1 = (sum1 & 0xffff) + (sum1 >>> 16);
sum2 = (sum2 & 0xffff) + (sum2 >>> 16);
}

sum1 = (sum1 & 0xffff) + (sum1 >>> 16);
sum2 = (sum2 & 0xffff) + (sum2 >>> 16);
}

@Override
public long getValue() {
return (sum2 << 16) | sum1;
}
}

Expand Down
22 changes: 19 additions & 3 deletions cdm/core/src/test/java/ucar/nc2/filter/TestFilters.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@

package ucar.nc2.filter;

import com.google.common.primitives.Ints;
import org.junit.BeforeClass;
import org.junit.Test;
import ucar.unidata.io.RandomAccessFile;

import java.io.IOException;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;

Expand Down Expand Up @@ -61,11 +62,11 @@ public void testShuffle() throws IOException {
@Test
public void testChecksum32() throws IOException {
// test Adler32
Filter filter = new Checksum32(Checksum32.CType.ADLER, ByteOrder.LITTLE_ENDIAN);
Filter filter = new Checksum32(Checksum32.CType.ADLER);
testEncodeDecode(filter, "adler32");

// test CRC32
filter = new Checksum32(Checksum32.CType.CRC, ByteOrder.LITTLE_ENDIAN);
filter = new Checksum32(Checksum32.CType.CRC);
testEncodeDecode(filter, "crc32");
}

Expand Down Expand Up @@ -108,4 +109,19 @@ private void testEncodeDecode(Filter filter, String filename) throws IOException
assertThat(decoded).isEqualTo(decoded_data);
}

/**
 * Checks that a {@code FLETCHER} {@link Checksum32} filter decodes a buffer laid out as the
 * payload followed by a 4-byte checksum, and hands back just the payload.
 */
@Test
public void testFletcher() {
  // input string taken from the Wikipedia Fletcher test vectors
  byte[] payload = "abcdefgh".getBytes(StandardCharsets.UTF_8);
  // checksum bytes as they are expected to appear after the payload (0x9591EBE1 == -1785599007)
  byte[] trailer = Ints.toByteArray(0x9591EBE1);

  // assemble the encoded form: payload first, checksum in the last four bytes
  byte[] encoded = new byte[payload.length + Integer.BYTES];
  System.arraycopy(trailer, 0, encoded, payload.length, Integer.BYTES);
  System.arraycopy(payload, 0, encoded, 0, payload.length);

  // decode() throws if the trailing checksum does not match, so reaching the assertion
  // already means the checksum was accepted
  Checksum32 fletcher = new Checksum32(Checksum32.CType.FLETCHER);
  assertThat(fletcher.decode(encoded)).isEqualTo(payload);
}

}

0 comments on commit 6fa510a

Please sign in to comment.