From dbcee48584451313a1763136907df9d1757bb3c2 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Wed, 29 May 2024 21:04:35 +0200 Subject: [PATCH 1/5] add testZstdLibrary --- src/test/java/dev/zarr/zarrjava/ZarrTest.java | 72 +++++++++++++++++-- .../dev/zarr/zarrjava/test_zstd_library.py | 30 ++++++++ .../java/dev/zarr/zarrjava/zarrita_read.py | 2 +- 3 files changed, 96 insertions(+), 8 deletions(-) create mode 100644 src/test/java/dev/zarr/zarrjava/test_zstd_library.py diff --git a/src/test/java/dev/zarr/zarrjava/ZarrTest.java b/src/test/java/dev/zarr/zarrjava/ZarrTest.java index ebf87cc..b8ace9d 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrTest.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrTest.java @@ -4,23 +4,24 @@ import com.amazonaws.auth.AnonymousAWSCredentials; import com.amazonaws.services.s3.AmazonS3ClientBuilder; import com.fasterxml.jackson.databind.ObjectMapper; +import com.github.luben.zstd.ZstdOutputStream; import dev.zarr.zarrjava.store.FilesystemStore; import dev.zarr.zarrjava.store.HttpStore; import dev.zarr.zarrjava.store.S3Store; import dev.zarr.zarrjava.store.StoreHandle; import dev.zarr.zarrjava.utils.MultiArrayUtils; +import dev.zarr.zarrjava.utils.Utils; import dev.zarr.zarrjava.v3.*; import dev.zarr.zarrjava.v3.codec.core.TransposeCodec; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; import org.junit.jupiter.params.provider.ValueSource; -import java.io.BufferedReader; -import java.io.File; -import java.io.IOException; -import java.io.InputStreamReader; +import java.io.*; +import java.nio.ByteBuffer; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -37,6 +38,7 @@ public class ZarrTest { final static Path TESTOUTPUT = Paths.get("testoutput"); final static Path ZARRITA_WRITE_PATH = Paths.get("src/test/java/dev/zarr/zarrjava/zarrita_write.py"); final static Path ZARRITA_READ_PATH = Paths.get("src/test/java/dev/zarr/zarrjava/zarrita_read.py"); + final static Path TEST_ZSTD_LIBRARY_PATH = Paths.get("src/test/java/dev/zarr/zarrjava/test_zstd_library.py"); public static String pythonPath() { if (System.getProperty("os.name").startsWith("Windows")) { @@ -91,10 +93,43 @@ public void testReadFromZarrita(String codec) throws IOException, ZarrException, Assertions.assertArrayEquals(expectedData, (int[]) result.get1DJavaArray(ucar.ma2.DataType.INT)); } + @ParameterizedTest + @CsvSource({"0,true", "0,false", "5, true", "5, false"}) + public void testZstdLibrary(int clevel, boolean checksum) throws IOException, InterruptedException { + String zstd_file = TESTOUTPUT + "/testZstdLibrary" + clevel + checksum + ".zstd"; + + ByteBuffer testBytes = ByteBuffer.allocate(1024); + testBytes.putInt(42); + + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + ZstdOutputStream zstdStream = new ZstdOutputStream(outputStream, clevel); + zstdStream.setChecksum(checksum); + zstdStream.write(Utils.toArray(testBytes)); + zstdStream.close(); + ByteBuffer encodedBytes = ByteBuffer.wrap(outputStream.toByteArray()); + try (FileOutputStream fileOutputStream = new FileOutputStream(zstd_file)) { + fileOutputStream.write(encodedBytes.array()); + } + String command = pythonPath(); + ProcessBuilder pb = new ProcessBuilder(command, TEST_ZSTD_LIBRARY_PATH.toString(), zstd_file); + Process process = pb.start(); + + BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream())); + String line; + while ((line = reader.readLine()) != null) { + System.out.println(line); + } + BufferedReader readerErr = new BufferedReader(new InputStreamReader(process.getErrorStream())); + while ((line = readerErr.readLine()) != null) { + System.err.println(line); + } + int exitCode = process.waitFor(); + assert exitCode == 0; + } + //TODO: add crc32c - //Disabled "zstd": known issue @ParameterizedTest - @ValueSource(strings = {"blosc", "gzip", "bytes", "transpose", "sharding_start", "sharding_end"}) + @ValueSource(strings = {"blosc", "gzip", "zstd", "bytes", "transpose", "sharding_start", "sharding_end"}) public void testWriteToZarrita(String codec) throws IOException, ZarrException, InterruptedException { StoreHandle storeHandle = new FilesystemStore(TESTOUTPUT).resolve("write_to_zarrita", codec); ArrayMetadataBuilder builder = Array.metadataBuilder() @@ -216,8 +251,31 @@ public void testCodecsWriteRead(String codec) throws IOException, ZarrException, Assertions.assertArrayEquals(testData, (int[]) result.get1DJavaArray(ucar.ma2.DataType.INT)); } + @ParameterizedTest + @CsvSource({"0,true", "0,false", "5, true", "5, false"}) + public void testZstdCodecReadWrite(int clevel, boolean checksum) throws ZarrException, IOException { + int[] testData = new int[16 * 16 * 16]; + Arrays.setAll(testData, p -> p); + + StoreHandle storeHandle = new FilesystemStore(TESTOUTPUT).resolve("testZstdCodecReadWrite", "checksum_" + checksum, "clevel_" + clevel); + ArrayMetadataBuilder builder = Array.metadataBuilder() + .withShape(16, 16, 16) + .withDataType(DataType.UINT32) + .withChunkShape(2, 4, 8) + .withFillValue(0) + .withCodecs(c -> c.withZstd(clevel, checksum)); + Array writeArray = Array.create(storeHandle, builder.build()); + writeArray.write(ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{16, 16, 16}, testData)); + + Array readArray = Array.open(storeHandle); + ucar.ma2.Array result = readArray.read(); + + Assertions.assertArrayEquals(testData, (int[]) result.get1DJavaArray(ucar.ma2.DataType.INT)); + + } + @Test - public void testCodecTranspose() throws IOException, ZarrException, InterruptedException { + public void testTransposeCodec() throws ZarrException { ucar.ma2.Array testData = ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{2, 3, 3}, new int[]{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}); ucar.ma2.Array testDataTransposed120 = ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{3, 3, 2}, new int[]{ diff --git a/src/test/java/dev/zarr/zarrjava/test_zstd_library.py b/src/test/java/dev/zarr/zarrjava/test_zstd_library.py new file mode 100644 index 0000000..4f34874 --- /dev/null +++ b/src/test/java/dev/zarr/zarrjava/test_zstd_library.py @@ -0,0 +1,30 @@ +import struct +import sys + +import zstandard as zstd + +zstd_file = sys.argv[1] + + +def compress_data_to_file(file_path, integer_value): + data = struct.pack('>i', integer_value) + compressor = zstd.ZstdCompressor(level=0) + compressed_data = compressor.compress(data) + with open(file_path, 'wb') as file: + file.write(compressed_data) + + +def decompress_zstd_file(file_path): + with open(file_path, 'rb') as file: + compressed_data = file.read() + decompressor = zstd.ZstdDecompressor() # is with FORMAT_ZSTD1 + + return decompressor.decompress(compressed_data) + + +# for comparison +compress_data_to_file(zstd_file + "_", 42) + +decompressed_data = decompress_zstd_file(zstd_file) +int_value = int.from_bytes(decompressed_data[:4], byteorder='big') +assert int_value == 42 diff --git a/src/test/java/dev/zarr/zarrjava/zarrita_read.py b/src/test/java/dev/zarr/zarrjava/zarrita_read.py index 07cb083..4eff03f 100644 --- a/src/test/java/dev/zarr/zarrjava/zarrita_read.py +++ b/src/test/java/dev/zarr/zarrjava/zarrita_read.py @@ -10,7 +10,7 @@ elif codec_string == "gzip": codec = [zarrita.codecs.bytes_codec(), zarrita.codecs.gzip_codec()] elif codec_string == "zstd": - codec = [zarrita.codecs.bytes_codec(), zarrita.codecs.zstd_codec()] + codec = [zarrita.codecs.bytes_codec(), zarrita.codecs.zstd_codec(checksum=True)] elif codec_string == "bytes": codec = [zarrita.codecs.bytes_codec()] elif codec_string == "transpose": From 26964dc3250f40a5ffadccd44dad4016b3ae7656 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Thu, 30 May 2024 14:56:19 +0200 Subject: [PATCH 2/5] fix Zstd compression and decompression --- .../zarrjava/v3/codec/core/ZstdCodec.java | 50 +++++++++------ src/test/java/dev/zarr/zarrjava/ZarrTest.java | 61 +++++++++++++++++-- .../dev/zarr/zarrjava/decompress_print.py | 13 ++++ 3 files changed, 100 insertions(+), 24 deletions(-) create mode 100644 src/test/java/dev/zarr/zarrjava/decompress_print.py diff --git a/src/main/java/dev/zarr/zarrjava/v3/codec/core/ZstdCodec.java b/src/main/java/dev/zarr/zarrjava/v3/codec/core/ZstdCodec.java index fa85765..01b23b7 100644 --- a/src/main/java/dev/zarr/zarrjava/v3/codec/core/ZstdCodec.java +++ b/src/main/java/dev/zarr/zarrjava/v3/codec/core/ZstdCodec.java @@ -2,6 +2,8 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; +import com.github.luben.zstd.Zstd; +import com.github.luben.zstd.ZstdCompressCtx; import com.github.luben.zstd.ZstdInputStream; import com.github.luben.zstd.ZstdOutputStream; import dev.zarr.zarrjava.ZarrException; @@ -37,30 +39,40 @@ private void copy(InputStream inputStream, OutputStream outputStream) throws IOE } @Override - public ByteBuffer decode(ByteBuffer chunkBytes) - throws ZarrException { - try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); ZstdInputStream inputStream = new ZstdInputStream( - new ByteArrayInputStream(Utils.toArray(chunkBytes)))) { - copy(inputStream, outputStream); - inputStream.close(); - return ByteBuffer.wrap(outputStream.toByteArray()); - } catch (IOException ex) { - throw new ZarrException("Error in decoding zstd.", ex); + public ByteBuffer decode(ByteBuffer compressedBytes) throws ZarrException { + // Extract the byte array from the ByteBuffer + byte[] compressedArray = new byte[compressedBytes.remaining()]; + compressedBytes.get(compressedArray); + + // Determine the original size (optional: you might need to store the original size separately) + long originalSize = Zstd.decompressedSize(compressedArray); + if (originalSize == 0) { + throw new ZarrException("Failed to get decompressed size"); } + + // Create a buffer for the decompressed data + byte[] decompressed = new byte[(int) originalSize]; + + // Perform decompression + long bytesDecompressed = Zstd.decompress(decompressed, compressedArray); + if (bytesDecompressed != originalSize) { + throw new ZarrException("Decompression failed, incorrect decompressed size"); + } + + // Wrap the decompressed byte array into a ByteBuffer + return ByteBuffer.wrap(decompressed); } @Override - public ByteBuffer encode(ByteBuffer chunkBytes) - throws ZarrException { - try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); ZstdOutputStream zstdStream = new ZstdOutputStream( - outputStream, configuration.level).setChecksum( - configuration.checksum)) { - zstdStream.write(Utils.toArray(chunkBytes)); - zstdStream.close(); - return ByteBuffer.wrap(outputStream.toByteArray()); - } catch (IOException ex) { - throw new ZarrException("Error in encoding zstd.", ex); + public ByteBuffer encode(ByteBuffer chunkBytes) throws ZarrException { + byte[] arr = chunkBytes.array(); + byte[] compressed; + try (ZstdCompressCtx ctx = new ZstdCompressCtx()) { + ctx.setLevel(configuration.level); + ctx.setChecksum(configuration.checksum); + compressed = ctx.compress(arr); } + return ByteBuffer.wrap(compressed); } @Override diff --git a/src/test/java/dev/zarr/zarrjava/ZarrTest.java b/src/test/java/dev/zarr/zarrjava/ZarrTest.java index b8ace9d..b4bfe53 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrTest.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrTest.java @@ -4,6 +4,8 @@ import com.amazonaws.auth.AnonymousAWSCredentials; import com.amazonaws.services.s3.AmazonS3ClientBuilder; import com.fasterxml.jackson.databind.ObjectMapper; +import com.github.luben.zstd.ZstdCompressCtx; +import com.github.luben.zstd.ZstdInputStream; import com.github.luben.zstd.ZstdOutputStream; import dev.zarr.zarrjava.store.FilesystemStore; import dev.zarr.zarrjava.store.HttpStore; @@ -19,7 +21,11 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; import org.junit.jupiter.params.provider.ValueSource; +import com.github.luben.zstd.Zstd; +import ucar.ma2.MAMath; +import java.io.FileOutputStream; +import java.nio.ByteBuffer; import java.io.*; import java.nio.ByteBuffer; import java.nio.file.Files; @@ -36,9 +42,11 @@ public class ZarrTest { final static Path TESTDATA = Paths.get("testdata"); final static Path TESTOUTPUT = Paths.get("testoutput"); - final static Path ZARRITA_WRITE_PATH = Paths.get("src/test/java/dev/zarr/zarrjava/zarrita_write.py"); - final static Path ZARRITA_READ_PATH = Paths.get("src/test/java/dev/zarr/zarrjava/zarrita_read.py"); - final static Path TEST_ZSTD_LIBRARY_PATH = Paths.get("src/test/java/dev/zarr/zarrjava/test_zstd_library.py"); + final static Path TEST_PATH = Paths.get("src/test/java/dev/zarr/zarrjava/"); + + final static Path ZARRITA_WRITE_PATH = TEST_PATH.resolve("zarrita_write.py"); + final static Path ZARRITA_READ_PATH = TEST_PATH.resolve("zarrita_read.py"); + final static Path TEST_ZSTD_LIBRARY_PATH = TEST_PATH.resolve("test_zstd_library.py"); public static String pythonPath() { if (System.getProperty("os.name").startsWith("Windows")) { @@ -93,6 +101,49 @@ public void testReadFromZarrita(String codec) throws IOException, ZarrException, Assertions.assertArrayEquals(expectedData, (int[]) result.get1DJavaArray(ucar.ma2.DataType.INT)); } + private void copy(InputStream inputStream, OutputStream outputStream) throws IOException { + byte[] buffer = new byte[4096]; + int len; + while ((len = inputStream.read(buffer)) > 0) { + outputStream.write(buffer, 0, len); + } + } + + @CsvSource({"0,true", "0,false", "5, true", "5, false"}) + @ParameterizedTest + public void testZstdLibrary2(int clevel, boolean checksumFlag) throws IOException, InterruptedException, ZarrException { + //compress using ZstdCompressCtx + int number = 123456; + byte[] src = ByteBuffer.allocate(4).putInt(number).array(); + byte[] compressed; + try (ZstdCompressCtx ctx = new ZstdCompressCtx()) { + ctx.setLevel(clevel); + ctx.setChecksum(checksumFlag); + compressed = ctx.compress(src); + } + //decompress with Zstd.decompress + long originalSize = Zstd.decompressedSize(compressed); + byte[] decompressed = Zstd.decompress(compressed, (int) originalSize); + Assertions.assertEquals(number, ByteBuffer.wrap(decompressed).getInt()); + + //write compressed to file + String compressedDataPath =TESTOUTPUT.resolve("compressed" + clevel + checksumFlag + ".bin").toString(); + try (FileOutputStream fos = new FileOutputStream(compressedDataPath)) { + fos.write(compressed); + } + + //decompress in python + Process process = new ProcessBuilder( + pythonPath(), + TEST_PATH.resolve("decompress_print.py").toString(), + compressedDataPath, + Integer.toString(number) + ).start(); + int exitCode = process.waitFor(); + assert exitCode == 0; + } + + @ParameterizedTest @CsvSource({"0,true", "0,false", "5, true", "5, false"}) public void testZstdLibrary(int clevel, boolean checksum) throws IOException, InterruptedException { @@ -295,8 +346,8 @@ public void testTransposeCodec() throws ZarrException { transposeCodecWrongOrder2.setCoreArrayMetadata(metadata); transposeCodecWrongOrder3.setCoreArrayMetadata(metadata); - assert ucar.ma2.MAMath.equals(testDataTransposed120, transposeCodec.encode(testData)); - assert ucar.ma2.MAMath.equals(testData, transposeCodec.decode(testDataTransposed120)); + assert MAMath.equals(testDataTransposed120, transposeCodec.encode(testData)); + assert MAMath.equals(testData, transposeCodec.decode(testDataTransposed120)); assertThrows(ZarrException.class, () -> transposeCodecWrongOrder1.encode(testData)); assertThrows(ZarrException.class, () -> transposeCodecWrongOrder2.encode(testData)); assertThrows(ZarrException.class, () -> transposeCodecWrongOrder3.encode(testData)); diff --git a/src/test/java/dev/zarr/zarrjava/decompress_print.py b/src/test/java/dev/zarr/zarrjava/decompress_print.py new file mode 100644 index 0000000..0235fdd --- /dev/null +++ b/src/test/java/dev/zarr/zarrjava/decompress_print.py @@ -0,0 +1,13 @@ +import sys + +import zstandard as zstd + +data_path = sys.argv[1] +expected = sys.argv[2] + +with open(data_path, "rb") as f: + compressed = f.read() + +decompressed = zstd.ZstdDecompressor().decompress(compressed) +number = int.from_bytes(decompressed, byteorder='big') +assert number == int(expected) From 2d8b7b29825948087c2fbc3a5d904b282ac749d1 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Thu, 30 May 2024 15:15:37 +0200 Subject: [PATCH 3/5] cleanup code --- .../zarrjava/v3/codec/core/ZstdCodec.java | 16 +----- src/test/java/dev/zarr/zarrjava/ZarrTest.java | 53 +++---------------- .../dev/zarr/zarrjava/test_zstd_library.py | 30 ----------- .../zarrita_read.py | 0 .../zarrita_write.py | 0 .../zstd_decompress.py} | 0 6 files changed, 8 insertions(+), 91 deletions(-) delete mode 100644 src/test/java/dev/zarr/zarrjava/test_zstd_library.py rename src/test/{java/dev/zarr/zarrjava => python-scripts}/zarrita_read.py (100%) rename src/test/{java/dev/zarr/zarrjava => python-scripts}/zarrita_write.py (100%) rename src/test/{java/dev/zarr/zarrjava/decompress_print.py => python-scripts/zstd_decompress.py} (100%) diff --git a/src/main/java/dev/zarr/zarrjava/v3/codec/core/ZstdCodec.java b/src/main/java/dev/zarr/zarrjava/v3/codec/core/ZstdCodec.java index 01b23b7..19d0b75 100644 --- a/src/main/java/dev/zarr/zarrjava/v3/codec/core/ZstdCodec.java +++ b/src/main/java/dev/zarr/zarrjava/v3/codec/core/ZstdCodec.java @@ -40,26 +40,14 @@ private void copy(InputStream inputStream, OutputStream outputStream) throws IOE @Override public ByteBuffer decode(ByteBuffer compressedBytes) throws ZarrException { - // Extract the byte array from the ByteBuffer - byte[] compressedArray = new byte[compressedBytes.remaining()]; - compressedBytes.get(compressedArray); + byte[] compressedArray = compressedBytes.array(); - // Determine the original size (optional: you might need to store the original size separately) long originalSize = Zstd.decompressedSize(compressedArray); if (originalSize == 0) { throw new ZarrException("Failed to get decompressed size"); } - // Create a buffer for the decompressed data - byte[] decompressed = new byte[(int) originalSize]; - - // Perform decompression - long bytesDecompressed = Zstd.decompress(decompressed, compressedArray); - if (bytesDecompressed != originalSize) { - throw new ZarrException("Decompression failed, incorrect decompressed size"); - } - - // Wrap the decompressed byte array into a ByteBuffer + byte[] decompressed = Zstd.decompress(compressedArray, (int) originalSize); return ByteBuffer.wrap(decompressed); } diff --git a/src/test/java/dev/zarr/zarrjava/ZarrTest.java b/src/test/java/dev/zarr/zarrjava/ZarrTest.java index b4bfe53..d741a0a 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrTest.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrTest.java @@ -5,7 +5,6 @@ import com.amazonaws.services.s3.AmazonS3ClientBuilder; import com.fasterxml.jackson.databind.ObjectMapper; import com.github.luben.zstd.ZstdCompressCtx; -import com.github.luben.zstd.ZstdInputStream; import com.github.luben.zstd.ZstdOutputStream; import dev.zarr.zarrjava.store.FilesystemStore; import dev.zarr.zarrjava.store.HttpStore; @@ -27,7 +26,6 @@ import java.io.FileOutputStream; import java.nio.ByteBuffer; import java.io.*; -import java.nio.ByteBuffer; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -42,11 +40,7 @@ public class ZarrTest { final static Path TESTDATA = Paths.get("testdata"); final static Path TESTOUTPUT = Paths.get("testoutput"); - final static Path TEST_PATH = Paths.get("src/test/java/dev/zarr/zarrjava/"); - - final static Path ZARRITA_WRITE_PATH = TEST_PATH.resolve("zarrita_write.py"); - final static Path ZARRITA_READ_PATH = TEST_PATH.resolve("zarrita_read.py"); - final static Path TEST_ZSTD_LIBRARY_PATH = TEST_PATH.resolve("test_zstd_library.py"); + final static Path PYTHON_TEST_PATH = Paths.get("src/test/python-scripts/"); public static String pythonPath() { if (System.getProperty("os.name").startsWith("Windows")) { @@ -70,7 +64,7 @@ public static void clearTestoutputFolder() throws IOException { public void testReadFromZarrita(String codec) throws IOException, ZarrException, InterruptedException { String command = pythonPath(); - ProcessBuilder pb = new ProcessBuilder(command, ZARRITA_WRITE_PATH.toString(), codec, TESTOUTPUT.toString()); + ProcessBuilder pb = new ProcessBuilder(command, PYTHON_TEST_PATH.resolve("zarrita_write.py").toString(), codec, TESTOUTPUT.toString()); Process process = pb.start(); BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream())); @@ -109,9 +103,9 @@ private void copy(InputStream inputStream, OutputStream outputStream) throws IOE } } - @CsvSource({"0,true", "0,false", "5, true", "5, false"}) + @CsvSource({"0,true", "0,false", "5, true", "10, false"}) @ParameterizedTest - public void testZstdLibrary2(int clevel, boolean checksumFlag) throws IOException, InterruptedException, ZarrException { + public void testZstdLibrary(int clevel, boolean checksumFlag) throws IOException, InterruptedException, ZarrException { //compress using ZstdCompressCtx int number = 123456; byte[] src = ByteBuffer.allocate(4).putInt(number).array(); @@ -135,7 +129,7 @@ public void testZstdLibrary2(int clevel, boolean checksumFlag) throws IOExceptio //decompress in python Process process = new ProcessBuilder( pythonPath(), - TEST_PATH.resolve("decompress_print.py").toString(), + PYTHON_TEST_PATH.resolve("zstd_decompress.py").toString(), compressedDataPath, Integer.toString(number) ).start(); @@ -143,41 +137,6 @@ public void testZstdLibrary2(int clevel, boolean checksumFlag) throws IOExceptio assert exitCode == 0; } - - @ParameterizedTest - @CsvSource({"0,true", "0,false", "5, true", "5, false"}) - public void testZstdLibrary(int clevel, boolean checksum) throws IOException, InterruptedException { - String zstd_file = TESTOUTPUT + "/testZstdLibrary" + clevel + checksum + ".zstd"; - - ByteBuffer testBytes = ByteBuffer.allocate(1024); - testBytes.putInt(42); - - ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - ZstdOutputStream zstdStream = new ZstdOutputStream(outputStream, clevel); - zstdStream.setChecksum(checksum); - zstdStream.write(Utils.toArray(testBytes)); - zstdStream.close(); - ByteBuffer encodedBytes = ByteBuffer.wrap(outputStream.toByteArray()); - try (FileOutputStream fileOutputStream = new FileOutputStream(zstd_file)) { - fileOutputStream.write(encodedBytes.array()); - } - String command = pythonPath(); - ProcessBuilder pb = new ProcessBuilder(command, TEST_ZSTD_LIBRARY_PATH.toString(), zstd_file); - Process process = pb.start(); - - BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream())); - String line; - while ((line = reader.readLine()) != null) { - System.out.println(line); - } - BufferedReader readerErr = new BufferedReader(new InputStreamReader(process.getErrorStream())); - while ((line = readerErr.readLine()) != null) { - System.err.println(line); - } - int exitCode = process.waitFor(); - assert exitCode == 0; - } - //TODO: add crc32c @ParameterizedTest @ValueSource(strings = {"blosc", "gzip", "zstd", "bytes", "transpose", "sharding_start", "sharding_end"}) @@ -226,7 +185,7 @@ public void testWriteToZarrita(String codec) throws IOException, ZarrException, String command = pythonPath(); - ProcessBuilder pb = new ProcessBuilder(command, ZARRITA_READ_PATH.toString(), codec, TESTOUTPUT.toString()); + ProcessBuilder pb = new ProcessBuilder(command, PYTHON_TEST_PATH.resolve("zarrita_read.py").toString(), codec, TESTOUTPUT.toString()); Process process = pb.start(); BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream())); diff --git a/src/test/java/dev/zarr/zarrjava/test_zstd_library.py b/src/test/java/dev/zarr/zarrjava/test_zstd_library.py deleted file mode 100644 index 4f34874..0000000 --- a/src/test/java/dev/zarr/zarrjava/test_zstd_library.py +++ /dev/null @@ -1,30 +0,0 @@ -import struct -import sys - -import zstandard as zstd - -zstd_file = sys.argv[1] - - -def compress_data_to_file(file_path, integer_value): - data = struct.pack('>i', integer_value) - compressor = zstd.ZstdCompressor(level=0) - compressed_data = compressor.compress(data) - with open(file_path, 'wb') as file: - file.write(compressed_data) - - -def decompress_zstd_file(file_path): - with open(file_path, 'rb') as file: - compressed_data = file.read() - decompressor = zstd.ZstdDecompressor() # is with FORMAT_ZSTD1 - - return decompressor.decompress(compressed_data) - - -# for comparison -compress_data_to_file(zstd_file + "_", 42) - -decompressed_data = decompress_zstd_file(zstd_file) -int_value = int.from_bytes(decompressed_data[:4], byteorder='big') -assert int_value == 42 diff --git a/src/test/java/dev/zarr/zarrjava/zarrita_read.py b/src/test/python-scripts/zarrita_read.py similarity index 100% rename from src/test/java/dev/zarr/zarrjava/zarrita_read.py rename to src/test/python-scripts/zarrita_read.py diff --git a/src/test/java/dev/zarr/zarrjava/zarrita_write.py b/src/test/python-scripts/zarrita_write.py similarity index 100% rename from src/test/java/dev/zarr/zarrjava/zarrita_write.py rename to src/test/python-scripts/zarrita_write.py diff --git a/src/test/java/dev/zarr/zarrjava/decompress_print.py b/src/test/python-scripts/zstd_decompress.py similarity index 100% rename from src/test/java/dev/zarr/zarrjava/decompress_print.py rename to src/test/python-scripts/zstd_decompress.py From 554715f0e47c9b19aef4e65b1d7fb7779af82679 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Thu, 30 May 2024 15:21:44 +0200 Subject: [PATCH 4/5] cleanup code --- .../zarrjava/v3/codec/core/ZstdCodec.java | 116 +++++++++--------- src/test/java/dev/zarr/zarrjava/ZarrTest.java | 27 ++-- src/test/python-scripts/zarrita_write.py | 2 +- 3 files changed, 68 insertions(+), 77 deletions(-) diff --git a/src/main/java/dev/zarr/zarrjava/v3/codec/core/ZstdCodec.java b/src/main/java/dev/zarr/zarrjava/v3/codec/core/ZstdCodec.java index 19d0b75..a4d5d92 100644 --- a/src/main/java/dev/zarr/zarrjava/v3/codec/core/ZstdCodec.java +++ b/src/main/java/dev/zarr/zarrjava/v3/codec/core/ZstdCodec.java @@ -4,87 +4,83 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.github.luben.zstd.Zstd; import com.github.luben.zstd.ZstdCompressCtx; -import com.github.luben.zstd.ZstdInputStream; -import com.github.luben.zstd.ZstdOutputStream; import dev.zarr.zarrjava.ZarrException; -import dev.zarr.zarrjava.utils.Utils; import dev.zarr.zarrjava.v3.ArrayMetadata; import dev.zarr.zarrjava.v3.codec.BytesBytesCodec; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; + +import javax.annotation.Nonnull; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.nio.ByteBuffer; -import javax.annotation.Nonnull; public class ZstdCodec extends BytesBytesCodec { - public final String name = "zstd"; - @Nonnull - public final Configuration configuration; - - @JsonCreator(mode = JsonCreator.Mode.PROPERTIES) - public ZstdCodec( - @Nonnull @JsonProperty(value = "configuration", required = true) Configuration configuration) { - this.configuration = configuration; - } - - private void copy(InputStream inputStream, OutputStream outputStream) throws IOException { - byte[] buffer = new byte[4096]; - int len; - while ((len = inputStream.read(buffer)) > 0) { - outputStream.write(buffer, 0, len); + public final String name = "zstd"; + @Nonnull + public final Configuration configuration; + + @JsonCreator(mode = JsonCreator.Mode.PROPERTIES) + public ZstdCodec( + @Nonnull @JsonProperty(value = "configuration", required = true) Configuration configuration) { + this.configuration = configuration; + } + + private void copy(InputStream inputStream, OutputStream outputStream) throws IOException { + byte[] buffer = new byte[4096]; + int len; + while ((len = inputStream.read(buffer)) > 0) { + outputStream.write(buffer, 0, len); + } } - } - @Override - public ByteBuffer decode(ByteBuffer compressedBytes) throws ZarrException { - byte[] compressedArray = compressedBytes.array(); + @Override + public ByteBuffer decode(ByteBuffer compressedBytes) throws ZarrException { + byte[] compressedArray = compressedBytes.array(); + + long originalSize = Zstd.decompressedSize(compressedArray); + if (originalSize == 0) { + throw new ZarrException("Failed to get decompressed size"); + } - long originalSize = Zstd.decompressedSize(compressedArray); - if (originalSize == 0) { - throw new ZarrException("Failed to get decompressed size"); + byte[] decompressed = Zstd.decompress(compressedArray, (int) originalSize); + return ByteBuffer.wrap(decompressed); } - byte[] decompressed = Zstd.decompress(compressedArray, (int) originalSize); - return ByteBuffer.wrap(decompressed); - } - - @Override - public ByteBuffer encode(ByteBuffer chunkBytes) throws ZarrException { - byte[] arr = chunkBytes.array(); - byte[] compressed; - try (ZstdCompressCtx ctx = new ZstdCompressCtx()) { - ctx.setLevel(configuration.level); - ctx.setChecksum(configuration.checksum); - compressed = ctx.compress(arr); + @Override + public ByteBuffer encode(ByteBuffer chunkBytes) throws ZarrException { + byte[] arr = chunkBytes.array(); + byte[] compressed; + try (ZstdCompressCtx ctx = new ZstdCompressCtx()) { + ctx.setLevel(configuration.level); + ctx.setChecksum(configuration.checksum); + compressed = ctx.compress(arr); + } + return ByteBuffer.wrap(compressed); } - return ByteBuffer.wrap(compressed); - } - @Override - public long computeEncodedSize(long inputByteLength, - ArrayMetadata.CoreArrayMetadata arrayMetadata) throws ZarrException { - throw new ZarrException("Not implemented for Zstd codec."); - } + @Override + public long computeEncodedSize(long inputByteLength, + ArrayMetadata.CoreArrayMetadata arrayMetadata) throws ZarrException { + throw new ZarrException("Not implemented for Zstd codec."); + } - public static final class Configuration { + public static final class Configuration { - public final int level; - public final boolean checksum; + public final int level; + public final boolean checksum; - @JsonCreator(mode = JsonCreator.Mode.PROPERTIES) - public Configuration(@JsonProperty(value = "level", defaultValue = "5") int level, - @JsonProperty(value = "checksum", defaultValue = "true") boolean checksum) - throws ZarrException { - if (level < -131072 || level > 22) { - throw new ZarrException("'level' needs to be between -131072 and 22."); - } - this.level = level; - this.checksum = checksum; + @JsonCreator(mode = JsonCreator.Mode.PROPERTIES) + public Configuration(@JsonProperty(value = "level", defaultValue = "5") int level, + @JsonProperty(value = "checksum", defaultValue = "true") boolean checksum) + throws ZarrException { + if (level < -131072 || level > 22) { + throw new ZarrException("'level' needs to be between -131072 and 22."); + } + this.level = level; + this.checksum = checksum; + } } - } } diff --git a/src/test/java/dev/zarr/zarrjava/ZarrTest.java b/src/test/java/dev/zarr/zarrjava/ZarrTest.java index d741a0a..3fa9edf 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrTest.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrTest.java @@ -4,14 +4,13 @@ import com.amazonaws.auth.AnonymousAWSCredentials; import com.amazonaws.services.s3.AmazonS3ClientBuilder; import com.fasterxml.jackson.databind.ObjectMapper; +import com.github.luben.zstd.Zstd; import com.github.luben.zstd.ZstdCompressCtx; -import com.github.luben.zstd.ZstdOutputStream; import dev.zarr.zarrjava.store.FilesystemStore; import dev.zarr.zarrjava.store.HttpStore; import dev.zarr.zarrjava.store.S3Store; import dev.zarr.zarrjava.store.StoreHandle; import dev.zarr.zarrjava.utils.MultiArrayUtils; -import dev.zarr.zarrjava.utils.Utils; import dev.zarr.zarrjava.v3.*; import dev.zarr.zarrjava.v3.codec.core.TransposeCodec; import org.junit.jupiter.api.Assertions; @@ -20,12 +19,10 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; import org.junit.jupiter.params.provider.ValueSource; -import com.github.luben.zstd.Zstd; import ucar.ma2.MAMath; -import java.io.FileOutputStream; -import java.nio.ByteBuffer; import java.io.*; +import java.nio.ByteBuffer; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -121,7 +118,7 @@ public void testZstdLibrary(int clevel, boolean checksumFlag) throws IOException Assertions.assertEquals(number, ByteBuffer.wrap(decompressed).getInt()); //write compressed to file - String compressedDataPath =TESTOUTPUT.resolve("compressed" + clevel + checksumFlag + ".bin").toString(); + String compressedDataPath = TESTOUTPUT.resolve("compressed" + clevel + checksumFlag + ".bin").toString(); try (FileOutputStream fos = new FileOutputStream(compressedDataPath)) { fos.write(compressed); } @@ -363,8 +360,8 @@ public void testV3ShardingReadCutout() throws IOException, ZarrException { Array array = Array.open(new FilesystemStore(TESTDATA).resolve("l4_sample", "color", "1")); ucar.ma2.Array outArray = array.read(new long[]{0, 3073, 3073, 513}, new int[]{1, 64, 64, 64}); - assertEquals(outArray.getSize(), 64 * 64 * 64); - assertEquals(outArray.getByte(0), -98); + Assertions.assertEquals(outArray.getSize(), 64 * 64 * 64); + Assertions.assertEquals(outArray.getByte(0), -98); } @Test @@ -374,8 +371,8 @@ public void testV3Access() throws IOException, ZarrException { ucar.ma2.Array outArray = readArray.access().withOffset(0, 3073, 3073, 513) .withShape(1, 64, 64, 64) .read(); - assertEquals(outArray.getSize(), 64 * 64 * 64); - assertEquals(outArray.getByte(0), -98); + Assertions.assertEquals(outArray.getSize(), 64 * 64 * 64); + Assertions.assertEquals(outArray.getByte(0), -98); Array writeArray = Array.create( new FilesystemStore(TESTOUTPUT).resolve("l4_sample_2", "color", "1"), @@ -449,9 +446,9 @@ public void testV3ArrayMetadataBuilder() throws ZarrException { @Test public void testV3FillValue() throws ZarrException { - assertEquals((int) ArrayMetadata.parseFillValue(0, DataType.UINT32), 0); - assertEquals((int) ArrayMetadata.parseFillValue("0x00010203", DataType.UINT32), 50462976); - assertEquals((byte) ArrayMetadata.parseFillValue("0b00000010", DataType.UINT8), 2); + Assertions.assertEquals((int) ArrayMetadata.parseFillValue(0, DataType.UINT32), 0); + Assertions.assertEquals((int) ArrayMetadata.parseFillValue("0x00010203", DataType.UINT32), 50462976); + Assertions.assertEquals((byte) ArrayMetadata.parseFillValue("0b00000010", DataType.UINT8), 2); assert Double.isNaN((double) ArrayMetadata.parseFillValue("NaN", DataType.FLOAT64)); assert Double.isInfinite((double) ArrayMetadata.parseFillValue("-Infinity", DataType.FLOAT64)); } @@ -469,9 +466,7 @@ public void testV3Group() throws IOException, ZarrException { ); array.write(new long[]{2, 2}, ucar.ma2.Array.factory(ucar.ma2.DataType.UBYTE, new int[]{8, 8})); - assertArrayEquals( - ((Array) ((Group) group.listAsArray()[0]).listAsArray()[0]).metadata.chunkShape(), - new int[]{5, 5}); + Assertions.assertArrayEquals(((Array) ((Group) group.listAsArray()[0]).listAsArray()[0]).metadata.chunkShape(), new int[]{5, 5}); } @Test diff --git a/src/test/python-scripts/zarrita_write.py b/src/test/python-scripts/zarrita_write.py index 3a3fd3f..ae6611d 100644 --- a/src/test/python-scripts/zarrita_write.py +++ b/src/test/python-scripts/zarrita_write.py @@ -14,7 +14,7 @@ elif codec_string == "bytes": codec = [zarrita.codecs.bytes_codec()] elif codec_string == "transpose": - codec = [zarrita.codecs.transpose_codec([0, 1]), zarrita.codecs.bytes_codec()] + codec = [zarrita.codecs.transpose_codec((0, 1)), zarrita.codecs.bytes_codec()] elif codec_string == "sharding_start": codec = [zarrita.codecs.sharding_codec(chunk_shape=(1, 2), codecs=[zarrita.codecs.bytes_codec()], index_location=zarrita.metadata.ShardingCodecIndexLocation.start)] elif codec_string == "sharding_end": From 29541d5bddb2770ff45e85c6e0767b662304c63e Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Thu, 30 May 2024 15:25:41 +0200 Subject: [PATCH 5/5] remove unused method --- .../zarrjava/v3/codec/core/ZstdCodec.java | 11 -------- src/test/java/dev/zarr/zarrjava/ZarrTest.java | 28 +++++++------------ 2 files changed, 10 insertions(+), 29 deletions(-) diff --git a/src/main/java/dev/zarr/zarrjava/v3/codec/core/ZstdCodec.java b/src/main/java/dev/zarr/zarrjava/v3/codec/core/ZstdCodec.java index a4d5d92..f042f11 100644 --- a/src/main/java/dev/zarr/zarrjava/v3/codec/core/ZstdCodec.java +++ b/src/main/java/dev/zarr/zarrjava/v3/codec/core/ZstdCodec.java @@ -9,9 +9,6 @@ import dev.zarr.zarrjava.v3.codec.BytesBytesCodec; import javax.annotation.Nonnull; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; import java.nio.ByteBuffer; public class ZstdCodec extends BytesBytesCodec { @@ -26,14 +23,6 @@ public ZstdCodec( this.configuration = configuration; } - private void copy(InputStream inputStream, OutputStream outputStream) throws IOException { - byte[] buffer = new byte[4096]; - int len; - while ((len = inputStream.read(buffer)) > 0) { - outputStream.write(buffer, 0, len); - } - } - @Override public ByteBuffer decode(ByteBuffer compressedBytes) throws ZarrException { byte[] compressedArray = compressedBytes.array(); diff --git a/src/test/java/dev/zarr/zarrjava/ZarrTest.java b/src/test/java/dev/zarr/zarrjava/ZarrTest.java index 3fa9edf..524c741 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrTest.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrTest.java @@ -12,6 +12,7 @@ import dev.zarr.zarrjava.store.StoreHandle; import dev.zarr.zarrjava.utils.MultiArrayUtils; import dev.zarr.zarrjava.v3.*; +import dev.zarr.zarrjava.v3.codec.CodecBuilder; import dev.zarr.zarrjava.v3.codec.core.TransposeCodec; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; @@ -92,17 +93,9 @@ public void testReadFromZarrita(String codec) throws IOException, ZarrException, Assertions.assertArrayEquals(expectedData, (int[]) result.get1DJavaArray(ucar.ma2.DataType.INT)); } - private void copy(InputStream inputStream, OutputStream outputStream) throws IOException { - byte[] buffer = new byte[4096]; - int len; - while ((len = inputStream.read(buffer)) > 0) { - outputStream.write(buffer, 0, len); - } - } - @CsvSource({"0,true", "0,false", "5, true", "10, false"}) @ParameterizedTest - public void testZstdLibrary(int clevel, boolean checksumFlag) throws IOException, InterruptedException, ZarrException { + public void testZstdLibrary(int clevel, boolean checksumFlag) throws IOException, InterruptedException { //compress using ZstdCompressCtx int number = 123456; byte[] src = ByteBuffer.allocate(4).putInt(number).array(); @@ -147,10 +140,10 @@ public void testWriteToZarrita(String codec) throws IOException, ZarrException, switch (codec) { case "blosc": - builder = builder.withCodecs(c -> c.withBlosc()); + builder = builder.withCodecs(CodecBuilder::withBlosc); break; case "gzip": - builder = builder.withCodecs(c -> c.withGzip()); + builder = builder.withCodecs(CodecBuilder::withGzip); break; case "zstd": builder = builder.withCodecs(c -> c.withZstd(0)); @@ -203,7 +196,7 @@ public void testWriteToZarrita(String codec) throws IOException, ZarrException, @ParameterizedTest @ValueSource(strings = {"blosc", "gzip", "zstd", "bytes", "transpose", "sharding_start", "sharding_end"}) - public void testCodecsWriteRead(String codec) throws IOException, ZarrException, InterruptedException { + public void testCodecsWriteRead(String codec) throws IOException, ZarrException { int[] testData = new int[16 * 16 * 16]; Arrays.setAll(testData, p -> p); @@ -217,10 +210,10 @@ public void testCodecsWriteRead(String codec) throws IOException, ZarrException, switch (codec) { case "blosc": - builder = builder.withCodecs(c -> c.withBlosc()); + builder = builder.withCodecs(CodecBuilder::withBlosc); break; case "gzip": - builder = builder.withCodecs(c -> c.withGzip()); + builder = builder.withCodecs(CodecBuilder::withGzip); break; case "zstd": builder = builder.withCodecs(c -> c.withZstd(0)); @@ -440,7 +433,7 @@ public void testV3ArrayMetadataBuilder() throws ZarrException { .withChunkShape(1, 1024, 1024, 1024) .withFillValue(0) .withCodecs( - c -> c.withSharding(new int[]{1, 32, 32, 32}, c1 -> c1.withBlosc())) + c -> c.withSharding(new int[]{1, 32, 32, 32}, CodecBuilder::withBlosc)) .build(); } @@ -470,12 +463,11 @@ public void testV3Group() throws IOException, ZarrException { } @Test - public void testV2() throws IOException, ZarrException { + public void testV2() throws IOException{ FilesystemStore fsStore = new FilesystemStore(""); HttpStore httpStore = new HttpStore("https://static.webknossos.org/data"); - System.out.println( - dev.zarr.zarrjava.v2.Array.open(httpStore.resolve("l4_sample", "color", "1"))); + System.out.println(dev.zarr.zarrjava.v2.Array.open(httpStore.resolve("l4_sample", "color", "1"))); }