Skip to content

Commit

Permalink
Test zarrita compatibility (#3)
Browse files Browse the repository at this point in the history
* add zarrita python script

* add testReadFromZarrita

* renamed zarrita_write

* add testWriteToZarrita

* parameterized codecs in testReadFromZarrita

* fixed camelcase in zarr.json

* add venv for executing zarrita in linux

* adapt test-setup for linux

* adapt test-setup for linux

* fix testReadFromZarrita for gzip

* parameterize codecs in testReadFromZarrita

* remove unused imports

* add testCodecsWriteRead

* reformat

* remove unnecessary argument

* add testCodecTranspose

* start removing constants "C" and "F" from Transpose Codec's order

* add CoreArrayMetadata to codec object instead of passing as argument in encode and decode
remove transpose order "F" and "C"

* remove wrong dependency

* set fail-fast: false

* specify testSourceDirectory

* added property index_location to ShardingIndexedCodec

* add indexLocation in ShardingCodec.

* change junit version for TestUtils

* remove creation of dir testdata

* update dependencies for JUnit 5

* install zarrita in CI

* correct python version, maven no-transfer-progress

* add venv for executing zarrita to CI

* add /venv_zarrita to .gitignore

* remove deprecated zarrita venv

* test CI

* fix tests for windows

* update python path for windows in ci.yml

* add Development Start-Guide to Run Tests Locally

* correct Development Start-Guide to python3.11

* add support of shardingCodec indexLocation=start

* code cleanup

* add testZstdLibrary

* fix Zstd compression and decompression

* cleanup code

* cleanup code

* remove unused method

* include crc32c codec into tests

* incorporate feedback for java version and merge tests

* add more variation to codec-configuration in tests with zarrita

* fix BloscCodec with shuffle = "shuffle"
  • Loading branch information
brokkoli71 authored Jun 3, 2024
1 parent d30475a commit adc45ae
Show file tree
Hide file tree
Showing 34 changed files with 1,100 additions and 429 deletions.
20 changes: 16 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,29 +11,41 @@ jobs:
build:
strategy:
matrix:
os: [ ubuntu, windows, macos ]
runs-on: ${{ matrix.os }}-latest
os: [ ubuntu-latest, windows-latest, macos-latest ]
fail-fast: false
runs-on: ${{ matrix.os }}
defaults:
run:
shell: bash

steps:
- uses: actions/checkout@v3

- name: Set up JDK
uses: actions/setup-java@v3
with:
java-version: '22'
distribution: 'temurin'
cache: maven

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.11'

- name: Install zarrita
run: |
python -m venv venv_zarrita
if [ "${{ runner.os }}" = "Windows" ]; then venv_zarrita/Scripts/pip install zarrita; else venv_zarrita/bin/pip install zarrita; fi
- name: Download blosc jar
run: |
mkdir -p ../blosc-java/target
curl https://static.webknossos.org/misc/blosc-java-0.1-1.21.4-SNAPSHOT.jar -o ../blosc-java/target/blosc-java-0.1-1.21.4-SNAPSHOT.jar
- name: Download testdata
run: |
mkdir testdata testoutput
mkdir testoutput
curl https://static.webknossos.org/data/zarr_v3/l4_sample.zip -o testdata/l4_sample.zip
cd testdata
unzip l4_sample.zip
Expand All @@ -44,7 +56,7 @@ jobs:
- name: Test
env:
MAVEN_OPTS: "-Xmx6g"
run: mvn test -DargLine="-Xmx6g"
run: mvn --no-transfer-progress test -DargLine="-Xmx6g"

- name: Assemble JAR
run: mvn package -DskipTests
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,6 @@ build/


### Custom ###
/testdata
/testdata/l4_sample
/testoutput
/venv_zarrita
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,19 @@ array.write(
ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{1, 1024, 1024, 1024})
);
```
## Development Start-Guide

### Run Tests Locally
To be able to run the tests locally, make sure to have `python3.11` installed.
Also, you need to set up a venv for zarrita at the root of the project:
`python3.11 -m venv venv_zarrita`.

Then install zarrita there with `venv_zarrita/Scripts/pip install zarrita`
for Windows and `venv_zarrita/bin/pip install zarrita` for Linux and macOS.

Furthermore, you will need the `l4_sample` test data:

```sh
curl https://static.webknossos.org/data/zarr_v3/l4_sample.zip -o testdata/l4_sample.zip \
  && cd testdata \
  && unzip l4_sample.zip
```
37 changes: 36 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,31 @@
<aws.version>1.12.477</aws.version>
<netcdfJavaVersion>5.5.3</netcdfJavaVersion>
<zstdVersion>1.5.5-5</zstdVersion>
<junit-jupiter-version>5.10.2</junit-jupiter-version>
</properties>

<dependencies>
<!-- JUnit 5 dependencies -->
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-api</artifactId>
<version>${junit-jupiter-version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-engine</artifactId>
<version>${junit-jupiter-version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-params</artifactId>
<version>${junit-jupiter-version}</version>
<scope>test</scope>
</dependency>

<!-- Other dependencies -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
Expand Down Expand Up @@ -54,6 +76,7 @@
<artifactId>okhttp</artifactId>
<version>2.7.5</version>
</dependency>
<!-- JUnit 4 dependency for backward compatibility if needed -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
Expand All @@ -70,4 +93,16 @@
</repository>
</repositories>

</project>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>3.2.5</version>
<configuration>
<useSystemClassLoader>false</useSystemClassLoader>
</configuration>
</plugin>
</plugins>
</build>
</project>
20 changes: 20 additions & 0 deletions src/main/java/dev/zarr/zarrjava/utils/Utils.java
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,24 @@ public static <T> T[] concatArrays(T[] array1, T[]... arrays) {
}
return result;
}

/**
 * Tests whether {@code array} holds each of the integers {@code 0} through
 * {@code array.length - 1} exactly once.
 *
 * <p>An empty array is not treated as a permutation. The argument itself is
 * never modified; a sorted copy is inspected instead.
 *
 * @param array the candidate permutation
 * @return {@code true} if {@code array} is non-empty and is a permutation of
 *     {@code 0..array.length-1}, {@code false} otherwise
 */
public static boolean isPermutation(int[] array) {
    final int length = array.length;
    if (length == 0) {
        return false;
    }
    // Sort a copy: a permutation of 0..length-1 sorts to exactly 0, 1, ..., length-1.
    final int[] sorted = Arrays.copyOf(array, length);
    Arrays.sort(sorted);
    for (int position = 0; position < length; position++) {
        if (sorted[position] != position) {
            return false;
        }
    }
    return true;
}

/**
 * Computes the inverse of a permutation.
 *
 * <p>For a permutation {@code origin} of {@code 0..origin.length-1}, the result
 * {@code inverse} satisfies {@code inverse[origin[i]] == i} for every index
 * {@code i}.
 *
 * <p>The input is validated unconditionally rather than with {@code assert},
 * because assertions are disabled by default at runtime and an invalid input
 * would otherwise produce an out-of-bounds access or a silently wrong result.
 *
 * @param origin a non-empty permutation of {@code 0..origin.length-1}; not modified
 * @return a new array holding the inverse permutation
 * @throws IllegalArgumentException if {@code origin} is empty, contains a value
 *     outside {@code 0..origin.length-1}, or contains a duplicate value
 */
public static int[] inversePermutation(int[] origin) {
    final int length = origin.length;
    if (length == 0) {
        throw new IllegalArgumentException("origin must not be empty");
    }
    final int[] inverse = new int[length];
    // -1 marks "slot not assigned yet", which lets duplicates be detected in the same pass.
    Arrays.fill(inverse, -1);
    for (int i = 0; i < length; i++) {
        final int target = origin[i];
        if (target < 0 || target >= length || inverse[target] != -1) {
            throw new IllegalArgumentException(
                "origin is not a permutation of 0.." + (length - 1) + ": "
                    + Arrays.toString(origin));
        }
        inverse[target] = i;
    }
    return inverse;
}
}
9 changes: 4 additions & 5 deletions src/main/java/dev/zarr/zarrjava/v3/Array.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ protected Array(StoreHandle storeHandle, ArrayMetadata arrayMetadata)
throws IOException, ZarrException {
super(storeHandle);
this.metadata = arrayMetadata;
this.codecPipeline = new CodecPipeline(arrayMetadata.codecs);
this.codecPipeline = new CodecPipeline(arrayMetadata.codecs, arrayMetadata.coreArrayMetadata);
}

/**
Expand Down Expand Up @@ -171,8 +171,7 @@ public ucar.ma2.Array read(final long[] offset, final int[] shape) throws ZarrEx

if (codecPipeline.supportsPartialDecode()) {
final ucar.ma2.Array chunkArray = codecPipeline.decodePartial(chunkHandle,
Utils.toLongArray(chunkProjection.chunkOffset), chunkProjection.shape,
metadata.coreArrayMetadata);
Utils.toLongArray(chunkProjection.chunkOffset), chunkProjection.shape);
MultiArrayUtils.copyRegion(chunkArray, new int[metadata.ndim()], outputArray,
chunkProjection.outOffset, chunkProjection.shape
);
Expand Down Expand Up @@ -223,7 +222,7 @@ public ucar.ma2.Array readChunk(long[] chunkCoords)
return metadata.allocateFillValueChunk();
}

return codecPipeline.decode(chunkBytes, metadata.coreArrayMetadata);
return codecPipeline.decode(chunkBytes);
}

/**
Expand Down Expand Up @@ -299,7 +298,7 @@ public void writeChunk(long[] chunkCoords, ucar.ma2.Array chunkArray) throws Zar
if (MultiArrayUtils.allValuesEqual(chunkArray, metadata.parsedFillValue)) {
chunkHandle.delete();
} else {
ByteBuffer chunkBytes = codecPipeline.encode(chunkArray, metadata.coreArrayMetadata);
ByteBuffer chunkBytes = codecPipeline.encode(chunkArray);
chunkHandle.set(chunkBytes);
}
}
Expand Down
7 changes: 3 additions & 4 deletions src/main/java/dev/zarr/zarrjava/v3/codec/ArrayArrayCodec.java
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
package dev.zarr.zarrjava.v3.codec;

import dev.zarr.zarrjava.ZarrException;
import dev.zarr.zarrjava.v3.ArrayMetadata.CoreArrayMetadata;
import ucar.ma2.Array;

public interface ArrayArrayCodec extends Codec {
public abstract class ArrayArrayCodec extends Codec {

Array encode(Array chunkArray, CoreArrayMetadata arrayMetadata)
protected abstract Array encode(Array chunkArray)
throws ZarrException;

Array decode(Array chunkArray, CoreArrayMetadata arrayMetadata)
protected abstract Array decode(Array chunkArray)
throws ZarrException;

}
17 changes: 9 additions & 8 deletions src/main/java/dev/zarr/zarrjava/v3/codec/ArrayBytesCodec.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,24 @@

import dev.zarr.zarrjava.ZarrException;
import dev.zarr.zarrjava.store.StoreHandle;
import dev.zarr.zarrjava.v3.ArrayMetadata.CoreArrayMetadata;
import java.nio.ByteBuffer;
import ucar.ma2.Array;

public interface ArrayBytesCodec extends Codec {
public abstract class ArrayBytesCodec extends Codec {

ByteBuffer encode(Array chunkArray, CoreArrayMetadata arrayMetadata)
protected abstract ByteBuffer encode(Array chunkArray)
throws ZarrException;

Array decode(ByteBuffer chunkBytes, CoreArrayMetadata arrayMetadata)
protected abstract Array decode(ByteBuffer chunkBytes)
throws ZarrException;

interface WithPartialDecode extends ArrayBytesCodec {
public abstract static class WithPartialDecode extends ArrayBytesCodec {

Array decodePartial(
StoreHandle handle, long[] offset, int[] shape,
CoreArrayMetadata arrayMetadata
public abstract Array decode(ByteBuffer shardBytes) throws ZarrException;
public abstract ByteBuffer encode(Array shardArray) throws ZarrException;

protected abstract Array decodePartial(
StoreHandle handle, long[] offset, int[] shape
) throws ZarrException;
}
}
Expand Down
10 changes: 4 additions & 6 deletions src/main/java/dev/zarr/zarrjava/v3/codec/BytesBytesCodec.java
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
package dev.zarr.zarrjava.v3.codec;

import dev.zarr.zarrjava.ZarrException;
import dev.zarr.zarrjava.v3.ArrayMetadata.CoreArrayMetadata;

import java.nio.ByteBuffer;

public interface BytesBytesCodec extends Codec {
public abstract class BytesBytesCodec extends Codec {

ByteBuffer encode(ByteBuffer chunkBytes, CoreArrayMetadata arrayMetadata)
throws ZarrException;
protected abstract ByteBuffer encode(ByteBuffer chunkBytes) throws ZarrException;

ByteBuffer decode(ByteBuffer chunkBytes, CoreArrayMetadata arrayMetadata)
throws ZarrException;
public abstract ByteBuffer decode(ByteBuffer chunkBytes) throws ZarrException;

}
19 changes: 16 additions & 3 deletions src/main/java/dev/zarr/zarrjava/v3/codec/Codec.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,22 @@
import dev.zarr.zarrjava.v3.ArrayMetadata;

@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "name")
public interface Codec {
public abstract class Codec {

long computeEncodedSize(long inputByteLength, ArrayMetadata.CoreArrayMetadata arrayMetadata)
throws ZarrException;
protected ArrayMetadata.CoreArrayMetadata arrayMetadata;

/**
 * Returns the core array metadata stored on this codec.
 *
 * @return the value of the {@code arrayMetadata} field
 * @throws ZarrException if {@code arrayMetadata} is still {@code null}
 */
protected ArrayMetadata.CoreArrayMetadata resolveArrayMetadata() throws ZarrException {
    if (this.arrayMetadata != null) {
        return this.arrayMetadata;
    }
    throw new ZarrException("arrayMetadata needs to get set in for every codec");
}

protected abstract long computeEncodedSize(long inputByteLength, ArrayMetadata.CoreArrayMetadata arrayMetadata)
throws ZarrException;

/**
 * Stores the given core array metadata in this codec's {@code arrayMetadata} field.
 *
 * <p>This implementation performs a plain assignment and does not throw.
 * NOTE(review): the declared {@code ZarrException} is presumably there so that
 * overriding codecs can reject incompatible metadata — confirm against subclasses.
 *
 * @param arrayMetadata the metadata to store; no null check is performed here
 * @throws ZarrException never thrown by this implementation
 */
public void setCoreArrayMetadata(ArrayMetadata.CoreArrayMetadata arrayMetadata) throws ZarrException{
this.arrayMetadata = arrayMetadata;
}
}

36 changes: 23 additions & 13 deletions src/main/java/dev/zarr/zarrjava/v3/codec/CodecBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ public CodecBuilder withBlosc(
}

public CodecBuilder withBlosc(String cname, String shuffle, int clevel, int blockSize) {
if (shuffle.equals("shuffle")){
shuffle = "byteshuffle";
}
return withBlosc(Blosc.Compressor.fromString(cname), Blosc.Shuffle.fromString(shuffle), clevel,
dataType.getByteCount(), blockSize
);
Expand All @@ -62,13 +65,9 @@ public CodecBuilder withBlosc() {
return withBlosc("zstd");
}

public CodecBuilder withTranspose(String order) {
try {
public CodecBuilder withTranspose(int[] order) {
codecs.add(new TransposeCodec(new TransposeCodec.Configuration(order)));
} catch (ZarrException e) {
throw new RuntimeException(e);
}
return this;
return this;
}

public CodecBuilder withBytes(Endian endian) {
Expand Down Expand Up @@ -113,29 +112,40 @@ public CodecBuilder withZstd(int clevel) {
public CodecBuilder withSharding(int[] chunkShape) {
try {
codecs.add(
new ShardingIndexedCodec(new ShardingIndexedCodec.Configuration(chunkShape,
new Codec[]{new BytesCodec(new Configuration(Endian.LITTLE))},
new Codec[]{new BytesCodec(new Configuration(Endian.LITTLE)), new Crc32cCodec()})));
new ShardingIndexedCodec(new ShardingIndexedCodec.Configuration(chunkShape,
new Codec[]{new BytesCodec(new Configuration(Endian.LITTLE))},
new Codec[]{new BytesCodec(new Configuration(Endian.LITTLE)), new Crc32cCodec()},
"end")));
} catch (ZarrException e) {
throw new RuntimeException(e);
}
return this;
}

public CodecBuilder withSharding(int[] chunkShape,
Function<CodecBuilder, CodecBuilder> codecBuilder) {
Function<CodecBuilder, CodecBuilder> codecBuilder) {
return withSharding(chunkShape, codecBuilder, "end");
}

public CodecBuilder withSharding(int[] chunkShape,
Function<CodecBuilder, CodecBuilder> codecBuilder, String indexLocation) {
CodecBuilder nestedBuilder = new CodecBuilder(dataType);
try {
codecs.add(new ShardingIndexedCodec(
new ShardingIndexedCodec.Configuration(chunkShape,
codecBuilder.apply(nestedBuilder).build(),
new Codec[]{new BytesCodec(Endian.LITTLE), new Crc32cCodec()})));
new ShardingIndexedCodec.Configuration(chunkShape,
codecBuilder.apply(nestedBuilder).build(),
new Codec[]{new BytesCodec(Endian.LITTLE), new Crc32cCodec()},
indexLocation)));
} catch (ZarrException e) {
throw new RuntimeException(e);
}
return this;
}

/**
 * Adds a new {@code Crc32cCodec} to {@code codecs}.
 *
 * @return this builder, so calls can be chained
 */
public CodecBuilder withCrc32c() {
codecs.add(new Crc32cCodec());
return this;
}
private void autoInsertBytesCodec() {
if (codecs.stream().noneMatch(c -> c instanceof ArrayBytesCodec)) {
Codec[] arrayArrayCodecs = codecs.stream().filter(c -> c instanceof ArrayArrayCodec)
Expand Down
Loading

0 comments on commit adc45ae

Please sign in to comment.