diff --git a/java/source/demo/pom.xml b/java/source/demo/pom.xml index bd4036f6..3546852a 100644 --- a/java/source/demo/pom.xml +++ b/java/source/demo/pom.xml @@ -82,6 +82,11 @@ arrow-avro ${arrow.version} + + org.apache.arrow + arrow-compression + ${arrow.version} + com.h2database h2 diff --git a/java/source/io.rst b/java/source/io.rst index 74f74d15..89709fea 100644 --- a/java/source/io.rst +++ b/java/source/io.rst @@ -320,6 +320,65 @@ We are providing a path with auto generated arrow files for testing purposes, ch Jhon 29 Thomy 33 +Read - From Compressed File +--------------------------- + +We are providing a path with auto generated arrow files for testing purposes, change that at your convenience. + +The compressed files were generated with this code example: + +.. code:: python + + import pandas as pd + import pyarrow as pa + + pd.DataFrame({'key': range(4)}).to_feather('lz4.arrow', compression='lz4') + pd.DataFrame({'key': range(4)}).to_feather('zstd.arrow', compression='zstd') + +.. note:: + + The Java Vector module reads files without a compression codec; if reading + compressed files is required, also add the Java Compression module as a + dependency. + +.. 
testcode::
+
+    import org.apache.arrow.compression.CommonsCompressionFactory;
+    import org.apache.arrow.memory.BufferAllocator;
+    import org.apache.arrow.memory.RootAllocator;
+    import org.apache.arrow.vector.ipc.ArrowFileReader;
+    import org.apache.arrow.vector.ipc.message.ArrowBlock;
+    import org.apache.arrow.vector.VectorSchemaRoot;
+    import java.io.File;
+    import java.io.FileInputStream;
+    import java.io.IOException;
+
+    File file = new File("./thirdpartydeps/arrowfiles/lz4.arrow"); // sample file added by this same patch
+    try( // try-with-resources: allocator, stream and reader are closed automatically on exit
+        BufferAllocator rootAllocator = new RootAllocator();
+        FileInputStream fileInputStream = new FileInputStream(file);
+        ArrowFileReader reader = new ArrowFileReader(fileInputStream.getChannel(),
+            rootAllocator, CommonsCompressionFactory.INSTANCE) // third argument supplies the codec factory from the compression module
+    ){
+        System.out.println("Record batches in file: " + reader.getRecordBlocks().size());
+        for (ArrowBlock arrowBlock : reader.getRecordBlocks()) { // one iteration per record block listed by the reader
+            reader.loadRecordBatch(arrowBlock); // load this block before asking the reader for its root
+            VectorSchemaRoot vectorSchemaRootRecover = reader.getVectorSchemaRoot();
+            System.out.print(vectorSchemaRootRecover.contentToTSVString()); // print() not println(); presumably the TSV text already ends with a newline - confirm
+        }
+    } catch (IOException e) {
+        e.printStackTrace();
+    }
+
+.. testoutput::
+
+    Record batches in file: 1
+    key
+    0
+    1
+    2
+    3
+
 Read - From Buffer
 ------------------
diff --git a/java/thirdpartydeps/arrowfiles/lz4.arrow b/java/thirdpartydeps/arrowfiles/lz4.arrow
new file mode 100644
index 00000000..dbaad3c9
Binary files /dev/null and b/java/thirdpartydeps/arrowfiles/lz4.arrow differ