From ee0aa2952a813d233ab10ee74272d1975d1b2fd9 Mon Sep 17 00:00:00 2001 From: david dali susanibar arce Date: Fri, 30 Dec 2022 17:20:46 -0500 Subject: [PATCH] doc: adding java read compression files with LZ4 codec --- java/source/demo/pom.xml | 5 ++ java/source/io.rst | 59 +++++++++++++++++++++++ java/thirdpartydeps/arrowfiles/lz4.arrow | Bin 0 -> 1122 bytes 3 files changed, 64 insertions(+) create mode 100644 java/thirdpartydeps/arrowfiles/lz4.arrow diff --git a/java/source/demo/pom.xml b/java/source/demo/pom.xml index bd4036f6..3546852a 100644 --- a/java/source/demo/pom.xml +++ b/java/source/demo/pom.xml @@ -82,6 +82,11 @@ arrow-avro ${arrow.version} + + org.apache.arrow + arrow-compression + ${arrow.version} + com.h2database h2 diff --git a/java/source/io.rst b/java/source/io.rst index 74f74d15..89709fea 100644 --- a/java/source/io.rst +++ b/java/source/io.rst @@ -320,6 +320,65 @@ We are providing a path with auto generated arrow files for testing purposes, ch Jhon 29 Thomy 33 +Read - From Compressed File +--------------------------- + +We are providing a path with auto generated arrow files for testing purposes, change that at your convenience. + +The compressed files were generated with the following code example: + +.. code:: python + + import pandas as pd + import pyarrow as pa + + pd.DataFrame({'key': range(4)}).to_feather('lz4.arrow', compression='lz4') + pd.DataFrame({'key': range(4)}).to_feather('zstd.arrow', compression='zstd') + +.. note:: + + The Java Vector module on its own can only read files that are not compressed; + to read compressed files, also add the Java Compression module + (``arrow-compression``) as a dependency. + +.. 
testcode:: + + import org.apache.arrow.compression.CommonsCompressionFactory; + import org.apache.arrow.memory.BufferAllocator; + import org.apache.arrow.memory.RootAllocator; + import org.apache.arrow.vector.ipc.ArrowFileReader; + import org.apache.arrow.vector.ipc.message.ArrowBlock; + import org.apache.arrow.vector.VectorSchemaRoot; + import java.io.File; + import java.io.FileInputStream; + import java.io.IOException; + + File file = new File("./thirdpartydeps/arrowfiles/lz4.arrow"); /* sample file added by this same patch under thirdpartydeps/arrowfiles */ + try( /* try-with-resources: closed in reverse declaration order (reader, stream, allocator) */ + BufferAllocator rootAllocator = new RootAllocator(); + FileInputStream fileInputStream = new FileInputStream(file); + ArrowFileReader reader = new ArrowFileReader(fileInputStream.getChannel(), + rootAllocator, CommonsCompressionFactory.INSTANCE) /* factory imported from org.apache.arrow.compression; presumably what lets the reader decode the LZ4 data - confirm against the Arrow API docs */ + ){ + System.out.println("Record batches in file: " + reader.getRecordBlocks().size()); + for (ArrowBlock arrowBlock : reader.getRecordBlocks()) { /* one iteration per record block reported by the reader */ + reader.loadRecordBatch(arrowBlock); + VectorSchemaRoot vectorSchemaRootRecover = reader.getVectorSchemaRoot(); + System.out.print(vectorSchemaRootRecover.contentToTSVString()); + } + } catch (IOException e) { + e.printStackTrace(); + } + +.. testoutput:: + + Record batches in file: 1 + key + 0 + 1 + 2 + 3 + Read - From Buffer ------------------ diff --git a/java/thirdpartydeps/arrowfiles/lz4.arrow b/java/thirdpartydeps/arrowfiles/lz4.arrow new file mode 100644 index 0000000000000000000000000000000000000000..dbaad3c9eb50541160695511676402ea995b46de GIT binary patch literal 1122 zcmeHH!AiqG5S_L$h7u&u3O)2t_S8cOEffV$_2xx{UOb4z)kFv;n~R(;k_$Ll$@wD<|fK`wonOQcoHC|c{AWP!|+4A4qM;dLgn zf~%&5%(CQxlidf|B1{ychwj)Ni#5Qia?Lk!r|v|o(X$CH{UtwL*ba5UjY+$jEvMqN z(PYm1Q09xJ1x3f2PYcnSPo$Xv=Q+LL%lTMr>Q$4nbyjK_I@h>#S22Cm;>naTCqTI# z=lH70frkDOHQD$@Mm3!0UO&&S0WT0koc69}&->VR@D@4^i#knRUQxFUURvK7?`|uJ_6dWTRK2dY=`9Pg|e=f2V)