|
| 1 | +module V3Codecs |
| 2 | + |
| 3 | +import ..Codecs: zencode, zdecode, zencode!, zdecode! |
| 4 | +using CRC32c: CRC32c |
| 5 | + |
| 6 | +abstract type V3Codec{In,Out} end |
| 7 | +const codectypes = Dict{String, V3Codec}() |
| 8 | + |
| 9 | +@enum BloscCompressor begin |
| 10 | + lz4 |
| 11 | + lz4hc |
| 12 | + blosclz |
| 13 | + zstd |
| 14 | + snappy |
| 15 | + zlib |
| 16 | +end |
| 17 | + |
| 18 | +@enum BloscShuffle begin |
| 19 | + noshuffle |
| 20 | + shuffle |
| 21 | + bitshuffle |
| 22 | +end |
| 23 | + |
| 24 | +struct BloscCodec <: V3Codec{:bytes, :bytes} |
| 25 | + cname::BloscCompressor |
| 26 | + clevel::Int64 |
| 27 | + shuffle::BloscShuffle |
| 28 | + typesize::UInt8 |
| 29 | + blocksize::UInt |
| 30 | +end |
| 31 | +name(::BloscCodec) = "blosc" |
| 32 | + |
| 33 | +struct BytesCodec <: V3Codec{:array, :bytes} |
| 34 | +end |
| 35 | +name(::BytesCodec) = "bytes" |
| 36 | + |
| 37 | +struct CRC32cCodec <: V3Codec{:bytes, :bytes} |
| 38 | +end |
| 39 | +name(::CRC32cCodec) = "crc32c" |
| 40 | + |
| 41 | +struct GzipCodec <: V3Codec{:bytes, :bytes} |
| 42 | +end |
| 43 | +name(::GzipCodec) = "gzip" |
| 44 | + |
| 45 | + |
| 46 | +#= |
| 47 | +zencode(a, c::Codec) = error("Unimplemented") |
| 48 | +zencode!(encoded, data, c::Codec) = error("Unimplemented") |
| 49 | +zdecode(a, c::Codec, T::Type) = error("Unimplemented") |
| 50 | +zdecode!(data, encoded, c::Codec) = error("Unimplemented") |
| 51 | +=# |
| 52 | + |
| 53 | +function crc32c_stream!(output::IO, input::IO; buffer = Vector{UInt8}(undef, 1024*32)) |
| 54 | + hash::UInt32 = 0x00000000 |
| 55 | + while(bytesavailable(input) > 0) |
| 56 | + sized_buffer = @view(buffer[1:min(length(buffer), bytesavailable(input))]) |
| 57 | + read!(input, sized_buffer) |
| 58 | + write(output, sized_buffer) |
| 59 | + hash = CRC32c.crc32c(sized_buffer, hash) |
| 60 | + end |
| 61 | + return hash |
| 62 | +end |
| 63 | +function zencode!(encoded::Vector{UInt8}, data::Vector{UInt8}, c::CRC32cCodec) |
| 64 | + output = IOBuffer(encoded, read=false, write=true) |
| 65 | + input = IOBuffer(data, read=true, write=false) |
| 66 | + zencode!(output, input, c) |
| 67 | + return take!(output) |
| 68 | +end |
| 69 | +function zencode!(output::IO, input::IO, c::CRC32cCodec) |
| 70 | + hash = crc32c_stream!(output, input) |
| 71 | + write(output, hash) |
| 72 | + return output |
| 73 | +end |
| 74 | +function zdecode!(encoded::Vector{UInt8}, data::Vector{UInt8}, c::CRC32cCodec) |
| 75 | + output = IOBuffer(encoded, read=false, write=true) |
| 76 | + input = IOBuffer(data, read=true, write=true) |
| 77 | + zdecode!(output, input, c) |
| 78 | + return take!(output) |
| 79 | +end |
| 80 | +function zdecode!(output::IOBuffer, input::IOBuffer, c::CRC32cCodec) |
| 81 | + input_vec = take!(input) |
| 82 | + truncated_input = IOBuffer(@view(input_vec[1:end-4]); read=true, write=false) |
| 83 | + hash = crc32c_stream!(output, truncated_input) |
| 84 | + if input_vec[end-3:end] != reinterpret(UInt8, [hash]) |
| 85 | + throw(IOError("CRC32c hash does not match")) |
| 86 | + end |
| 87 | + return output |
| 88 | +end |
| 89 | + |
| 90 | +struct ShardingCodec{N} <: V3Codec{:array, :bytes} |
| 91 | + chunk_shape::NTuple{N,Int} |
| 92 | + codecs::Vector{V3Codec} |
| 93 | + index_codecs::Vector{V3Codec} |
| 94 | + index_location::Symbol |
| 95 | +end |
| 96 | +name(::ShardingCodec) = "sharding_indexed" |
| 97 | + |
| 98 | +struct TransposeCodec <: V3Codec{:array, :array} |
| 99 | +end |
| 100 | +name(::TransposeCodec) = "transpose" |
| 101 | + |
| 102 | + |
| 103 | +end |
0 commit comments