Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More optimizations #9

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Inflate"
uuid = "d25df0c9-e2be-5dd7-82c8-3ad0b3e990b9"
authors = ["Gunnar Farnebäck <[email protected]>"]
version = "0.1.2"
version = "0.1.3"

[compat]
julia = "0.7, 1"
Expand Down
132 changes: 76 additions & 56 deletions src/Inflate.jl
Original file line number Diff line number Diff line change
Expand Up @@ -72,32 +72,19 @@ mutable struct InflateData <: AbstractInflateData
bitpos::Int
literal_or_length_code::Vector{Vector{Int}}
distance_code::Vector{Vector{Int}}
update_input_crc::Bool
crc::UInt32
end

function InflateData(source::Vector{UInt8})
InflateData(source, 0, 1, 0, fixed_literal_or_length_table,
fixed_distance_table, false, init_crc())
fixed_distance_table)
end

function get_input_byte(data::InflateData)
byte = data.bytes[data.bytepos]
data.bytepos += 1
if data.update_input_crc
data.crc = update_crc(data.crc, byte)
end
return byte
end

# This isn't called when reading Gzip header, so no need to
# consider updating crc.
function get_input_bytes(data::InflateData, n::Int)
bytes = @view data.bytes[data.bytepos:(data.bytepos + n - 1)]
data.bytepos += n
return bytes
end

function getbit(data::AbstractInflateData)
if data.bitpos == 0
data.current_byte = Int(get_input_byte(data))
Expand Down Expand Up @@ -131,6 +118,27 @@ function get_aligned_byte(data::AbstractInflateData)
return get_input_byte(data)
end

# Read one byte-aligned input byte, optionally folding it into a running
# checksum. Passing `nothing` as the second argument means no checksum is
# tracked and the call is forwarded to the one-argument method.
function get_aligned_byte(data::AbstractInflateData, ::Nothing)
    return get_aligned_byte(data)
end

# Checksum-tracking variant: the byte that was read is passed through
# `update_crc` and the result is stored back into the `crc` reference
# before the byte is returned.
function get_aligned_byte(data::AbstractInflateData, crc::Ref{UInt32})
    b = get_input_byte(data)
    crc[] = update_crc(crc[], b)
    return b
end

# Read two byte-aligned bytes and combine them into a UInt16. The first
# byte read becomes the low 8 bits and the second the high 8 bits.
# `crc` is forwarded unchanged to `get_aligned_byte` for each byte read.
function get_aligned_uint16(data::AbstractInflateData,
                            crc::Union{Nothing, Ref{UInt32}} = nothing)
    lo = UInt16(get_aligned_byte(data, crc))
    hi = UInt16(get_aligned_byte(data, crc))
    return (hi << 8) | lo
end

# Read two byte-aligned 16-bit words and combine them into a UInt32. The
# first word read becomes the low 16 bits and the second the high 16 bits.
# `crc` is forwarded unchanged to `get_aligned_uint16` for each word read.
function get_aligned_uint32(data::AbstractInflateData,
                            crc::Union{Nothing, Ref{UInt32}} = nothing)
    low_half = UInt32(get_aligned_uint16(data, crc))
    high_half = UInt32(get_aligned_uint16(data, crc))
    return (high_half << 16) | low_half
end

function get_value_from_code(data::AbstractInflateData,
code::Vector{Vector{Int}})
v = 0
Expand Down Expand Up @@ -221,20 +229,30 @@ function read_code_tables(data::AbstractInflateData)
data.distance_code = transform_code_lengths_to_code(code_lengths[(hlit+1):end])
end

# Enlarge `out` in fixed steps of 1048576 elements until
# `out_pos + n <= length(out)` holds. Nothing happens when the buffer is
# already large enough. Returns `nothing`.
function grow_if_needed!(out, out_pos, n)
    required = out_pos + n
    while length(out) < required
        resize!(out, length(out) + 1048576)
    end
    return nothing
end

function _inflate(data::InflateData)
out = UInt8[]
out = Vector{UInt8}(undef, 65536)
out_pos = 1
final_block = false
while !final_block
final_block = getbits(data, 1) == 1
compression_mode = getbits(data, 2)
if compression_mode == 0
skip_bits_to_byte_boundary(data)
len = getbits(data, 16)
nlen = getbits(data, 16)
len = get_aligned_uint16(data)
nlen = get_aligned_uint16(data)
if len ⊻ nlen != 0xffff
error("corrupted data")
end
append!(out, get_input_bytes(data, len))
grow_if_needed!(out, out_pos, len)
copyto!(out, out_pos, data.bytes, data.bytepos, len)
out_pos += len
data.bytepos += len
continue
elseif compression_mode == 1
data.literal_or_length_code = fixed_literal_or_length_table
Expand All @@ -248,23 +266,27 @@ function _inflate(data::InflateData)
while true
v = get_literal_or_length(data)
if v < 256
push!(out, UInt8(v))
grow_if_needed!(out, out_pos, 1)
out[out_pos] = UInt8(v)
out_pos += 1
elseif v == 256
break
else
length = getlength(data, v)
distance = getdist(data)
if length <= distance
append!(out, @view out[(end - distance + 1):(end - distance + length)])
else
for i = 1:length
push!(out, out[end - distance + 1])
end
grow_if_needed!(out, out_pos, length)
while length > distance
copyto!(out, out_pos, out, out_pos - distance, distance)
out_pos += distance
length -= distance
end
copyto!(out, out_pos, out, out_pos - distance, length)
out_pos += length
end
end
end

resize!(out, out_pos - 1)
return out
end

Expand Down Expand Up @@ -337,10 +359,11 @@ function crc(x::Vector{UInt8})
return finish_crc(c)
end

function read_zero_terminated_data(data::AbstractInflateData)
function read_zero_terminated_data(data::AbstractInflateData,
crc::Union{Nothing, Ref{UInt32}})
s = UInt8[]
while true
c = get_aligned_byte(data)
c = get_aligned_byte(data, crc)
push!(s, c)
if c == 0
break
Expand Down Expand Up @@ -371,49 +394,54 @@ function read_zlib_header(data::AbstractInflateData)
end

function read_gzip_header(data::AbstractInflateData, headers, compute_crc)
data.update_input_crc = compute_crc
ID1 = get_aligned_byte(data)
ID2 = get_aligned_byte(data)
if compute_crc
crc = Ref{UInt32}(init_crc())
else
crc = nothing
end

ID1 = get_aligned_byte(data, crc)
ID2 = get_aligned_byte(data, crc)
if ID1 != 0x1f || ID2 != 0x8b
error("not gzipped data")
end
CM = get_aligned_byte(data)
CM = get_aligned_byte(data, crc)
if CM != 8
error("unsupported compression method")
end
FLG = get_aligned_byte(data)
MTIME = getbits(data, 32)
XFL = get_aligned_byte(data)
OS = get_aligned_byte(data)
FLG = get_aligned_byte(data, crc)
MTIME = get_aligned_uint32(data, crc)
XFL = get_aligned_byte(data, crc)
OS = get_aligned_byte(data, crc)

if headers != nothing
headers["mtime"] = MTIME
headers["os"] = OS
end

if (FLG & 0x04) != 0 # FLG.FEXTRA
xlen = getbits(data, 16)
xlen = get_aligned_uint16(data, crc)
if headers != nothing
headers["fextra"] = zeros(UInt8, xlen)
end

for i = 1:xlen
b = get_aligned_byte(data)
b = get_aligned_byte(data, crc)
if headers != nothing
headers["fextra"][i] = b
end
end
end

if (FLG & 0x08) != 0 # FLG.FNAME
name = read_zero_terminated_data(data)
name = read_zero_terminated_data(data, crc)
if headers != nothing
headers["fname"] = String(name[1:end-1])
end
end

if (FLG & 0x10) != 0 # FLG.FCOMMENT
comment = read_zero_terminated_data(data)
comment = read_zero_terminated_data(data, crc)
if headers != nothing
headers["fcomment"] = String(comment[1:end-1])
end
Expand All @@ -423,11 +451,10 @@ function read_gzip_header(data::AbstractInflateData, headers, compute_crc)
error("reserved FLG bit set")
end

data.update_input_crc = false
if (FLG & 0x02) != 0 # FLG.FHCRC
crc16 = getbits(data, 16)
crc16 = get_aligned_uint16(data)
if compute_crc
header_crc = finish_crc(data.crc)
header_crc = finish_crc(crc[])
if crc16 != (header_crc & 0xffff)
error("corrupted data, header crc check failed")
end
Expand Down Expand Up @@ -503,11 +530,11 @@ function inflate_gzip(source::Vector{UInt8}; headers = nothing,
out = _inflate(data)

skip_bits_to_byte_boundary(data)
crc32 = getbits(data, 32)
crc32 = get_aligned_uint32(data)
if !ignore_checksum && crc32 != crc(out)
error("corrupted data, crc check failed")
end
isize = getbits(data, 32)
isize = get_aligned_uint32(data)
if isize != length(out)
error("corrupted data, length check failed")
end
Expand Down Expand Up @@ -547,16 +574,14 @@ mutable struct StreamingInflateData <: AbstractInflateData
pending_bytes::Int
distance::Int
reading_final_block::Bool
update_input_crc::Bool
crc::UInt32
end

function StreamingInflateData(stream::IO)
return StreamingInflateData(stream, UInt8[], 1, 0, 0,
fixed_literal_or_length_table,
fixed_distance_table,
zeros(UInt8, buffer_size), 1, 1,
true, 0, -2, false, false, init_crc())
true, 0, -2, false)
end

function get_input_byte(data::StreamingInflateData)
Expand All @@ -566,14 +591,9 @@ function get_input_byte(data::StreamingInflateData)
end
byte = data.input_buffer[data.input_buffer_pos]
data.input_buffer_pos += 1
if data.update_input_crc
data.crc = update_crc(data.crc, byte)
end
return byte
end

# This isn't called when reading Gzip header, so no need to
# consider updating crc.
function get_input_bytes(data::StreamingInflateData, n)
if data.input_buffer_pos > length(data.input_buffer)
data.input_buffer = read(data.stream, 65536)
Expand Down Expand Up @@ -689,11 +709,11 @@ end
function read_trailer(stream::InflateGzipStream)
crc = finish_crc(stream.crc)
skip_bits_to_byte_boundary(stream.data)
crc32 = getbits(stream.data, 32)
crc32 = get_aligned_uint32(stream.data)
if stream.compute_crc && crc32 != crc
error("corrupted data, crc check failed")
end
isize = getbits(stream.data, 32)
isize = get_aligned_uint32(stream.data)
if isize != stream.num_bytes
error("corrupted data, length check failed")
end
Expand Down Expand Up @@ -844,8 +864,8 @@ function getbyte(stream::AbstractInflateStream)
compression_mode = getbits(stream.data, 2)
if compression_mode == 0
skip_bits_to_byte_boundary(stream.data)
len = getbits(stream.data, 16)
nlen = getbits(stream.data, 16)
len = get_aligned_uint16(stream.data)
nlen = get_aligned_uint16(stream.data)
if len ⊻ nlen != 0xffff
error("corrupted data")
end
Expand Down
Loading