From e7dc6e1fbefdf9c3feaae633538bbd11e8d9800d Mon Sep 17 00:00:00 2001 From: "elitsa.marinova" Date: Tue, 20 Dec 2022 15:29:05 +0200 Subject: [PATCH] Add support for DEFLATE64 algorithm when extracting from zip archive --- .../Streams/InflaterInputStream.cs | 20 +- .../Zip/Deflate64/Deflate64OutputWindow.cs | 157 ++++ .../Zip/Deflate64/Deflate64Stream.cs | 479 +++++++++++ .../Zip/Deflate64/HuffmanTree.cs | 323 +++++++ .../Zip/Deflate64/InflaterManaged.cs | 794 ++++++++++++++++++ src/ICSharpCode.SharpZipLib/Zip/ZipEntry.cs | 1 + .../Zip/ZipInputStream.cs | 40 +- 7 files changed, 1812 insertions(+), 2 deletions(-) create mode 100644 src/ICSharpCode.SharpZipLib/Zip/Deflate64/Deflate64OutputWindow.cs create mode 100644 src/ICSharpCode.SharpZipLib/Zip/Deflate64/Deflate64Stream.cs create mode 100644 src/ICSharpCode.SharpZipLib/Zip/Deflate64/HuffmanTree.cs create mode 100644 src/ICSharpCode.SharpZipLib/Zip/Deflate64/InflaterManaged.cs diff --git a/src/ICSharpCode.SharpZipLib/Zip/Compression/Streams/InflaterInputStream.cs b/src/ICSharpCode.SharpZipLib/Zip/Compression/Streams/InflaterInputStream.cs index 7790474d2..c470177b9 100644 --- a/src/ICSharpCode.SharpZipLib/Zip/Compression/Streams/InflaterInputStream.cs +++ b/src/ICSharpCode.SharpZipLib/Zip/Compression/Streams/InflaterInputStream.cs @@ -108,6 +108,24 @@ public void SetInflaterInput(Inflater inflater) } } + /// + /// Resize input buffer according to a specific array size + /// + /// + public void ResizeBuffer(int bufferSize) + { + if (available == 0) + { + int oldSize = rawData.Length; + byte[] resized = rawData; + Array.Resize(ref resized, bufferSize); + rawData = resized; + rawLength = rawData.Length; + clearText = rawData; + clearTextLength = clearText.Length; + } + } + /// /// Fill the buffer from the underlying input stream. /// @@ -696,7 +714,7 @@ public override int Read(byte[] buffer, int offset, int count) /// /// Base stream the inflater reads from. 
/// - private Stream baseInputStream; + protected Stream baseInputStream; /// /// The compressed size diff --git a/src/ICSharpCode.SharpZipLib/Zip/Deflate64/Deflate64OutputWindow.cs b/src/ICSharpCode.SharpZipLib/Zip/Deflate64/Deflate64OutputWindow.cs new file mode 100644 index 000000000..87aab2739 --- /dev/null +++ b/src/ICSharpCode.SharpZipLib/Zip/Deflate64/Deflate64OutputWindow.cs @@ -0,0 +1,157 @@ +// The content of the class is borrowed from DEFLATE64 support implementation for DotNetZip +// which on its part contains modified code from the .NET Core Libraries (CoreFX and System.IO.Compression/DeflateManaged) +// where deflate64 decompression is implemented. +// https://github.com/haf/DotNetZip.Semverd/blob/master/src/Zip.Shared/Deflate64/OutputWindow.cs + +using System; +using System.Diagnostics; + +namespace ICSharpCode.SharpZipLib.Zip.Deflate64 +{ + /// + /// This class maintains a window for decompressed output. + /// We need to keep this because the decompressed information can be + /// a literal or a length/distance pair. For length/distance pair, + /// we need to look back in the output window and copy bytes from there. + /// We use a byte array of WindowSize circularly. + /// + internal sealed class Deflate64OutputWindow + { + // With Deflate64 we can have up to a 65536 length as well as up to a 65538 distance. This means we need a Window that is at + // least 131074 bytes long so we have space to retrieve up to a full 64kb in lookback and place it in our buffer without + // overwriting existing data. Deflate64OutputWindow requires that the WindowSize be an exponent of 2, so we round up to 2^18. + private const int WindowSize = 262144; + private const int WindowMask = 262143; + + private readonly byte[] _window = new byte[WindowSize]; // The window is 2^18 bytes + private int _end; // this is the position to where we should write next byte + private int _bytesUsed; // The number of bytes in the output window which is not consumed. 
+ + internal void ClearBytesUsed() + { + _bytesUsed = 0; + } + + /// Add a byte to output window. + public void Write(byte b) + { + Debug.Assert(_bytesUsed < WindowSize, "Can't add byte when window is full!"); + _window[_end++] = b; + _end &= WindowMask; + ++_bytesUsed; + } + + public void WriteLengthDistance(int length, int distance) + { + Debug.Assert((_bytesUsed + length) <= WindowSize, "No Enough space"); + + // move backwards distance bytes in the output stream, + // and copy length bytes from this position to the output stream. + _bytesUsed += length; + int copyStart = (_end - distance) & WindowMask; // start position for coping. + + int border = WindowSize - length; + if (copyStart <= border && _end < border) + { + if (length <= distance) + { + Array.Copy(_window, copyStart, _window, _end, length); + _end += length; + } + else + { + // The referenced string may overlap the current + // position; for example, if the last 2 bytes decoded have values + // X and Y, a string reference with + // adds X,Y,X,Y,X to the output stream. + while (length-- > 0) + { + _window[_end++] = _window[copyStart++]; + } + } + } + else + { + // copy byte by byte + while (length-- > 0) + { + _window[_end++] = _window[copyStart++]; + _end &= WindowMask; + copyStart &= WindowMask; + } + } + } + + /// + /// Copy up to length of bytes from input directly. + /// This is used for uncompressed block. + /// + public int CopyFrom(InputBuffer input, int length) + { + length = Math.Min(Math.Min(length, WindowSize - _bytesUsed), input.AvailableBytes); + int copied; + + // We might need wrap around to copy all bytes. + int tailLen = WindowSize - _end; + if (length > tailLen) + { + // copy the first part + copied = input.CopyTo(_window, _end, tailLen); + if (copied == tailLen) + { + // only try to copy the second part if we have enough bytes in input + copied += input.CopyTo(_window, 0, length - tailLen); + } + } + else + { + // only one copy is needed if there is no wrap around. 
+ copied = input.CopyTo(_window, _end, length); + } + + _end = (_end + copied) & WindowMask; + _bytesUsed += copied; + return copied; + } + + /// Free space in output window. + public int FreeBytes => WindowSize - _bytesUsed; + + /// Bytes not consumed in output window. + public int AvailableBytes => _bytesUsed; + + /// Copy the decompressed bytes to output array. + public int CopyTo(byte[] output, int offset, int length) + { + int copy_end; + + if (length > _bytesUsed) + { + // we can copy all the decompressed bytes out + copy_end = _end; + length = _bytesUsed; + } + else + { + copy_end = (_end - _bytesUsed + length) & WindowMask; // copy length of bytes + } + + int copied = length; + + int tailLen = length - copy_end; + if (tailLen > 0) + { + // this means we need to copy two parts separately + // copy tailLen bytes from the end of output window + Array.Copy(_window, WindowSize - tailLen, + output, offset, tailLen); + offset += tailLen; + length = copy_end; + } + Array.Copy(_window, copy_end - length, output, offset, length); + _bytesUsed -= copied; + Debug.Assert(_bytesUsed >= 0, "check this function and find why we copied more bytes than we have"); + return copied; + } + } +} diff --git a/src/ICSharpCode.SharpZipLib/Zip/Deflate64/Deflate64Stream.cs b/src/ICSharpCode.SharpZipLib/Zip/Deflate64/Deflate64Stream.cs new file mode 100644 index 000000000..45547ea45 --- /dev/null +++ b/src/ICSharpCode.SharpZipLib/Zip/Deflate64/Deflate64Stream.cs @@ -0,0 +1,479 @@ +// The content of the classes is borrowed from DEFLATE64 support implementation for DotNetZip +// which on its part contains modified code from the .NET Core Libraries (CoreFX and System.IO.Compression/DeflateManaged) +// where deflate64 decompression is implemented. 
+// https://github.com/haf/DotNetZip.Semverd/blob/master/src/Zip.Shared/Deflate64/Deflate64Stream.cs +// https://github.com/haf/DotNetZip.Semverd/blob/master/src/Zip.Shared/Deflate64/InputBuffer.cs + +using System; +using System.Diagnostics; +using System.IO; + +namespace ICSharpCode.SharpZipLib.Zip.Deflate64 +{ + /// + /// Deflate64Stream supports decompression of Deflate64 format only + /// + public class Deflate64Stream : Stream + { + internal const int DefaultBufferSize = 8192; + + private Stream _stream; + private long _compressedSize; + private long _reachedSize = 0; + private InflaterManaged inflater; + private readonly byte[] _buffer; + + /// + ///A specific constructor to allow decompression of Deflate64 + /// + public Deflate64Stream(Stream stream, long compressedSize, long uncompressedSize = -1) + { + if (stream == null) + throw new ArgumentNullException(nameof(stream)); + if (!stream.CanRead) + throw new ArgumentException("NotSupported_UnreadableStream", nameof(stream)); + + Inflater = new InflaterManaged(null, true, uncompressedSize); + + _compressedSize = compressedSize; + _stream = stream; + _buffer = new byte[DefaultBufferSize]; + } + + /// + /// Gets a value indicating if the stream supports reading + /// + public override bool CanRead + { + get + { + if (_stream == null) + { + return false; + } + + return _stream.CanRead; + } + } + + /// + /// Gets a value indicating if the stream supports writing + /// + public override bool CanWrite + { + get + { + return false; + } + } + + /// + /// Gets a value indicating whether the current stream supports seeking. 
+ /// This property always returns false + /// + public override bool CanSeek => false; + + /// + /// Gets the length in bytes of the stream. + /// Getting the length is not supported and will throw a NotSupportedException + /// + public override long Length + { + get { throw new NotSupportedException(); } + } + + /// + /// Gets or sets the stream's position. + /// Setting/Getting the position is not supported and will throw a NotSupportedException + /// + public override long Position + { + get { throw new NotSupportedException(); } + set { throw new NotSupportedException(); } + } + + internal InflaterManaged Inflater { get => inflater; set => inflater = value; } + + /// + /// Flushes the stream + /// + public override void Flush() + { + EnsureNotDisposed(); + } + + /// + /// Sets the stream's position. This operation is not supported and will throw a NotSupportedException + /// + public override long Seek(long offset, SeekOrigin origin) + { + throw new NotSupportedException(); + } + + /// + /// Sets the length of this stream to the given value. + /// This operation is not supported and will throw a NotSupportedException + /// + public override void SetLength(long value) + { + throw new NotSupportedException(); + } + + /// + /// Reads a sequence of bytes and advances the read position by the number of bytes read. + /// + /// Array of bytes to store values in + /// Offset in array to begin storing data + /// The maximum number of bytes to read + /// The total number of bytes read into the buffer. This might be less + /// than the number of bytes requested if that number of bytes are not + /// currently available or zero if the end of the stream is reached. 
+ /// + public override int Read(byte[] array, int offset, int count) + { + ValidateParameters(array, offset, count); + EnsureNotDisposed(); + + int bytesRead; + int currentOffset = offset; + int remainingCount = count; + + while (true) + { + bytesRead = Inflater.Inflate(array, currentOffset, remainingCount); + currentOffset += bytesRead; + remainingCount -= bytesRead; + + if (remainingCount == 0) + { + break; + } + + if (Inflater.Finished()) + { + // if we finished decompressing, we can't have anything left in the outputwindow. + Debug.Assert(Inflater.AvailableOutput == 0, "We should have copied all stuff out!"); + break; + } + + //Calculate the availble buffer size according to the file compressed size, otherwise additional data will be read + int availableToRead = (_compressedSize - _reachedSize >= _buffer.Length) ? _buffer.Length : Convert.ToInt32(_compressedSize - _reachedSize); + int bytes = _stream.Read(_buffer, 0, availableToRead > 0 ? availableToRead : 1); + _reachedSize += bytes; + + if (bytes <= 0) + { + break; + } + else if (bytes > _buffer.Length) + { + // The stream is either malicious or poorly implemented and returned a number of + // bytes larger than the buffer supplied to it. 
+ throw new InvalidDataException(); + } + + Inflater.SetInput(_buffer, 0, bytes); + } + + return count - remainingCount; + } + + private void ValidateParameters(byte[] array, int offset, int count) + { + if (array == null) + throw new ArgumentNullException(nameof(array)); + + if (offset < 0) + throw new ArgumentOutOfRangeException(nameof(offset)); + + if (count < 0) + throw new ArgumentOutOfRangeException(nameof(count)); + + if (array.Length - offset < count) + throw new ArgumentException("InvalidArgumentOffsetCount"); + } + + private void EnsureNotDisposed() + { + if (_stream == null) + ThrowStreamClosedException(); + } + + private static void ThrowStreamClosedException() + { + throw new ObjectDisposedException(null, "ObjectDisposed_StreamClosed"); + } + + /// + /// Asynchronous reads are not supported; a NotImplementedException is always thrown + /// + public override IAsyncResult BeginRead(byte[] buffer, int offset, int count, AsyncCallback asyncCallback, object asyncState) + { + throw new NotImplementedException(); + } + + /// + /// Asynchronous reads are not supported; a NotImplementedException is always thrown + /// + public override int EndRead(IAsyncResult asyncResult) + { + throw new NotImplementedException(); + } + + /// + /// Writes bytes from an array to the decompressed stream. + /// This method is not supported and always throws an InvalidOperationException + /// + public override void Write(byte[] array, int offset, int count) + { + throw new InvalidOperationException("CannotWriteToDeflateStream"); + } + + // This is called by Dispose: + private void PurgeBuffers(bool disposing) + { + if (!disposing) + return; + + if (_stream == null) + return; + + Flush(); + } + + /// + /// Stream disposal + /// + protected override void Dispose(bool disposing) + { + try + { + PurgeBuffers(disposing); + } + finally + { + // Close the underlying stream even if PurgeBuffers threw. + // Stream.Close() may throw here (may or may not be due to the same error). 
+ // In this case, we still need to clean up internal resources, hence the inner finally blocks. + try + { + if (disposing && _stream != null) + _stream.Dispose(); + } + finally + { + _stream = null; + + try + { + if (Inflater != null) + Inflater.Dispose(); + } + finally + { + Inflater = null; + base.Dispose(disposing); + } + } + } + } + } + + // This class can be used to read bits from a byte array quickly. + // Normally we get bits from 'bitBuffer' field and bitsInBuffer stores + // the number of bits available in 'bitBuffer'. + // When we used up the bits in bitBuffer, we will try to get byte from + // the byte array and copy the byte to appropriate position in bitBuffer. + // + // The byte array is not reused. We will go from 'start' to 'end'. + // When we reach the end, most read operations will return -1, + // which means we are running out of input. + + internal sealed class InputBuffer + { + private byte[] _buffer; // byte array to store input + private int _start; // start position of the buffer + private int _end; // end position of the buffer + private uint _bitBuffer = 0; // store the bits here, we can quickly shift in this buffer + private int _bitsInBuffer = 0; // number of bits available in bitBuffer + + /// Total bits available in the input buffer. + public int AvailableBits => _bitsInBuffer; + + /// Total bytes available in the input buffer. + public int AvailableBytes => (_end - _start) + (_bitsInBuffer / 8); + + /// Ensure that count bits are in the bit buffer. + /// Can be up to 16. + /// Returns false if input is not sufficient to make this true. 
public bool EnsureBitsAvailable(int count)
{
    Debug.Assert(0 < count && count <= 16, "count is invalid.");

    // A request is capped at 16 bits, so at most two byte refills are attempted.
    for (int refill = 0; refill < 2 && _bitsInBuffer < count; refill++)
    {
        if (NeedsInput())
        {
            // Input ran dry; any byte already shifted in stays in the bit buffer.
            return false;
        }

        Debug.Assert(_buffer != null);
        // New bits are stacked above the bits that are still pending.
        _bitBuffer |= (uint)_buffer[_start++] << _bitsInBuffer;
        _bitsInBuffer += 8;
    }

    return true;
}

/// <summary>
/// Tops the bit buffer up to at least 16 bits when the input allows it and
/// returns the raw content of the bit buffer afterwards.
/// Unlike GetBits this never reports a shortage with -1, so the caller
/// has to consult AvailableBits to learn how many of the returned bits are valid.
/// </summary>
public uint TryLoad16Bits()
{
    Debug.Assert(_buffer != null);

    // Fewer than 8 pending bits -> two bytes are wanted, fewer than 16 -> one byte.
    int bytesWanted;
    if (_bitsInBuffer < 8)
    {
        bytesWanted = 2;
    }
    else if (_bitsInBuffer < 16)
    {
        bytesWanted = 1;
    }
    else
    {
        bytesWanted = 0;
    }

    while (bytesWanted > 0 && _start < _end)
    {
        _bitBuffer |= (uint)_buffer[_start++] << _bitsInBuffer;
        _bitsInBuffer += 8;
        bytesWanted--;
    }

    return _bitBuffer;
}

// Mask with the lowest 'count' bits set.
private uint GetBitMask(int count)
{
    return (1u << count) - 1;
}

/// <summary>
/// Removes count bits from the input buffer and returns them.
/// Returns -1 when the input cannot supply that many bits.
/// </summary>
public int GetBits(int count)
{
    Debug.Assert(0 < count && count <= 16, "count is invalid.");

    if (EnsureBitsAvailable(count))
    {
        uint lowBits = _bitBuffer & GetBitMask(count);
        _bitBuffer >>= count;
        _bitsInBuffer -= count;
        return (int)lowBits;
    }

    return -1;
}

///
/// Copies length bytes from input buffer to output buffer starting at output[offset].
/// You have to make sure, that the buffer is byte aligned. If not enough bytes are
/// available, copies fewer bytes.
///
/// Returns the number of bytes copied, 0 if no byte is available.
public int CopyTo(byte[] output, int offset, int length)
{
    Debug.Assert(output != null);
    Debug.Assert(offset >= 0);
    Debug.Assert(length >= 0);
    Debug.Assert(offset <= output.Length - length);
    Debug.Assert((_bitsInBuffer % 8) == 0);

    // Whole bytes still parked in the bit buffer go out first, lowest byte first.
    int drained = 0;
    for (; _bitsInBuffer > 0 && length > 0; length--, drained++)
    {
        output[offset++] = (byte)_bitBuffer;
        _bitBuffer >>= 8;
        _bitsInBuffer -= 8;
    }

    if (length == 0)
    {
        return drained;
    }

    // The remainder comes straight from the byte array, limited to what is left in it.
    int bulk = Math.Min(length, _end - _start);

    Debug.Assert(_buffer != null);
    Array.Copy(_buffer, _start, output, offset, bulk);
    _start += bulk;
    return drained + bulk;
}

/// <summary>
/// Returns true once every byte of the current input array has been consumed
/// (the start position has caught up with the end position).
/// </summary>
public bool NeedsInput()
{
    return _start == _end;
}

/// <summary>
/// Installs a new byte range to read from. The array is stored by reference,
/// not copied. The range is only accepted when the previous one has been
/// fully consumed; otherwise the call leaves the buffer state untouched.
/// Bits still held in the bit buffer are not affected.
/// </summary>
public void SetInput(byte[] buffer, int offset, int length)
{
    Debug.Assert(buffer != null);
    Debug.Assert(offset >= 0);
    Debug.Assert(length >= 0);
    Debug.Assert(offset <= buffer.Length - length);

    if (_start != _end)
    {
        // Unread bytes remain in the current array: keep reading from it.
        return;
    }

    _buffer = buffer;
    _start = offset;
    _end = offset + length;
}

/// Skip n bits in the buffer.
+ public void SkipBits(int n) + { + Debug.Assert(_bitsInBuffer >= n, "No enough bits in the buffer, Did you call EnsureBitsAvailable?"); + _bitBuffer >>= n; + _bitsInBuffer -= n; + } + + /// Skips to the next byte boundary. + public void SkipToByteBoundary() + { + _bitBuffer >>= (_bitsInBuffer % 8); + _bitsInBuffer = _bitsInBuffer - (_bitsInBuffer % 8); + } + } +} diff --git a/src/ICSharpCode.SharpZipLib/Zip/Deflate64/HuffmanTree.cs b/src/ICSharpCode.SharpZipLib/Zip/Deflate64/HuffmanTree.cs new file mode 100644 index 000000000..737585ef9 --- /dev/null +++ b/src/ICSharpCode.SharpZipLib/Zip/Deflate64/HuffmanTree.cs @@ -0,0 +1,323 @@ +// The content of the class is borrowed from DEFLATE64 support implementation for DotNetZip +// which on its part contains modified code from the .NET Core Libraries (CoreFX and System.IO.Compression/DeflateManaged) +// where deflate64 decompression is implemented. +// https://github.com/haf/DotNetZip.Semverd/blob/master/src/Zip.Shared/Deflate64/HuffmanTree.cs + +using System.Diagnostics; +using System.IO; + +namespace ICSharpCode.SharpZipLib.Zip.Deflate64 +{ + // Strictly speaking this class is not a HuffmanTree, this class is + // a lookup table combined with a HuffmanTree. The idea is to speed up + // the lookup for short symbols (they should appear more frequently ideally.) + // However we don't want to create a huge table since it might take longer to + // build the table than decoding (Deflate usually generates new tables frequently.) + // Jean-loup Gailly and Mark Adler gave a very good explanation about this. + // The full text (algorithm.txt) can be found inside + // ftp://ftp.uu.net/pub/archiving/zip/zlib/zlib.zip. + // Following paper explains decoding in details: + // Hirschberg and Lelewer, "Efficient decoding of prefix codes," + // Comm. ACM, 33,4, April 1990, pp. 449-459. 
+ + internal sealed class HuffmanTree + { + internal const int MaxLiteralTreeElements = 288; + internal const int MaxDistTreeElements = 32; + internal const int EndOfBlockCode = 256; + internal const int NumberOfCodeLengthTreeElements = 19; + + private readonly int _tableBits; + private readonly short[] _table; + private readonly short[] _left; + private readonly short[] _right; + private readonly byte[] _codeLengthArray; +#if DEBUG + private uint[] _codeArrayDebug; +#endif + + private readonly int _tableMask; + + // huffman tree for static block + public static HuffmanTree StaticLiteralLengthTree { get; } = new HuffmanTree(GetStaticLiteralTreeLength()); + + public static HuffmanTree StaticDistanceTree { get; } = new HuffmanTree(GetStaticDistanceTreeLength()); + + public HuffmanTree(byte[] codeLengths) + { + Debug.Assert( + codeLengths.Length == MaxLiteralTreeElements || + codeLengths.Length == MaxDistTreeElements || + codeLengths.Length == NumberOfCodeLengthTreeElements, + "we only expect three kinds of Length here"); + _codeLengthArray = codeLengths; + + if (_codeLengthArray.Length == MaxLiteralTreeElements) + { + // bits for Literal/Length tree table + _tableBits = 9; + } + else + { + // bits for distance tree table and code length tree table + _tableBits = 7; + } + _tableMask = (1 << _tableBits) - 1; + + _table = new short[1 << _tableBits]; + + // I need to find proof that left and right array will always be + // enough. I think they are. + _left = new short[2 * _codeLengthArray.Length]; + _right = new short[2 * _codeLengthArray.Length]; + + CreateTable(); + } + + // Generate the array contains huffman codes lengths for static huffman tree. + // The data is in RFC 1951. 
private static byte[] GetStaticLiteralTreeLength()
{
    byte[] lengths = new byte[MaxLiteralTreeElements];

    // Fixed code lengths per symbol range: 0-143 -> 8, 144-255 -> 9,
    // 256-279 -> 7, 280-287 -> 8.
    for (int symbol = 0; symbol <= 287; symbol++)
    {
        byte bits;
        if (symbol <= 143)
        {
            bits = 8;
        }
        else if (symbol <= 255)
        {
            bits = 9;
        }
        else if (symbol <= 279)
        {
            bits = 7;
        }
        else
        {
            bits = 8;
        }

        lengths[symbol] = bits;
    }

    return lengths;
}

// Builds the code length array of the static distance tree: every entry is 5 bits.
private static byte[] GetStaticDistanceTreeLength()
{
    byte[] lengths = new byte[MaxDistTreeElements];

    int slot = 0;
    while (slot < MaxDistTreeElements)
    {
        lengths[slot] = 5;
        slot++;
    }

    return lengths;
}

// Returns the lowest 'length' bits of 'code' in reversed bit order.
private static uint BitReverse(uint code, int length)
{
    uint reversed = 0;
    int remaining = length;

    Debug.Assert(length > 0 && length <= 16, "Invalid len");
    do
    {
        // Take the current lowest bit of code, then make room for the next one.
        reversed = (reversed | (code & 1)) << 1;
        code >>= 1;
    } while (--remaining > 0);

    // The loop shifts once more than needed; undo that last shift.
    return reversed >> 1;
}

// Calculate the huffman code for each character based on the code length for each character.
+ // This algorithm is described in standard RFC 1951 + private uint[] CalculateHuffmanCode() + { + uint[] bitLengthCount = new uint[17]; + foreach (int codeLength in _codeLengthArray) + { + bitLengthCount[codeLength]++; + } + bitLengthCount[0] = 0; // clear count for length 0 + + uint[] nextCode = new uint[17]; + uint tempCode = 0; + for (int bits = 1; bits <= 16; bits++) + { + tempCode = (tempCode + bitLengthCount[bits - 1]) << 1; + nextCode[bits] = tempCode; + } + + uint[] code = new uint[MaxLiteralTreeElements]; + for (int i = 0; i < _codeLengthArray.Length; i++) + { + int len = _codeLengthArray[i]; + + if (len > 0) + { + code[i] = BitReverse(nextCode[len], len); + nextCode[len]++; + } + } + return code; + } + + private void CreateTable() + { + uint[] codeArray = CalculateHuffmanCode(); +#if DEBUG + _codeArrayDebug = codeArray; +#endif + + short avail = (short)_codeLengthArray.Length; + + for (int ch = 0; ch < _codeLengthArray.Length; ch++) + { + // length of this code + int len = _codeLengthArray[ch]; + if (len > 0) + { + // start value (bit reversed) + int start = (int)codeArray[ch]; + + if (len <= _tableBits) + { + // If a particular symbol is shorter than nine bits, + // then that symbol's translation is duplicated + // in all those entries that start with that symbol's bits. + // For example, if the symbol is four bits, then it's duplicated + // 32 times in a nine-bit table. If a symbol is nine bits long, + // it appears in the table once. + // + // Make sure that in the loop below, code is always + // less than table_size. 
+ // + // On last iteration we store at array index: + // initial_start_at + (locs-1)*increment + // = initial_start_at + locs*increment - increment + // = initial_start_at + (1 << tableBits) - increment + // = initial_start_at + table_size - increment + // + // Therefore we must ensure: + // initial_start_at + table_size - increment < table_size + // or: initial_start_at < increment + // + int increment = 1 << len; + if (start >= increment) + { + throw new InvalidDataException("InvalidHuffmanData"); + } + + // Note the bits in the table are reversed. + int locs = 1 << (_tableBits - len); + for (int j = 0; j < locs; j++) + { + _table[start] = (short)ch; + start += increment; + } + } + else + { + // For any code which has length longer than tableBits, + // build a binary tree. + + int overflowBits = len - _tableBits; // the nodes we need to represent the data. + int codeBitMask = 1 << _tableBits; // mask to get current bit (the bits can't fit in the table) + + // the left, right table is used to represent + // the rest of the bits. When we got the first part (number bits.) and look at + // the table, we will need to follow the tree to find the real character. + // This is in place to avoid bloating the table if there are + // a few ones with long code. + int index = start & ((1 << _tableBits) - 1); + short[] array = _table; + + do + { + short value = array[index]; + + if (value == 0) + { + // set up next pointer if this node is not used before. + array[index] = (short)-avail; // use next available slot. 
+ value = (short)-avail; + avail++; + } + + if (value > 0) + { + // prevent an IndexOutOfRangeException from array[index] + throw new InvalidDataException("InvalidHuffmanData"); + } + + Debug.Assert(value < 0, "CreateTable: Only negative numbers are used for tree pointers!"); + + if ((start & codeBitMask) == 0) + { + // if current bit is 0, go change the left array + array = _left; + } + else + { + // if current bit is 1, set value in the right array + array = _right; + } + index = -value; // go to next node + + codeBitMask <<= 1; + overflowBits--; + } while (overflowBits != 0); + + array[index] = (short)ch; + } + } + } + } + + // This function will try to get enough bits from input and + // try to decode the bits. + // If there are no enought bits in the input, this function will return -1. + public int GetNextSymbol(InputBuffer input) + { + // Try to load 16 bits into input buffer if possible and get the bitBuffer value. + // If there aren't 16 bits available we will return all we have in the + // input buffer. + uint bitBuffer = input.TryLoad16Bits(); + if (input.AvailableBits == 0) + { // running out of input. + return -1; + } + + // decode an element + int symbol = _table[bitBuffer & _tableMask]; + if (symbol < 0) + { // this will be the start of the binary tree + // navigate the tree + uint mask = (uint)1 << _tableBits; + do + { + symbol = -symbol; + if ((bitBuffer & mask) == 0) + symbol = _left[symbol]; + else + symbol = _right[symbol]; + mask <<= 1; + } while (symbol < 0); + } + + int codeLength = _codeLengthArray[symbol]; + + // huffman code lengths must be at least 1 bit long + if (codeLength <= 0) + { + throw new InvalidDataException("InvalidHuffmanData"); + } + + // + // If this code is longer than the # bits we had in the bit buffer (i.e. + // we read only part of the code), we can hit the entry in the table or the tree + // for another symbol. However the length of another symbol will not match the + // available bits count. 
+ if (codeLength > input.AvailableBits) + { + // We already tried to load 16 bits and maximum length is 15, + // so this means we are running out of input. + return -1; + } + + input.SkipBits(codeLength); + return symbol; + } + } +} diff --git a/src/ICSharpCode.SharpZipLib/Zip/Deflate64/InflaterManaged.cs b/src/ICSharpCode.SharpZipLib/Zip/Deflate64/InflaterManaged.cs new file mode 100644 index 000000000..b4dc5a7a8 --- /dev/null +++ b/src/ICSharpCode.SharpZipLib/Zip/Deflate64/InflaterManaged.cs @@ -0,0 +1,794 @@ +// The content of the class is borrowed from DEFLATE64 support implementation for DotNetZip +// which on its part contains modified code from the .NET Core Libraries (CoreFX and System.IO.Compression/DeflateManaged) +// where deflate64 decompression is implemented. +// https://github.com/haf/DotNetZip.Semverd/blob/master/src/Zip.Shared/Deflate64/InflaterManaged.cs + +using System; +using System.Diagnostics; +using System.IO; + +namespace ICSharpCode.SharpZipLib.Zip.Deflate64 +{ + internal sealed class InflaterManaged + { + // Const tables used in decoding: + // Extra bits for length code 257 - 285. + private static readonly byte[] s_extraLengthBits = + { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, + 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 16 + }; + + // The base length for length code 257 - 285. 
+ // The formula to get the real length for a length code is s_lengthBase[code - 257] + (value stored in extraBits) + private static readonly int[] s_lengthBase = + { + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, + 59, 67, 83, 99, 115, 131, 163, 195, 227, 3 + }; + + // The base distance for distance code 0 - 31 + // The real distance for a distance code is s_distanceBasePosition[code] + (value stored in extraBits) + private static readonly int[] s_distanceBasePosition = + { + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, + 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 32769, 49153 + }; + + // Code lengths for the code length alphabet are stored in the following order + private static readonly byte[] s_codeOrder = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; + + private static readonly byte[] s_staticDistanceTreeTable = + { + 0x00, 0x10, 0x08, 0x18, 0x04, 0x14, 0x0c, 0x1c, 0x02, 0x12, 0x0a, 0x1a, + 0x06, 0x16, 0x0e, 0x1e, 0x01, 0x11, 0x09, 0x19, 0x05, 0x15, 0x0d, 0x1d, + 0x03, 0x13, 0x0b, 0x1b, 0x07, 0x17, 0x0f, 0x1f + }; + + private readonly Deflate64OutputWindow _output; + private readonly InputBuffer _input; + private HuffmanTree _literalLengthTree; + private HuffmanTree _distanceTree; + + private InflaterState _state; + private readonly bool _hasFormatReader; + private int _bfinal; + private BlockType _blockType; + + // uncompressed block + private readonly byte[] _blockLengthBuffer = new byte[4]; + private int _blockLength; + + // compressed block + private int _length; + private int _distanceCode; + private int _extraBits; + + private int _loopCounter; + private int _literalLengthCodeCount; + private int _distanceCodeCount; + private int _codeLengthCodeCount; + private int _codeArraySize; + private int _lengthCode; + + private readonly byte[] _codeList; // temporary array to store the code lengths for the literal/length and distance alphabets + private readonly byte[]
_codeLengthTreeCodeLength; + private readonly bool _deflate64; + private HuffmanTree _codeLengthTree; + private readonly long _uncompressedSize; + private long _currentInflatedCount; + + private readonly IFileFormatReader _formatReader; // class to decode header and footer (e.g. gzip); stays null when the constructor is given no reader + + internal InflaterManaged(IFileFormatReader reader, bool deflate64, long uncompressedSize) + { + _output = new Deflate64OutputWindow(); + _input = new InputBuffer(); + + _codeList = new byte[HuffmanTree.MaxLiteralTreeElements + HuffmanTree.MaxDistTreeElements]; + _codeLengthTreeCodeLength = new byte[HuffmanTree.NumberOfCodeLengthTreeElements]; + _deflate64 = deflate64; + _uncompressedSize = uncompressedSize; + if (reader != null) + { + _formatReader = reader; + _hasFormatReader = true; + } + Reset(); + } + + private void Reset() + { + _state = _hasFormatReader ? + InflaterState.ReadingHeader : // start by reading Header info + InflaterState.ReadingBFinal; // start by reading BFinal bit + } + + public void SetInput(byte[] inputBytes, int offset, int length) => + _input.SetInput(inputBytes, offset, length); // append the bytes + + public bool Finished() => _state == InflaterState.Done || _state == InflaterState.VerifyingFooter; + + public int AvailableOutput => _output.AvailableBytes; + + public int Inflate(byte[] bytes, int offset, int length) + { + // Copy bytes from the output window into the caller's buffer while any are available. + // If the buffer is not filled up yet, keep decoding until no more input is available. + // When Decode returns false (more input is needed) the loop ends and the count copied so far is returned.
+ int count = 0; + do + { + int copied = 0; + if (_uncompressedSize == -1) + { + copied = _output.CopyTo(bytes, offset, length); + } + else + { + if (_uncompressedSize > _currentInflatedCount) + { + length = (int)Math.Min(length, _uncompressedSize - _currentInflatedCount); + copied = _output.CopyTo(bytes, offset, length); + _currentInflatedCount += copied; + } + else + { + _state = InflaterState.Done; + _output.ClearBytesUsed(); + } + } + if (copied > 0) + { + if (_hasFormatReader) + { + Debug.Assert(_formatReader != null); + _formatReader.UpdateWithBytesRead(bytes, offset, copied); + } + + offset += copied; + count += copied; + length -= copied; + } + + if (length == 0) + { // the requested length has been copied + break; + } + // Decode will return false when more input is needed + } while (!Finished() && Decode()); + + if (_state == InflaterState.VerifyingFooter) + { // the footer has been read + // In this case Finished() is true and the output window has all the data. + // But some data in the output window might not be copied out yet, so validate only once it is empty. + if (_output.AvailableBytes == 0) + { + Debug.Assert(_formatReader != null); + _formatReader.Validate(); + } + } + + return count; + } + + // Each block of compressed data begins with 3 header bits + // containing the following data: + // first bit BFINAL + // next 2 bits BTYPE + // Note that the header bits do not necessarily begin on a byte + // boundary, since a block does not necessarily occupy an integral + // number of bytes. + // BFINAL is set if and only if this is the last block of the data + // set. + // BTYPE specifies how the data are compressed, as follows: + // 00 - no compression + // 01 - compressed with fixed Huffman codes + // 10 - compressed with dynamic Huffman codes + // 11 - reserved (error) + // The only difference between the two compressed cases is how the + // Huffman codes for the literal/length and distance alphabets are + // defined.
+ // + // This function returns true for success (end of block or output window is full), + // false if we are short of input + // + private bool Decode() + { + bool eob = false; + bool result = false; + + if (Finished()) + { + return true; + } + + if (_hasFormatReader) + { + Debug.Assert(_formatReader != null); + if (_state == InflaterState.ReadingHeader) + { + if (!_formatReader.ReadHeader(_input)) + { + return false; + } + _state = InflaterState.ReadingBFinal; + } + else if (_state == InflaterState.StartReadingFooter || _state == InflaterState.ReadingFooter) + { + if (!_formatReader.ReadFooter(_input)) + return false; + + _state = InflaterState.VerifyingFooter; + return true; + } + } + + if (_state == InflaterState.ReadingBFinal) + { + // reading bfinal bit + // Need 1 bit + if (!_input.EnsureBitsAvailable(1)) + return false; + + _bfinal = _input.GetBits(1); + _state = InflaterState.ReadingBType; + } + + if (_state == InflaterState.ReadingBType) + { + // Need 2 bits for the block type (BTYPE) + if (!_input.EnsureBitsAvailable(2)) + { + _state = InflaterState.ReadingBType; + return false; + } + + _blockType = (BlockType)_input.GetBits(2); + if (_blockType == BlockType.Dynamic) + { + _state = InflaterState.ReadingNumLitCodes; + } + else if (_blockType == BlockType.Static) + { + _literalLengthTree = HuffmanTree.StaticLiteralLengthTree; + _distanceTree = HuffmanTree.StaticDistanceTree; + _state = InflaterState.DecodeTop; + } + else if (_blockType == BlockType.Uncompressed) + { + _state = InflaterState.UncompressedAligning; + } + else + { + throw new InvalidDataException("UnknownBlockType"); + } + } + + if (_blockType == BlockType.Dynamic) + { + if (_state < InflaterState.DecodeTop) + { + // we are reading the header + result = DecodeDynamicBlockHeader(); + } + else + { + result = DecodeBlock(out eob); // this can return true when the output window is full + } + } + else if (_blockType == BlockType.Static) + { + result = DecodeBlock(out eob); + } + else if (_blockType == BlockType.Uncompressed) + { +
result = DecodeUncompressedBlock(out eob); + } + else + { + throw new InvalidDataException("UnknownBlockType"); + } + + // If we reached the end of the block and the block we were decoding had + // bfinal=1 (final block), switch to reading the footer when a format reader is present; otherwise we are done + if (eob && (_bfinal != 0)) + { + if (_hasFormatReader) + _state = InflaterState.StartReadingFooter; + else + _state = InflaterState.Done; + } + return result; + } + + + // Format of Non-compressed blocks (BTYPE=00): + // + // Any bits of input up to the next byte boundary are ignored. + // The rest of the block consists of the following information: + // + // 0 1 2 3 4... + // +---+---+---+---+================================+ + // | LEN | NLEN |... LEN bytes of literal data...| + // +---+---+---+---+================================+ + // + // LEN is the number of data bytes in the block. NLEN is the + // one's complement of LEN. + private bool DecodeUncompressedBlock(out bool end_of_block) + { + end_of_block = false; + while (true) + { + switch (_state) + { + case InflaterState.UncompressedAligning: // initial state when calling this function + // we must skip to a byte boundary + _input.SkipToByteBoundary(); + _state = InflaterState.UncompressedByte1; + goto case InflaterState.UncompressedByte1; + + case InflaterState.UncompressedByte1: // decoding block length: four bytes (LEN, NLEN), one per state + case InflaterState.UncompressedByte2: + case InflaterState.UncompressedByte3: + case InflaterState.UncompressedByte4: + int bits = _input.GetBits(8); + if (bits < 0) + { + return false; + } + + _blockLengthBuffer[_state - InflaterState.UncompressedByte1] = (byte)bits; + if (_state == InflaterState.UncompressedByte4) + { + _blockLength = _blockLengthBuffer[0] + ((int)_blockLengthBuffer[1]) * 256; + int blockLengthComplement = _blockLengthBuffer[2] + ((int)_blockLengthBuffer[3]) * 256; + + // make sure complement matches + if ((ushort)_blockLength != (ushort)(~blockLengthComplement)) + { + throw new InvalidDataException("InvalidBlockLength"); + } + } + + _state += 1; + break; + + case
InflaterState.DecodingUncompressed: // copying block data + + // Directly copy bytes from input to output. + int bytesCopied = _output.CopyFrom(_input, _blockLength); + _blockLength -= bytesCopied; + + if (_blockLength == 0) + { + // Done with this block, need to re-init bit buffer for next block + _state = InflaterState.ReadingBFinal; + end_of_block = true; + return true; + } + + // We can fail to copy all bytes for two reasons: + // running out of input (return false so the caller supplies more) + // running out of free space in the output window (return true) + if (_output.FreeBytes == 0) + { + return true; + } + + return false; + + default: + Debug.Fail("check why we are here!"); + throw new InvalidDataException("UnknownState"); + } + } + } + + private bool DecodeBlock(out bool end_of_block_code_seen) + { + end_of_block_code_seen = false; + + int freeBytes = _output.FreeBytes; // it is a little bit faster than frequently accessing the property + while (freeBytes > 65536) + { + // With Deflate64 we can have up to a 64kb length, so we ensure at least that much space is available + // in the Deflate64OutputWindow to avoid overwriting previous unflushed output data.
+ + int symbol; + switch (_state) + { + case InflaterState.DecodeTop: + // decode an element from the literal/length tree + + Debug.Assert(_literalLengthTree != null); + symbol = _literalLengthTree.GetNextSymbol(_input); + if (symbol < 0) + { + // running out of input + return false; + } + + if (symbol < 256) + { + // literal + _output.Write((byte)symbol); + --freeBytes; + } + else if (symbol == 256) + { + // end of block + end_of_block_code_seen = true; + // Reset state + _state = InflaterState.ReadingBFinal; + return true; + } + else + { + // length/distance pair + symbol -= 257; // length codes start at 257 + if (symbol < 8) + { + symbol += 3; // match length = 3,4,5,6,7,8,9,10 + _extraBits = 0; + } + else if (!_deflate64 && symbol == 28) + { + // plain deflate (not Deflate64): code 285 has 0 extra bits + symbol = 258; // code 285 means length 258 + _extraBits = 0; + } + else + { + if (symbol < 0 || symbol >= s_extraLengthBits.Length) + { + throw new InvalidDataException("GenericInvalidData"); + } + _extraBits = s_extraLengthBits[symbol]; + Debug.Assert(_extraBits != 0, "We handle other cases separately!"); + } + _length = symbol; + goto case InflaterState.HaveInitialLength; + } + break; + + case InflaterState.HaveInitialLength: + if (_extraBits > 0) + { + _state = InflaterState.HaveInitialLength; + int bits = _input.GetBits(_extraBits); + if (bits < 0) + { + return false; + } + + if (_length < 0 || _length >= s_lengthBase.Length) + { + throw new InvalidDataException("GenericInvalidData"); + } + _length = s_lengthBase[_length] + bits; + } + _state = InflaterState.HaveFullLength; + goto case InflaterState.HaveFullLength; + + case InflaterState.HaveFullLength: + if (_blockType == BlockType.Dynamic) + { + Debug.Assert(_distanceTree != null); + _distanceCode = _distanceTree.GetNextSymbol(_input); + } + else + { + // get distance code directly for static block + _distanceCode = _input.GetBits(5); + if (_distanceCode >= 0) + { + _distanceCode = s_staticDistanceTreeTable[_distanceCode]; + } + } + + if
(_distanceCode < 0) + { + // running out of input + return false; + } + + _state = InflaterState.HaveDistCode; + goto case InflaterState.HaveDistCode; + + case InflaterState.HaveDistCode: + // To avoid a table lookup we note that for distanceCode > 3, + // extra_bits = (distanceCode-2) >> 1 + int offset; + if (_distanceCode > 3) + { + _extraBits = (_distanceCode - 2) >> 1; + int bits = _input.GetBits(_extraBits); + if (bits < 0) + { + return false; + } + offset = s_distanceBasePosition[_distanceCode] + bits; + } + else + { + offset = _distanceCode + 1; + } + + _output.WriteLengthDistance(_length, offset); + freeBytes -= _length; + _state = InflaterState.DecodeTop; + break; + + default: + Debug.Fail("check why we are here!"); + throw new InvalidDataException("UnknownState"); + } + } + + return true; // the tracked free space is no longer above 65536 bytes + } + + + // Format of the dynamic block header: + // 5 Bits: HLIT, # of Literal/Length codes - 257 (257 - 286) + // 5 Bits: HDIST, # of Distance codes - 1 (1 - 32) + // 4 Bits: HCLEN, # of Code Length codes - 4 (4 - 19) + // + // (HCLEN + 4) x 3 bits: code lengths for the code length + // alphabet given just above, in the order: 16, 17, 18, + // 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 + // + // These code lengths are interpreted as 3-bit integers + // (0-7); as above, a code length of 0 means the + // corresponding symbol (literal/length or distance code + // length) is not used. + // + // HLIT + 257 code lengths for the literal/length alphabet, + // encoded using the code length Huffman code + // + // HDIST + 1 code lengths for the distance alphabet, + // encoded using the code length Huffman code + // + // The code length repeat codes can cross from HLIT + 257 to the + // HDIST + 1 code lengths. In other words, all code lengths form + // a single sequence of HLIT + HDIST + 258 values.
+ private bool DecodeDynamicBlockHeader() + { + switch (_state) + { + case InflaterState.ReadingNumLitCodes: + _literalLengthCodeCount = _input.GetBits(5); + if (_literalLengthCodeCount < 0) + { + return false; + } + _literalLengthCodeCount += 257; + _state = InflaterState.ReadingNumDistCodes; + goto case InflaterState.ReadingNumDistCodes; + + case InflaterState.ReadingNumDistCodes: + _distanceCodeCount = _input.GetBits(5); + if (_distanceCodeCount < 0) + { + return false; + } + _distanceCodeCount += 1; + _state = InflaterState.ReadingNumCodeLengthCodes; + goto case InflaterState.ReadingNumCodeLengthCodes; + + case InflaterState.ReadingNumCodeLengthCodes: + _codeLengthCodeCount = _input.GetBits(4); + if (_codeLengthCodeCount < 0) + { + return false; + } + _codeLengthCodeCount += 4; + _loopCounter = 0; + _state = InflaterState.ReadingCodeLengthCodes; + goto case InflaterState.ReadingCodeLengthCodes; + + case InflaterState.ReadingCodeLengthCodes: + while (_loopCounter < _codeLengthCodeCount) + { + int bits = _input.GetBits(3); + if (bits < 0) + { + return false; + } + _codeLengthTreeCodeLength[s_codeOrder[_loopCounter]] = (byte)bits; + ++_loopCounter; + } + + for (int i = _codeLengthCodeCount; i < s_codeOrder.Length; i++) + { + _codeLengthTreeCodeLength[s_codeOrder[i]] = 0; + } + + // create the Huffman tree for the code length alphabet + _codeLengthTree = new HuffmanTree(_codeLengthTreeCodeLength); + _codeArraySize = _literalLengthCodeCount + _distanceCodeCount; + _loopCounter = 0; // reset the loop counter; from here on it indexes _codeList + + _state = InflaterState.ReadingTreeCodesBefore; + goto case InflaterState.ReadingTreeCodesBefore; + + case InflaterState.ReadingTreeCodesBefore: + case InflaterState.ReadingTreeCodesAfter: + while (_loopCounter < _codeArraySize) + { + if (_state == InflaterState.ReadingTreeCodesBefore) + { + Debug.Assert(_codeLengthTree != null); + if ((_lengthCode = _codeLengthTree.GetNextSymbol(_input)) < 0) + { + return false; + } + } + + // The alphabet for code lengths is as follows: + // 0 -
15: Represent code lengths of 0 - 15 + // 16: Copy the previous code length 3 - 6 times. + // The next 2 bits indicate repeat length + // (0 = 3, ... , 3 = 6) + // Example: Codes 8, 16 (+2 bits 11), + // 16 (+2 bits 10) will expand to + // 12 code lengths of 8 (1 + 6 + 5) + // 17: Repeat a code length of 0 for 3 - 10 times. + // (3 bits of length) + // 18: Repeat a code length of 0 for 11 - 138 times + // (7 bits of length) + if (_lengthCode <= 15) + { + _codeList[_loopCounter++] = (byte)_lengthCode; + } + else + { + int repeatCount; + if (_lengthCode == 16) + { + if (!_input.EnsureBitsAvailable(2)) + { + _state = InflaterState.ReadingTreeCodesAfter; + return false; + } + + if (_loopCounter == 0) + { + // can't have a "previous code" on the very first code + throw new InvalidDataException(); + } + + byte previousCode = _codeList[_loopCounter - 1]; + repeatCount = _input.GetBits(2) + 3; + + if (_loopCounter + repeatCount > _codeArraySize) + { + throw new InvalidDataException(); + } + + for (int j = 0; j < repeatCount; j++) + { + _codeList[_loopCounter++] = previousCode; + } + } + else if (_lengthCode == 17) + { + if (!_input.EnsureBitsAvailable(3)) + { + _state = InflaterState.ReadingTreeCodesAfter; + return false; + } + + repeatCount = _input.GetBits(3) + 3; + + if (_loopCounter + repeatCount > _codeArraySize) + { + throw new InvalidDataException(); + } + + for (int j = 0; j < repeatCount; j++) + { + _codeList[_loopCounter++] = 0; + } + } + else + { + // _lengthCode == 18 + if (!_input.EnsureBitsAvailable(7)) + { + _state = InflaterState.ReadingTreeCodesAfter; + return false; + } + + repeatCount = _input.GetBits(7) + 11; + + if (_loopCounter + repeatCount > _codeArraySize) + { + throw new InvalidDataException(); + } + + for (int j = 0; j < repeatCount; j++) + { + _codeList[_loopCounter++] = 0; + } + } + } + _state = InflaterState.ReadingTreeCodesBefore; // we want to read the next code.
+ } + break; + + default: + Debug.Fail("check why we are here!"); + throw new InvalidDataException("UnknownState"); + } + + byte[] literalTreeCodeLength = new byte[HuffmanTree.MaxLiteralTreeElements]; + byte[] distanceTreeCodeLength = new byte[HuffmanTree.MaxDistTreeElements]; + + // Split the combined code length list into the literal/length and distance tables + Array.Copy(_codeList, 0, literalTreeCodeLength, 0, _literalLengthCodeCount); + Array.Copy(_codeList, _literalLengthCodeCount, distanceTreeCodeLength, 0, _distanceCodeCount); + + // Make sure there is an end-of-block code, otherwise how could we ever end? + if (literalTreeCodeLength[HuffmanTree.EndOfBlockCode] == 0) + { + throw new InvalidDataException(); + } + + _literalLengthTree = new HuffmanTree(literalTreeCodeLength); + _distanceTree = new HuffmanTree(distanceTreeCodeLength); + _state = InflaterState.DecodeTop; + return true; + } + + public void Dispose() { } + } + + // Do not rearrange the enum values: the decoder compares states numerically (_state < InflaterState.DecodeTop) and steps through the UncompressedByte states with _state += 1. + internal enum InflaterState + { + ReadingHeader = 0, // Only applies to GZIP + ReadingBFinal = 2, // About to read bfinal bit + ReadingBType = 3, // About to read blockType bits + ReadingNumLitCodes = 4, // About to read # literal codes + ReadingNumDistCodes = 5, // About to read # dist codes + ReadingNumCodeLengthCodes = 6, // About to read # code length codes + ReadingCodeLengthCodes = 7, // In the middle of reading the code length codes + ReadingTreeCodesBefore = 8, // In the middle of reading tree codes (loop top) + ReadingTreeCodesAfter = 9, // In the middle of reading tree codes (extension; code > 15) + DecodeTop = 10, // About to decode a literal (char/match) in a compressed block + HaveInitialLength = 11, // Decoding a match, have the literal code (base length) + HaveFullLength = 12, // Ditto, now have the full match length (incl.
extra length bits) + HaveDistCode = 13, // Ditto, now have the distance code also, need extra dist bits + + /* uncompressed blocks */ + UncompressedAligning = 15, + UncompressedByte1 = 16, + UncompressedByte2 = 17, + UncompressedByte3 = 18, + UncompressedByte4 = 19, + DecodingUncompressed = 20, + + // These three are only entered when a format reader is present (e.g. GZIP) + StartReadingFooter = 21, // (Initialisation for reading footer) + ReadingFooter = 22, + VerifyingFooter = 23, + + Done = 24 // Finished + } + + internal enum BlockType + { + Uncompressed = 0, + Static = 1, + Dynamic = 2 + } + internal interface IFileFormatReader + { + bool ReadHeader(InputBuffer input); + bool ReadFooter(InputBuffer input); + void UpdateWithBytesRead(byte[] buffer, int offset, int bytesToCopy); + void Validate(); + } +} diff --git a/src/ICSharpCode.SharpZipLib/Zip/ZipEntry.cs b/src/ICSharpCode.SharpZipLib/Zip/ZipEntry.cs index b0bf15821..d32855b95 100644 --- a/src/ICSharpCode.SharpZipLib/Zip/ZipEntry.cs +++ b/src/ICSharpCode.SharpZipLib/Zip/ZipEntry.cs @@ -1085,6 +1085,7 @@ public object Clone() /// Returns true if the compression method is supported; false otherwise public static bool IsCompressionMethodSupported(CompressionMethod method) => method == CompressionMethod.Deflated + || method == CompressionMethod.Deflate64 || method == CompressionMethod.Stored || method == CompressionMethod.BZip2; diff --git a/src/ICSharpCode.SharpZipLib/Zip/ZipInputStream.cs b/src/ICSharpCode.SharpZipLib/Zip/ZipInputStream.cs index 4d258afc8..7821b25d4 100644 --- a/src/ICSharpCode.SharpZipLib/Zip/ZipInputStream.cs +++ b/src/ICSharpCode.SharpZipLib/Zip/ZipInputStream.cs @@ -5,6 +5,7 @@ using System; using System.Diagnostics; using System.IO; +using ICSharpCode.SharpZipLib.Zip.Deflate64; namespace ICSharpCode.SharpZipLib.Zip { @@ -73,6 +74,9 @@ public class ZipInputStream : InflaterInputStream private Crc32 crc = new Crc32(); private ZipEntry entry; + Deflate64Stream inputDeflate64Stream; + byte[] buffer; + private long size; private
CompressionMethod method; private int flags; @@ -197,6 +201,12 @@ public ZipEntry GetNextEntry() CloseEntry(); } + int bufferSize = inputBuffer.RawData.Length; + // Resize the input buffer so that only the file information is read and the stream stays at the correct position; + // this is needed for forward-only stream support. + // The header is read first, and the file info after that. + inputBuffer.ResizeBuffer(ZipConstants.LocalHeaderBaseSize); + if (!SkipUntilNextEntry()) { Dispose(); @@ -216,6 +226,8 @@ public ZipEntry GetNextEntry() bool isCrypted = (flags & 1) == 1; + // Resize to read the file name and the extra data, if available + inputBuffer.ResizeBuffer(nameLen + extraLen); byte[] buffer = new byte[nameLen]; inputBuffer.ReadRawBuffer(buffer); @@ -223,6 +235,9 @@ public ZipEntry GetNextEntry() string name = entryEncoding.GetString(buffer); var unicode = entryEncoding.IsZipUnicode(); + // Restore the original buffer size + inputBuffer.ResizeBuffer(bufferSize); + entry = new ZipEntry(name, versionRequiredToExtract, ZipConstants.VersionMadeBy, method, unicode) { Flags = flags, @@ -286,6 +301,12 @@ public ZipEntry GetNextEntry() { throw new ZipException("Stored, but compressed != uncompressed"); } + else if (method == CompressionMethod.Deflate64) + { + // All the information needed for decompression has been gathered; no need to proceed + this.inputDeflate64Stream = null; + return entry; + } // Determine how to handle reading of data if this is attempted.
if (IsEntryCompressionMethodSupported(entry)) @@ -425,6 +446,12 @@ public void CloseEntry() return; } + if (entry.CompressionMethod == CompressionMethod.Deflate64) + { + // No inputBuffer processing is needed for Deflate64 entries: all information is already available, and processing here would move the stream position + return; + } + if (method == CompressionMethod.Deflated) { if ((flags & 8) != 0) @@ -658,7 +685,18 @@ public override int Read(byte[] buffer, int offset, int count) throw new ArgumentException("Invalid offset/count combination"); } - return internalReader(buffer, offset, count); + if (entry.CompressionMethod == CompressionMethod.Deflate64) + { + if (inputDeflate64Stream == null) // created lazily on the first read of a Deflate64 entry + { + inputDeflate64Stream = new Deflate64Stream(base.baseInputStream, entry.CompressedSize); + } + return inputDeflate64Stream.Read(buffer, offset, count); // store at the caller's offset, like the internalReader path + } + else + { + return internalReader(buffer, offset, count); + } } ///