Skip to content

Commit

Permalink
deflate_compress: optimize outputting the match lengths
Browse files Browse the repository at this point in the history
  • Loading branch information
ebiggers committed Jun 12, 2022
1 parent f3ebd43 commit fe903f6
Showing 1 changed file with 99 additions and 71 deletions.
170 changes: 99 additions & 71 deletions lib/deflate_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -393,17 +393,11 @@ struct deflate_sequence {
*/
u16 offset;

/*
* If 'length' doesn't indicate end-of-block, then this is the length
* slot of the match which follows the literals.
*/
u8 length_slot;

/*
* If 'length' doesn't indicate end-of-block, then this is the offset
* slot of the match which follows the literals.
*/
u8 offset_slot;
u16 offset_slot;
};

#if SUPPORT_NEAR_OPTIMAL_PARSING
Expand Down Expand Up @@ -514,16 +508,29 @@ struct libdeflate_compressor {
/* The static Huffman codes defined by the DEFLATE format */
struct deflate_codes static_codes;

/* Temporary space for Huffman code output */
u32 precode_freqs[DEFLATE_NUM_PRECODE_SYMS];
u8 precode_lens[DEFLATE_NUM_PRECODE_SYMS];
u32 precode_codewords[DEFLATE_NUM_PRECODE_SYMS];
unsigned precode_items[DEFLATE_NUM_LITLEN_SYMS +
DEFLATE_NUM_OFFSET_SYMS];
unsigned num_litlen_syms;
unsigned num_offset_syms;
unsigned num_explicit_lens;
unsigned num_precode_items;
/* Temporary space for block flushing */
union {
/* Information about the precode */
struct {
u32 freqs[DEFLATE_NUM_PRECODE_SYMS];
u32 codewords[DEFLATE_NUM_PRECODE_SYMS];
u8 lens[DEFLATE_NUM_PRECODE_SYMS];
unsigned items[DEFLATE_NUM_LITLEN_SYMS +
DEFLATE_NUM_OFFSET_SYMS];
unsigned num_litlen_syms;
unsigned num_offset_syms;
unsigned num_explicit_lens;
unsigned num_items;
} precode;
/*
* The "full" length codewords. Used only after the information
* in 'precode' is no longer needed.
*/
struct {
u32 codewords[DEFLATE_MAX_MATCH_LEN + 1];
u8 lens[DEFLATE_MAX_MATCH_LEN + 1];
} length;
} o;

union {
/* Data for greedy or lazy parsing */
Expand Down Expand Up @@ -1532,16 +1539,16 @@ deflate_precompute_huffman_header(struct libdeflate_compressor *c)
{
/* Compute how many litlen and offset symbols are needed. */

for (c->num_litlen_syms = DEFLATE_NUM_LITLEN_SYMS;
c->num_litlen_syms > 257;
c->num_litlen_syms--)
if (c->codes.lens.litlen[c->num_litlen_syms - 1] != 0)
for (c->o.precode.num_litlen_syms = DEFLATE_NUM_LITLEN_SYMS;
c->o.precode.num_litlen_syms > 257;
c->o.precode.num_litlen_syms--)
if (c->codes.lens.litlen[c->o.precode.num_litlen_syms - 1] != 0)
break;

for (c->num_offset_syms = DEFLATE_NUM_OFFSET_SYMS;
c->num_offset_syms > 1;
c->num_offset_syms--)
if (c->codes.lens.offset[c->num_offset_syms - 1] != 0)
for (c->o.precode.num_offset_syms = DEFLATE_NUM_OFFSET_SYMS;
c->o.precode.num_offset_syms > 1;
c->o.precode.num_offset_syms--)
if (c->codes.lens.offset[c->o.precode.num_offset_syms - 1] != 0)
break;

/*
Expand All @@ -1551,64 +1558,86 @@ deflate_precompute_huffman_header(struct libdeflate_compressor *c)
*/
STATIC_ASSERT(offsetof(struct deflate_lens, offset) ==
DEFLATE_NUM_LITLEN_SYMS);
if (c->num_litlen_syms != DEFLATE_NUM_LITLEN_SYMS) {
memmove((u8 *)&c->codes.lens + c->num_litlen_syms,
if (c->o.precode.num_litlen_syms != DEFLATE_NUM_LITLEN_SYMS) {
memmove((u8 *)&c->codes.lens + c->o.precode.num_litlen_syms,
(u8 *)&c->codes.lens + DEFLATE_NUM_LITLEN_SYMS,
c->num_offset_syms);
c->o.precode.num_offset_syms);
}

/*
* Compute the "items" (RLE / literal tokens and extra bits) with which
* the codeword lengths in the larger code will be output.
*/
c->num_precode_items =
c->o.precode.num_items =
deflate_compute_precode_items((u8 *)&c->codes.lens,
c->num_litlen_syms +
c->num_offset_syms,
c->precode_freqs,
c->precode_items);
c->o.precode.num_litlen_syms +
c->o.precode.num_offset_syms,
c->o.precode.freqs,
c->o.precode.items);

/* Build the precode. */
deflate_make_huffman_code(DEFLATE_NUM_PRECODE_SYMS,
MAX_PRE_CODEWORD_LEN,
c->precode_freqs, c->precode_lens,
c->precode_codewords);
c->o.precode.freqs, c->o.precode.lens,
c->o.precode.codewords);

/* Count how many precode lengths we actually need to output. */
for (c->num_explicit_lens = DEFLATE_NUM_PRECODE_SYMS;
c->num_explicit_lens > 4;
c->num_explicit_lens--)
if (c->precode_lens[deflate_precode_lens_permutation[
c->num_explicit_lens - 1]] != 0)
for (c->o.precode.num_explicit_lens = DEFLATE_NUM_PRECODE_SYMS;
c->o.precode.num_explicit_lens > 4;
c->o.precode.num_explicit_lens--)
if (c->o.precode.lens[deflate_precode_lens_permutation[
c->o.precode.num_explicit_lens - 1]] != 0)
break;

/* Restore the offset codeword lengths if needed. */
if (c->num_litlen_syms != DEFLATE_NUM_LITLEN_SYMS) {
if (c->o.precode.num_litlen_syms != DEFLATE_NUM_LITLEN_SYMS) {
memmove((u8 *)&c->codes.lens + DEFLATE_NUM_LITLEN_SYMS,
(u8 *)&c->codes.lens + c->num_litlen_syms,
c->num_offset_syms);
(u8 *)&c->codes.lens + c->o.precode.num_litlen_syms,
c->o.precode.num_offset_syms);
}
}

/*
* To make it faster to output matches, compute the "full" match length
* codewords, i.e. the concatenation of the litlen codeword and the extra bits
* for each possible match length.
*/
static void
deflate_compute_full_len_codewords(struct libdeflate_compressor *c,
const struct deflate_codes *codes)
{
unsigned len;

STATIC_ASSERT(MAX_LITLEN_CODEWORD_LEN +
DEFLATE_MAX_EXTRA_LENGTH_BITS <= 32);

for (len = DEFLATE_MIN_MATCH_LEN; len <= DEFLATE_MAX_MATCH_LEN; len++) {
unsigned slot = deflate_length_slot[len];
unsigned litlen_sym = DEFLATE_FIRST_LEN_SYM + slot;
u32 extra_bits = len - deflate_length_slot_base[slot];

c->o.length.codewords[len] =
codes->codewords.litlen[litlen_sym] |
(extra_bits << codes->lens.litlen[litlen_sym]);
c->o.length.lens[len] = codes->lens.litlen[litlen_sym] +
deflate_extra_length_bits[slot];
}
}

/* Write a match to the output buffer. */
#define WRITE_MATCH(codes_, length_, length_slot_, offset_, offset_slot_) \
#define WRITE_MATCH(c_, codes_, length_, offset_, offset_slot_) \
do { \
const struct libdeflate_compressor *c__ = (c_); \
const struct deflate_codes *codes__ = (codes_); \
unsigned length__ = (length_); \
unsigned length_slot__ = (length_slot_); \
unsigned offset__ = (offset_); \
unsigned offset_slot__ = (offset_slot_); \
unsigned litlen_symbol__ = DEFLATE_FIRST_LEN_SYM + length_slot__; \
\
/* Litlen symbol */ \
ADD_BITS(codes__->codewords.litlen[litlen_symbol__], \
codes__->lens.litlen[litlen_symbol__]); \
\
/* Extra length bits */ \
/* Litlen symbol and extra length bits */ \
STATIC_ASSERT(CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN + \
DEFLATE_MAX_EXTRA_LENGTH_BITS)); \
ADD_BITS(length__ - deflate_length_slot_base[length_slot__], \
deflate_extra_length_bits[length_slot__]); \
ADD_BITS(c__->o.length.codewords[length__], \
c__->o.length.lens[length__]); \
\
if (!CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN + \
DEFLATE_MAX_EXTRA_LENGTH_BITS + \
Expand Down Expand Up @@ -1683,12 +1712,12 @@ deflate_flush_block(struct libdeflate_compressor *c,
deflate_precompute_huffman_header(c);

/* Account for the cost of encoding dynamic Huffman codes. */
dynamic_cost += 5 + 5 + 4 + (3 * c->num_explicit_lens);
dynamic_cost += 5 + 5 + 4 + (3 * c->o.precode.num_explicit_lens);
for (sym = 0; sym < DEFLATE_NUM_PRECODE_SYMS; sym++) {
u32 extra = deflate_extra_precode_bits[sym];

dynamic_cost += c->precode_freqs[sym] *
(extra + c->precode_lens[sym]);
dynamic_cost += c->o.precode.freqs[sym] *
(extra + c->o.precode.lens[sym]);
}

/* Account for the cost of encoding literals. */
Expand Down Expand Up @@ -1738,8 +1767,8 @@ deflate_flush_block(struct libdeflate_compressor *c,
/* Choose and output the cheapest type of block. */
best_cost = MIN(static_cost, uncompressed_cost);
if (dynamic_cost < best_cost) {
const unsigned num_explicit_lens = c->num_explicit_lens;
const unsigned num_precode_items = c->num_precode_items;
const unsigned num_explicit_lens = c->o.precode.num_explicit_lens;
const unsigned num_precode_items = c->o.precode.num_items;
unsigned precode_sym, precode_item;
unsigned i;

Expand All @@ -1750,8 +1779,8 @@ deflate_flush_block(struct libdeflate_compressor *c,
STATIC_ASSERT(CAN_BUFFER(1 + 2 + 5 + 5 + 4 + 3));
ADD_BITS(is_final_block, 1);
ADD_BITS(DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN, 2);
ADD_BITS(c->num_litlen_syms - 257, 5);
ADD_BITS(c->num_offset_syms - 1, 5);
ADD_BITS(c->o.precode.num_litlen_syms - 257, 5);
ADD_BITS(c->o.precode.num_offset_syms - 1, 5);
ADD_BITS(num_explicit_lens - 4, 4);

/* Output the lengths of the codewords in the precode. */
Expand All @@ -1762,13 +1791,13 @@ deflate_flush_block(struct libdeflate_compressor *c,
* flushes we merge one len with the previous fields.
*/
precode_sym = deflate_precode_lens_permutation[0];
ADD_BITS(c->precode_lens[precode_sym], 3);
ADD_BITS(c->o.precode.lens[precode_sym], 3);
FLUSH_BITS();
i = 1; /* num_explicit_lens >= 4 */
do {
precode_sym =
deflate_precode_lens_permutation[i];
ADD_BITS(c->precode_lens[precode_sym], 3);
ADD_BITS(c->o.precode.lens[precode_sym], 3);
} while (++i < num_explicit_lens);
FLUSH_BITS();
} else {
Expand All @@ -1777,7 +1806,7 @@ deflate_flush_block(struct libdeflate_compressor *c,
do {
precode_sym =
deflate_precode_lens_permutation[i];
ADD_BITS(c->precode_lens[precode_sym], 3);
ADD_BITS(c->o.precode.lens[precode_sym], 3);
FLUSH_BITS();
} while (++i < num_explicit_lens);
}
Expand All @@ -1788,11 +1817,11 @@ deflate_flush_block(struct libdeflate_compressor *c,
*/
i = 0;
do {
precode_item = c->precode_items[i];
precode_item = c->o.precode.items[i];
precode_sym = precode_item & 0x1F;
STATIC_ASSERT(CAN_BUFFER(MAX_PRE_CODEWORD_LEN + 7));
ADD_BITS(c->precode_codewords[precode_sym],
c->precode_lens[precode_sym]);
ADD_BITS(c->o.precode.codewords[precode_sym],
c->o.precode.lens[precode_sym]);
ADD_BITS(precode_item >> 5,
deflate_extra_precode_bits[precode_sym]);
FLUSH_BITS();
Expand Down Expand Up @@ -1849,6 +1878,7 @@ deflate_flush_block(struct libdeflate_compressor *c,

/* Output the literals and matches for a dynamic or static block. */
ASSERT(bitcount <= 7);
deflate_compute_full_len_codewords(c, codes);
#if SUPPORT_NEAR_OPTIMAL_PARSING
if (sequences == NULL) {
/* Output the literals and matches from the minimum-cost path */
Expand All @@ -1867,8 +1897,7 @@ deflate_flush_block(struct libdeflate_compressor *c,
FLUSH_BITS();
} else {
/* Match */
WRITE_MATCH(codes, length,
deflate_length_slot[length], offset,
WRITE_MATCH(c, codes, length, offset,
c->p.n.offset_slot_full[offset]);
}
cur_node += length;
Expand Down Expand Up @@ -1934,8 +1963,8 @@ deflate_flush_block(struct libdeflate_compressor *c,
}

/* Output a match. */
WRITE_MATCH(codes, length, seq->length_slot,
seq->offset, seq->offset_slot);
WRITE_MATCH(c, codes, length, seq->offset,
seq->offset_slot);
in_next += length;
}
}
Expand Down Expand Up @@ -2166,7 +2195,6 @@ deflate_choose_match(struct libdeflate_compressor *c,

seq->litrunlen_and_length |= (u32)length << SEQ_LENGTH_SHIFT;
seq->offset = offset;
seq->length_slot = length_slot;
seq->offset_slot = offset_slot;

seq++;
Expand Down

0 comments on commit fe903f6

Please sign in to comment.