Skip to content

Commit b43c9b8

Browse files
committed
hpack - hunt down the bug in the huffman tables generating script
1 parent 6ec0a97 commit b43c9b8

File tree

2 files changed

+75
-8
lines changed

2 files changed

+75
-8
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
target/
22
**/*.rs.bk
33
Cargo.lock
4+
.vscode

scripts/huffman_gen.py

+74-8
Original file line numberDiff line numberDiff line change
@@ -343,10 +343,11 @@ def decode(huffman_string, huffman_tree):
343343
# The number of fast lookup tables is 1 less than the number of symbols in the given alphabet (TABLE).
344344
# This is the same as the number of interior nodes in the huffman tree plus the root node.
345345
def make_fast_tables(huffman_table, huffman_tree):
346-
tables = []
346+
# store a list of pairs, table headers (remainder) and bodies (the computed table)
347+
table_columns = []
347348

348349
null_table = _make_fast_table(huffman_table, huffman_tree, (0, 0))
349-
tables.append(null_table)
350+
table_columns.append(((0, 0), null_table))
350351

351352
remainders = set(map(lambda row: row[2], null_table))
352353
finished_remainders = set((0, 0))
@@ -355,15 +356,38 @@ def make_fast_tables(huffman_table, huffman_tree):
355356
remainder = (remainders - finished_remainders).pop()
356357

357358
table = _make_fast_table(huffman_table, huffman_tree, remainder)
358-
tables.append(table)
359+
table_columns.append((remainder, table))
359360

360361
# add any new remainders to the remainders set
361362
remainders |= set(map(lambda row: row[2], table))
362363

363364
# add the remainder which was just processed to the finished set
364365
finished_remainders.add(remainder)
365366

366-
return tables
367+
# fetch all remainders (column headers) from the table columns, and make sure they are unique.
368+
# then convert them back to a list so they are indexed.
369+
all_remainders = list(set(map(lambda column: column[0], table_columns)))
370+
371+
check = list(map(lambda column: column[0], table_columns))
372+
cdict = {}
373+
for c in check:
374+
if c in cdict:
375+
cdict[c] += 1
376+
else:
377+
cdict[c] = 1
378+
379+
cdict = {key: value for key, value in cdict.items() if value > 1}
380+
381+
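# well there's the problem, (0, 0) is captured twice: `finished_remainders = set((0, 0))` above iterates the
# tuple and builds {0} rather than {(0, 0)}, so (0, 0) is never treated as finished. Seed the set with {(0, 0)} to fix the loop above.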
382+
print(cdict)
383+
384+
385+
print("there are", len(all_remainders), "remainders")
386+
387+
# TODO map indexes onto the table bodies in place of the remainders
388+
# and remove the first value in the tuple, it's not needed for lookup if the whole table is flattened to an array.
389+
390+
return table_columns
367391

368392

369393
# remainder is a 2-tuple with a remainder value from a previous table and the number of bits in the value.
@@ -372,45 +396,60 @@ def _make_fast_table(table, huffman_tree, remainder):
372396

373397
working_root_node = huffman_tree
374398

399+
print("processing table for remainder", remainder)
400+
401+
remainder_path = []
375402
# move to the node in the tree indicated by the remainder
376403
for bit_number in range(remainder[1] - 1, -1, -1):
377404
if remainder[0] & (1 << bit_number) == 1 << bit_number:
378405
working_root_node = working_root_node.right
406+
remainder_path.append(1)
379407
else:
380408
working_root_node = working_root_node.left
409+
remainder_path.append(0)
381410

382411
# If the bit pattern in the remainder can be decoded to a symbol then it should have happened
383412
# in a previous table, so this is an error.
384413
if working_root_node.val != None:
385414
print(remainder)
386415
raise Exception("Unexpected value while processing remainder for fast table")
387416

417+
print("using remainder path", remainder_path)
418+
388419
for i in range(0, 256):
389420
emit = ""
390421

391422
working_node = working_root_node
392423
last_bit_number = 8
424+
425+
path = remainder_path.copy()
393426

394427
for bit_number in range(7, -1, -1):
395428
if i & (1 << bit_number) == 1 << bit_number:
396429
working_node = working_node.right
430+
path.append(1)
397431
else:
398432
working_node = working_node.left
433+
path.append(0)
399434

400435
if working_node.val != None:
401436
emit += chr(working_node.val)
402437
working_node = huffman_tree
403438
last_bit_number = bit_number
439+
path = []
404440

405441
key = (remainder[0] << 8) + i
406442
rem_value = i & ((1 << last_bit_number) - 1)
407443

444+
# the remainder value and length need not point to a table (i.e. be between 0 and 255 in this case)
445+
# it is just a bit pattern, to which we should assign an index, and later map that index into each table.
446+
447+
# we have not emitted anything, which means that the remainder and this current bit pattern yielded nothing
448+
# the bit pattern in the remainder should therefore include the previous remainder
408449
if not emit:
409-
# we haven't been able to find any matches on the remainder followed by this bit pattern
410-
# so we forward the remainder with this bit pattern as a new remainder
411450
rem_value += remainder[0] << 8
412451
last_bit_number += remainder[1]
413-
452+
414453
output.append((key, emit, (rem_value, last_bit_number)))
415454

416455
return output
@@ -420,6 +459,32 @@ def fast_decode():
420459
pass
421460

422461

462+
from collections import deque
463+
def list_internal_node_paths(huffman_tree):
    """Return the bit path to every value-less node of a huffman tree.

    The tree is walked breadth first, starting at the root. Each node is
    expected to expose ``val``, ``left`` and ``right`` attributes; a node
    whose ``val`` is ``None`` is treated as an internal node.

    Args:
        huffman_tree: Root node of the tree, or ``None`` for an empty tree.

    Returns:
        A list of paths in breadth-first order. Each path is a list of bits
        leading from the root to the node, where 0 means "go left" and
        1 means "go right". The root itself is reported as the empty path.
    """
    internal_node_paths = []

    # An empty tree has no nodes at all, hence no internal nodes.
    if huffman_tree is None:
        return internal_node_paths

    # Each queue entry pairs a node with the bit path that reaches it.
    queue = deque([(huffman_tree, [])])
    while queue:
        node, path = queue.popleft()

        # Identity comparison: a node class that customises __eq__ must not
        # be able to make a valued node look like an internal one.
        if node.val is None:
            internal_node_paths.append(path)

        # `path + [bit]` builds a fresh list, so sibling paths never alias.
        if node.left is not None:
            queue.append((node.left, path + [0]))

        if node.right is not None:
            queue.append((node.right, path + [1]))

    return internal_node_paths
486+
487+
423488
def print_fast_table(table):
424489
for row in table:
425490
print("{:<12b} | {:<3} | {},{}".format(row[0], row[1], row[2][0], row[2][1]))
@@ -438,7 +503,8 @@ def print_fast_table(table):
438503
print("Decoder is not okay")
439504

440505
fast_tables = make_fast_tables(TABLE, huffman_tree)
441-
#assert 256 == len(fast_tables)
506+
print(len(fast_tables))
507+
assert 256 == len(fast_tables)
442508

443509
print_fast_table(fast_tables[1])
444510

0 commit comments

Comments
 (0)