@@ -343,10 +343,11 @@ def decode(huffman_string, huffman_tree):
343
343
# The number of fast lookup tables is 1 less than the number of symbols in the given alphabet (TABLE).
344
344
# This is the same as the number of internal (non-leaf) nodes in the huffman tree, counting the root node.
345
345
def make_fast_tables (huffman_table , huffman_tree ):
346
- tables = []
346
+ # store a list of pairs, table headers (remainder) and bodies (the computed table)
347
+ table_columns = []
347
348
348
349
null_table = _make_fast_table (huffman_table , huffman_tree , (0 , 0 ))
349
- tables .append (null_table )
350
+ table_columns .append ((( 0 , 0 ), null_table ) )
350
351
351
352
remainders = set (map (lambda row : row [2 ], null_table ))
352
353
# set((0, 0)) iterates the tuple and yields {0}; a set literal is needed so the
# (0, 0) remainder itself is recorded as finished and is not processed twice.
finished_remainders = {(0, 0)}
@@ -355,15 +356,38 @@ def make_fast_tables(huffman_table, huffman_tree):
355
356
remainder = (remainders - finished_remainders ).pop ()
356
357
357
358
table = _make_fast_table (huffman_table , huffman_tree , remainder )
358
- tables .append (table )
359
+ table_columns .append (( remainder , table ) )
359
360
360
361
# add any new remainders to the remainders set
361
362
remainders |= set (map (lambda row : row [2 ], table ))
362
363
363
364
# add the remainder which was just processed to the finished set
364
365
finished_remainders .add (remainder )
365
366
366
- return tables
367
+ # fetch all remainders (column headers) from the table columns, and make sure they are unique.
368
+ # then convert them back to a list so they are indexed.
369
+ all_remainders = list (set (map (lambda column : column [0 ], table_columns )))
370
+
371
+ check = list (map (lambda column : column [0 ], table_columns ))
372
+ cdict = {}
373
+ for c in check :
374
+ if c in cdict :
375
+ cdict [c ] += 1
376
+ else :
377
+ cdict [c ] = 1
378
+
379
+ cdict = {key : value for key , value in cdict .items () if value > 1 }
380
+
381
+ # well there's the problem, (0, 0) is captured twice, work out why and fix the loop above.
382
+ print (cdict )
383
+
384
+
385
+ print ("there are" , len (all_remainders ), "remainders" )
386
+
387
+ # TODO map indexes onto the table bodies in place of the remainders
388
+ # and remove the first value in the tuple, it's not needed for lookup if the whole table is flattened to an array.
389
+
390
+ return table_columns
367
391
368
392
369
393
# remainder is a 2-tuple with a remainder value from a previous table and the number of bits in the value.
@@ -372,45 +396,60 @@ def _make_fast_table(table, huffman_tree, remainder):
372
396
373
397
working_root_node = huffman_tree
374
398
399
+ print ("processing table for remainder" , remainder )
400
+
401
+ remainder_path = []
375
402
# move to the node in the tree indicated by the remainder
376
403
for bit_number in range (remainder [1 ] - 1 , - 1 , - 1 ):
377
404
if remainder [0 ] & (1 << bit_number ) == 1 << bit_number :
378
405
working_root_node = working_root_node .right
406
+ remainder_path .append (1 )
379
407
else :
380
408
working_root_node = working_root_node .left
409
+ remainder_path .append (0 )
381
410
382
411
# If the bit pattern in the remainder can be decoded to a symbol then it should have happened
383
412
# in a previous table, so this is an error.
384
413
if working_root_node .val != None :
385
414
print (remainder )
386
415
raise Exception ("Unexpected value while processing remainder for fast table" )
387
416
417
+ print ("using remainder path" , remainder_path )
418
+
388
419
for i in range (0 , 256 ):
389
420
emit = ""
390
421
391
422
working_node = working_root_node
392
423
last_bit_number = 8
424
+
425
+ path = remainder_path .copy ()
393
426
394
427
for bit_number in range (7 , - 1 , - 1 ):
395
428
if i & (1 << bit_number ) == 1 << bit_number :
396
429
working_node = working_node .right
430
+ path .append (1 )
397
431
else :
398
432
working_node = working_node .left
433
+ path .append (0 )
399
434
400
435
if working_node .val != None :
401
436
emit += chr (working_node .val )
402
437
working_node = huffman_tree
403
438
last_bit_number = bit_number
439
+ path = []
404
440
405
441
key = (remainder [0 ] << 8 ) + i
406
442
rem_value = i & ((1 << last_bit_number ) - 1 )
407
443
444
+ # the remainder value and length need not point to a table (i.e. be between 0 and 255 in this case)
445
+ # it is just a bit pattern, to which we should assign an index, and later map that index into each table.
446
+
447
+ # we have not emitted anything, which means that the remainder and this current bit pattern yielded nothing
448
+ # the bit pattern in the remainder should therefore include the previous remainder
408
449
if not emit :
409
- # we haven't been able to find any matches on the remainder followed by this bit pattern
410
- # so we forward the remainder with this bit pattern as a new remainder
411
450
rem_value += remainder [0 ] << 8
412
451
last_bit_number += remainder [1 ]
413
-
452
+
414
453
output .append ((key , emit , (rem_value , last_bit_number )))
415
454
416
455
return output
@@ -420,6 +459,32 @@ def fast_decode():
420
459
pass
421
460
422
461
462
+ from collections import deque
463
def list_internal_node_paths(huffman_tree):
    """Return the path to every internal node of the tree, breadth first.

    A node is treated as internal when its ``val`` is None. Each path is a
    list of bits describing the walk from the root: 0 for a step to ``left``
    and 1 for a step to ``right``. The root's own path is the empty list.
    """
    internal_node_paths = []

    # breadth first traversal; each queue entry pairs a node with its path
    queue = deque([(huffman_tree, [])])
    while queue:
        node, path = queue.popleft()

        if node.val is None:
            internal_node_paths.append(path)

        # left is queued before right so siblings are visited in 0, 1 order
        for bit, child in ((0, node.left), (1, node.right)):
            if child is not None:
                # path + [bit] builds a fresh list, so queued paths never alias
                queue.append((child, path + [bit]))

    return internal_node_paths
486
+
487
+
423
488
def print_fast_table (table ):
424
489
for row in table :
425
490
print ("{:<12b} | {:<3} | {},{}" .format (row [0 ], row [1 ], row [2 ][0 ], row [2 ][1 ]))
@@ -438,7 +503,8 @@ def print_fast_table(table):
438
503
print ("Decoder is not okay" )
439
504
440
505
fast_tables = make_fast_tables (TABLE , huffman_tree )
441
- #assert 256 == len(fast_tables)
506
+ print (len (fast_tables ))
507
+ assert 256 == len (fast_tables )
442
508
443
509
print_fast_table (fast_tables [1 ])
444
510
0 commit comments