-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHuffman.py
212 lines (176 loc) · 6.28 KB
/
Huffman.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
'''
Author : Chinmay Kale
ID : ck1239
Version : 1
Revision : 1
'''
import heapq
import math
import time
#Node class for building Huffman tree.
class Node:
    """A node of the Huffman tree.

    Leaves carry a symbol in ``value``; internal nodes are created by the
    tree builder with a placeholder value and the summed ``count`` of their
    two children. Nodes order by ``count`` only, which is what ``heapq``
    needs to pop the least frequent node first.
    """

    __slots__ = 'value', 'count', 'left', 'right'

    def __init__(self, value, count=1):
        """Create a childless node for ``value`` occurring ``count`` times."""
        self.left = None
        self.right = None
        self.value = value
        self.count = count

    def __lt__(self, node):
        """Return True when this node is less frequent than ``node``."""
        return self.count < node.count

    def __gt__(self, node):
        """Return True when this node is more frequent than ``node``."""
        return self.count > node.count

    def __str__(self):
        """Return ``'<value> : <count>'``."""
        # Formatting instead of ``+`` concatenation so that non-string
        # symbols (e.g. integer byte values) do not raise TypeError.
        return f'{self.value} : {self.count}'
#Get Input from text
def get_text(path='text.txt', encoding=None):
    """Read and return the whole content of a text file.

    Args:
        path: File to read. Defaults to ``'text.txt'`` in the current
            working directory, which is what the original hard-coded.
        encoding: Text encoding passed to ``open``. ``None`` keeps the
            platform default, matching the previous behaviour.

    Returns:
        The file content as a single string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(path, 'r', encoding=encoding) as input_file:
        return input_file.read()
#Get frequency of characters in input text
def get_frequency(input_text):
    """Count how often each character occurs in ``input_text``.

    Args:
        input_text: Any iterable of hashable symbols (normally a string).

    Returns:
        A plain ``dict`` mapping each symbol to its number of occurrences,
        with keys in order of first appearance. Empty input gives ``{}``.
    """
    # Local import keeps this block self-contained; Counter does the tally
    # in a single pass instead of a membership test per character.
    from collections import Counter

    # Convert back to a plain dict so callers keep getting the same type.
    return dict(Counter(input_text))
#Print a Dictionary
def print_dict(input_dict):
    """Print every entry of ``input_dict``, one ``Key / Value`` line each.

    Args:
        input_dict: Mapping to dump to standard output.
    """
    # Iterate key/value pairs directly rather than looking each key up again.
    for key, value in input_dict.items():
        print('Key : ', key, ', Value : ', value)
def huffman(char_dict, encoding_dict):
    """Build a Huffman tree from symbol frequencies and fill in the codes.

    Args:
        char_dict: Mapping of symbol -> occurrence count.
        encoding_dict: Dict that receives symbol -> bit-string code. It is
            mutated in place; nothing is returned.

    An empty ``char_dict`` leaves ``encoding_dict`` untouched (previously
    this raised IndexError when reading the root of an empty heap).
    """
    if not char_dict:
        return

    # Nodes are pushed one at a time, in dict order, so that ties between
    # equal counts are broken exactly as before and the codes stay the same.
    priority_queue = []
    for symbol, count in char_dict.items():
        heapq.heappush(priority_queue, Node(symbol, count))

    # Repeatedly merge the two least frequent nodes until one root remains.
    while len(priority_queue) > 1:
        node1 = heapq.heappop(priority_queue)
        node2 = heapq.heappop(priority_queue)
        new_node = Node('Intermediate', node1.count + node2.count)
        new_node.left = node1
        new_node.right = node2
        heapq.heappush(priority_queue, new_node)

    huffman_code_traversal(priority_queue[0], encoding_dict)
#Traverse helper function
def huffman_code_traversal(root, encoding_dict):
    """Fill ``encoding_dict`` with the codes of the tree rooted at ``root``.

    Thin entry point that starts the recursive walk with an empty prefix.
    """
    initial_prefix = ''
    inorder(root, initial_prefix, encoding_dict)
#Traverse huffman tree to get Huffman code for every character
def inorder(node, code, encoding_dict):
    """Walk the Huffman tree and record the bit-string code of every leaf.

    Args:
        node: Current tree node; must expose ``value``, ``left``, ``right``.
        code: Bit string accumulated on the path from the root to ``node``
            (``'0'`` for each left step, ``'1'`` for each right step).
        encoding_dict: Dict that receives leaf value -> code; mutated in place.
    """
    if node.left is None and node.right is None:
        # A tree made of a single leaf reaches here with an empty prefix.
        # An empty code would encode every character to zero bits, so give
        # that lone symbol the one-bit code '0' instead.
        encoding_dict[node.value] = code or '0'
        return
    inorder(node.left, code + '0', encoding_dict)
    inorder(node.right, code + '1', encoding_dict)
#Encode the text file
def encode(input_text, encoding_dict):
    """Encode ``input_text`` with the given symbol -> code table.

    Args:
        input_text: Text to encode.
        encoding_dict: Mapping of character -> bit-string code.

    Returns:
        A tuple ``(encoded_text, encoded_text_len)`` where ``encoded_text``
        is the concatenation of the codes and ``encoded_text_len`` its
        length in bits. Empty input yields ``('', 0)``.
        If a character has no code, a message is printed and ``None`` is
        returned, as before.
    """
    codes = []
    for char in input_text:
        code = encoding_dict.get(char)
        if code is None:
            # Name the offending character so the message is actionable.
            print('Char not available in encoding_dict: ', char)
            return None
        codes.append(code)

    # Join once at the end instead of growing a string in the loop.
    encoded_text = ''.join(codes)
    # The former unused compression-ratio computation was removed: it
    # divided by the encoded length and crashed on empty input.
    return encoded_text, len(encoded_text)
#Decode the text file
def decode(encoded_text, encoding_dict):
    """Decode a bit string produced by ``encode`` back into text.

    Args:
        encoded_text: String of code bits.
        encoding_dict: Mapping of character -> bit-string code. The codes
            are expected to be prefix-free, as Huffman codes are.

    Returns:
        The decoded text; ``''`` for empty input.

    Raises:
        ValueError: If the input ends with bits that do not form a complete
            code. The previous implementation looped forever in that case.
    """
    # Reverse table so each candidate prefix is a single dict lookup.
    code_to_char = {code: char for char, code in encoding_dict.items()}

    decoded_chars = []
    pending = ''
    for bit in encoded_text:
        pending += bit
        # With a prefix-free code the first match is the only possible one.
        if pending in code_to_char:
            decoded_chars.append(code_to_char[pending])
            pending = ''

    if pending:
        raise ValueError(
            'encoded text ends with undecodable bits: ' + repr(pending)
        )
    return ''.join(decoded_chars)
#Traditional Huffman code.
def traditional_huffman(input_text):
    """Run Huffman coding over the whole text and print the statistics.

    Builds one code table for the entire input, encodes it, decodes it
    again, and prints the encoded length, the input length (counted as
    8 bits per character), the compression ratio, and whether the round
    trip reproduced the input.

    Args:
        input_text: Text to compress.
    """
    if input_text:
        char_dict = get_frequency(input_text)
        encoding_dict = {}
        huffman(char_dict, encoding_dict)
        encoded_text, encoded_text_len = encode(input_text, encoding_dict)
        decoded_text = decode(encoded_text, encoding_dict)
    else:
        # Nothing to encode: skip tree construction, which cannot build a
        # tree from zero symbols, and report an empty round trip.
        encoded_text_len = 0
        decoded_text = ''

    input_text_len = len(input_text) * 8
    # Guard the division so a zero-length encoding reports 0.0, not a crash.
    if encoded_text_len:
        compression_ratio = input_text_len / encoded_text_len
    else:
        compression_ratio = 0.0

    print('Length of encoded text is :', encoded_text_len)
    print('Length of input text is :', input_text_len)
    print('Compression ratio', round(compression_ratio, 2))
    if input_text == decoded_text:
        print('Decoded text is correct!!')
    else:
        print('Decoded text doesnt match the input text')
#Create different blocks of input text.
def enblock(input_text, block_count=10):
    """Split ``input_text`` into ``block_count`` consecutive blocks.

    Every block holds ``ceil(len(input_text) / block_count)`` characters,
    except that trailing blocks are shorter, or empty, once the text runs
    out. The input size and block count are printed as before.

    Args:
        input_text: Text to split.
        block_count: Number of blocks to produce; defaults to 10, the
            value that used to be hard-coded.

    Returns:
        A list of exactly ``block_count`` strings whose concatenation is
        ``input_text``.

    Raises:
        ValueError: If ``block_count`` is smaller than 1.
    """
    if block_count < 1:
        raise ValueError('block_count must be at least 1')

    input_length = len(input_text)
    print('Characters : ', input_length)
    print('Total input length : ', input_length * 8)
    thresh_hold = math.ceil(input_length / block_count)
    print('block_count : ', block_count)

    # block_count * thresh_hold >= input_length, so the last slice already
    # reaches the end of the text and needs no special case.
    return [
        input_text[index * thresh_hold:(index + 1) * thresh_hold]
        for index in range(block_count)
    ]
#Modified Huffman code
def block_huffman(input_text):
    """Huffman-code the text block by block and print the total statistics.

    The text is split with ``enblock``; each non-empty block gets its own
    code table and is encoded and decoded independently. The printed total
    is the sum of the encoded block lengths only; the per-block code tables
    are not counted. Input length is counted as 8 bits per character.

    Args:
        input_text: Text to compress.
    """
    total_encoded_text_len = 0
    for block in enblock(input_text):
        if not block:
            # enblock pads short inputs with empty blocks; there is nothing
            # to encode and no tree can be built from zero symbols.
            continue
        encoding_dict = {}
        huffman(get_frequency(block), encoding_dict)
        encoded_text, encoded_text_len = encode(block, encoding_dict)
        total_encoded_text_len += encoded_text_len
        # Result unused: the round trip is kept so the timed work stays
        # comparable with traditional_huffman, which also decodes.
        decode(encoded_text, encoding_dict)

    input_text_len = len(input_text) * 8
    print('total_encoded_text_len : ', total_encoded_text_len)
    print('Total Length of input text is :', input_text_len)
    # Guard the division: nothing is encoded when the input is empty.
    if total_encoded_text_len:
        compression_ratio = input_text_len / total_encoded_text_len
    else:
        compression_ratio = 0.0
    print('Total Compression ratio', round(compression_ratio, 2))
#Main Function.
def main():
    """Read the input text and time both compression variants.

    For each variant a header is printed, the variant is run (it prints its
    own statistics), and the elapsed time in seconds, rounded to two
    decimals, is printed.
    """
    input_text = get_text()

    # (header, timing label, function) for each variant, in run order.
    runs = (
        ('Traditional Huffman Code stats: ',
         'Traditional Huffman Code : ',
         traditional_huffman),
        ('\nImproved Huffman Code(Block Huffman) stats: ',
         'Improved Huffman Code : ',
         block_huffman),
    )
    for header, label, compress in runs:
        # perf_counter is monotonic and high-resolution, so the interval is
        # not distorted by system clock adjustments as time.time() can be.
        start = time.perf_counter()
        print(header)
        compress(input_text)
        exec_time = round(time.perf_counter() - start, 2)
        print(label, exec_time)


if __name__ == '__main__':
    main()