Skip to content

Commit 091b3c4

Browse files
foldstersButt4cak3
authored and committed
Add Python Implementation of Huffman Encoding (#98)
1 parent b182f3c commit 091b3c4

File tree

1 file changed

+118
-0
lines changed
  • chapters/data_compression/huffman/code/python

1 file changed

+118
-0
lines changed
Lines changed: 118 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,118 @@
1+
# Huffman Encoding
2+
# Python 2.7+
3+
# Submitted by Matthew Giallourakis
4+
5+
from collections import Counter
6+
7+
# constructs the tree
8+
def build_huffman_tree(message):
    """Build a Huffman tree for the symbols in *message*.

    The tree is a nested list: every internal node is a two-element list
    ``[left, right]`` and every leaf is a bare symbol taken from *message*.

    Args:
        message: iterable of hashable symbols (typically a string).

    Returns:
        The root of the tree.  A message with a single distinct symbol
        yields that symbol itself (a lone leaf); an empty message yields
        the empty list ``[]``.
    """
    # (symbol, count) pairs ordered from most to least frequent, so the two
    # lightest trees are always the last two entries of the list
    trees = Counter(message).most_common()

    # nothing to encode: return an empty tree instead of failing on trees[0]
    if not trees:
        return []

    # merge the two lightest trees until a single tree remains
    while len(trees) > 1:
        tree_left, weight_left = trees.pop()
        tree_right, weight_right = trees.pop()

        new_tree = [tree_left, tree_right]
        new_weight = weight_left + weight_right

        # keep the list in descending weight order: insert in front of the
        # first tree lighter than the merged one, or append if there is none
        index = next(
            (i for i, (_, weight) in enumerate(trees) if weight < new_weight),
            len(trees)
        )
        trees.insert(index, (new_tree, new_weight))

    huffman_tree, _ = trees[0]
    return huffman_tree
34+
35+
# constructs the mapping with recursion
36+
def build_codebook(tree, code=''):
    """Turn a Huffman tree into a list of ``(symbol, code)`` pairs.

    Internal nodes are two-element lists ``[left, right]``; anything that is
    not a list is treated as a leaf symbol.  Going left appends ``'0'`` to
    the code and going right appends ``'1'``.

    Args:
        tree: the (sub)tree to walk.
        code: the bit string accumulated on the way down to *tree*.

    Returns:
        A list of ``(symbol, code)`` pairs in left-to-right leaf order.
        An empty list given as *tree* yields an empty codebook.  A tree
        that is a single leaf yields one pair whose code is *code*, or
        ``'0'`` when *code* is empty, so the symbol never gets a
        zero-length code.
    """
    if not isinstance(tree, list):
        # a lone leaf (e.g. the tree of a message with one distinct symbol)
        # cannot be split into children; give it a one-bit code
        return [(tree, code or '0')]

    # an empty tree has no symbols and therefore no codes
    if not tree:
        return []

    left_tree, right_tree = tree

    codebook = []
    for subtree, bit in ((left_tree, '0'), (right_tree, '1')):
        if isinstance(subtree, list):
            # the child has children of its own: collect their codes
            codebook.extend(build_codebook(subtree, code + bit))
        else:
            # the child is a symbol: pair it with the path that led here
            codebook.append((subtree, code + bit))
    return codebook
57+
58+
# encodes the message
59+
def huffman_encode(codebook, message):
    """Encode *message* as a bit string using *codebook*.

    Args:
        codebook: iterable of ``(symbol, code)`` pairs, where each code is
            a string.
        message: iterable of symbols, each of which must appear in
            *codebook*.

    Returns:
        The codes of the symbols of *message*, concatenated in order.

    Raises:
        KeyError: if *message* contains a symbol that has no entry in
            *codebook*.
    """
    # build a symbol -> code dictionary
    forward_dict = dict(codebook)

    # replace each symbol with its code; join once instead of growing a
    # string piece by piece
    return ''.join(forward_dict[char] for char in message)
71+
72+
# decodes a message
73+
def huffman_decode(codebook, encoded_message):
    """Decode a bit string produced with *codebook* back into a message.

    Bits are accumulated one at a time; as soon as the accumulated bits
    equal a code in *codebook*, the matching symbol is emitted and the
    accumulator is reset.  This assumes that no code is a prefix of
    another code.

    Args:
        codebook: iterable of ``(symbol, code)`` pairs, where symbols and
            codes are strings.
        encoded_message: string of bits to decode.

    Returns:
        The decoded message as a string.  Bits left over at the end that do
        not complete a code are ignored.
    """
    # build a code -> symbol dictionary
    inverse_dict = {code: symbol for symbol, code in codebook}

    decoded_symbols = []
    key = ''
    for bit in encoded_message:
        key += bit
        if key in inverse_dict:
            # a full code has been read: emit its symbol and start over
            decoded_symbols.append(inverse_dict[key])
            key = ''

    return ''.join(decoded_symbols)
92+
93+
def main():
    """Encode and decode a sample message, printing every stage."""
    # test example
    message = 'bibbity_bobbity'
    tree = build_huffman_tree(message)
    codebook = build_codebook(tree)
    encoded_message = huffman_encode(codebook, message)
    decoded_message = huffman_decode(codebook, encoded_message)

    # label / value pairs, printed in order
    report = (
        ('message: ', message),
        ('huffman tree: ', str(tree)),
        ('codebook: ', str(codebook)),
        ('encoded message: ', encoded_message),
        ('decoded message: ', decoded_message),
    )
    for label, value in report:
        print(label + value)

    # Sample output (symbols with equal counts, such as '_' and 'o' here,
    # may come out in a different order depending on how Counter orders
    # ties, which changes the tree and the codes accordingly):
    #
    #   message: bibbity_bobbity
    #   huffman tree: ['b', [[['_', 'o'], 'y'], ['t', 'i']]]
    #   codebook: [('b', '0'), ('_', '1000'), ('o', '1001'),
    #              ('y', '101'), ('t', '110'), ('i', '111')]
    #   encoded message: 01110011111010110000100100111110101
    #   decoded message: bibbity_bobbity
116+
117+
# run the demonstration only when executed as a script, not when imported
if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)