|
| 1 | +# Huffman Encoding |
| 2 | +# Python 2.7+ |
| 3 | +# Submitted by Matthew Giallourakis |
| 4 | + |
| 5 | +from collections import Counter |
| 6 | + |
| 7 | +# constructs the tree |
def build_huffman_tree(message):
    """Build a Huffman tree for the symbols of *message*.

    Leaves are the symbols themselves; every internal node is a two-element
    list ``[left, right]``.  When *message* contains only one distinct
    symbol, no merge happens and the bare symbol is returned as the tree.

    Args:
        message: an iterable of hashable symbols (typically a string).

    Returns:
        The root of the tree: a nested two-element list, or a bare symbol
        for a single-symbol message.

    Raises:
        ValueError: if *message* is empty, since there is nothing to encode.
    """
    # (symbol, frequency) pairs ordered from most to least frequent, so the
    # two lightest trees are always at the end of the list
    trees = Counter(message).most_common()

    if not trees:
        raise ValueError('cannot build a Huffman tree from an empty message')

    # while there is more than one tree
    while len(trees) > 1:

        # pop off the two trees of least weight from the trees list
        tree_left, weight_left = trees.pop()
        tree_right, weight_right = trees.pop()

        # combine them into a single tree
        new_tree = [tree_left, tree_right]
        new_weight = weight_left + weight_right

        # index of the first tree lighter than the new one; inserting there
        # keeps the list in descending weight order. If no such tree exists,
        # len(trees) makes the insert behave like an append.
        index = next(
            (i for i, (_, weight) in enumerate(trees) if weight < new_weight),
            len(trees)
        )
        trees.insert(index, (new_tree, new_weight))

    return trees[0][0]
| 34 | + |
| 35 | +# constructs the mapping with recursion |
def build_codebook(tree, code=''):
    """Recursively map every symbol in *tree* to its bit-string code.

    Args:
        tree: a Huffman tree whose internal nodes are two-element lists
            ``[left, right]`` and whose leaves are symbols (anything that
            is not a list).
        code: the bit-string accumulated on the path from the root to
            *tree*; callers normally leave this at its default.

    Returns:
        A list of ``(symbol, code)`` pairs in left-to-right leaf order.
        Left branches append '0' to the code and right branches append '1'.
    """
    # A leaf: pair the symbol with the path walked so far. A tree that is a
    # single leaf at the root (message with one distinct symbol) has an empty
    # path, so give it the one-bit code '0' to keep it encodable.
    if not isinstance(tree, list):
        return [(tree, code or '0')]

    # split the tree and collect the mappings of both children
    left_tree, right_tree = tree
    return (build_codebook(left_tree, code + '0') +
            build_codebook(right_tree, code + '1'))
| 57 | + |
| 58 | +# encodes the message |
def huffman_encode(codebook, message):
    """Encode *message* by replacing each symbol with its code.

    Args:
        codebook: an iterable of ``(symbol, code)`` pairs, as produced by
            ``build_codebook``.
        message: an iterable of symbols to encode (typically a string).

    Returns:
        The concatenation of the codes of all symbols, in message order.

    Raises:
        KeyError: if *message* contains a symbol that is not in *codebook*.
    """
    # build a symbol -> code dictionary
    forward_dict = dict(codebook)

    # join once instead of growing a string with += on every symbol
    return ''.join(forward_dict[char] for char in message)
| 71 | + |
| 72 | +# decodes a message |
def huffman_decode(codebook, encoded_message):
    """Decode a bit-string produced with *codebook* back into symbols.

    Args:
        codebook: an iterable of ``(symbol, code)`` pairs, as produced by
            ``build_codebook``.
        encoded_message: an iterable of bit characters (typically a string
            of '0' and '1').

    Returns:
        The concatenation of the decoded symbols.

    NOTE: trailing bits that do not complete a code are silently dropped,
    so a truncated or corrupted input yields a shorter result rather than
    an error.
    """
    # build a code -> symbol dictionary
    inverse_dict = {code: symbol for symbol, code in codebook}

    decoded_symbols = []
    key = ''

    # accumulate bits until they spell a known code, emit the paired symbol,
    # then start over with the next bit not yet looked at
    for bit in encoded_message:
        key += bit
        if key in inverse_dict:
            decoded_symbols.append(inverse_dict[key])
            key = ''

    return ''.join(decoded_symbols)
| 92 | + |
def main():
    """Run a round trip on a sample message and print every stage."""

    # test example
    text = 'bibbity_bobbity'
    huffman_tree = build_huffman_tree(text)
    code_pairs = build_codebook(huffman_tree)
    bits = huffman_encode(code_pairs, text)
    round_trip = huffman_decode(code_pairs, bits)

    # each stage is printed as its label immediately followed by its value
    report = (
        ('message: ', text),
        ('huffman tree: ', str(huffman_tree)),
        ('codebook: ', str(code_pairs)),
        ('encoded message: ', bits),
        ('decoded message: ', round_trip),
    )
    for label, value in report:
        print(label + value)

    # sample output recorded by the original author (the tree and codes
    # presumably depend on how symbols of equal frequency are ordered, so
    # they may differ between Python versions -- TODO confirm):
    #
    # message: bibbity_bobbity
    # huffman tree: ['b', [[['_', 'o'], 'y'], ['t', 'i']]]
    # codebook: [('b', '0'), ('_', '1000'), ('o', '1001'),
    #            ('y', '101'), ('t', '110'), ('i', '111')]
    # encoded message: 01110011111010110000100100111110101
    # decoded message: bibbity_bobbity
| 116 | + |
# run the demo only when this file is executed as a script, not on import
if "__main__" == __name__:
    main()
0 commit comments