|
| 1 | +from collections import Counter, deque |
| 2 | +from bisect import bisect |
| 3 | + |
# Common base type for the three Huffman tree variants defined below
# (Empty, Leaf and Node). It carries no behaviour of its own.
class Tree:
    pass

# The tree with no symbols in it; it has no fields.
data Empty() from Tree
# A single symbol of the message together with its integer count `n`.
data Leaf(char, n is int) from Tree:
    # Both the printable and the debug form show the symbol and its count.
    def __repr__(self):
        return f'Leaf({self.char}, {self.n})'

    __str__ = __repr__
| 12 | + |
# An internal node joining two subtrees; it stores no weight of its own.
data Node(left is Tree, right is Tree) from Tree:
    # Rendered recursively as Node(<left>, <right>).
    def __repr__(self):
        return f'Node({str(self.left)}, {str(self.right)})'

    __str__ = __repr__
| 17 | + |
# weight(tree): total symbol count represented by a tree.
#   - an empty tree weighs 0
#   - a leaf weighs its own count
#   - a node weighs the sum of its two subtrees (computed recursively)
#
# The first clause matches the Empty variant explicitly. A pattern on the
# base class Tree can, depending on how the Coconut version in use treats
# class patterns, also accept Leaf and Node instances and so shadow the
# clauses below it.
def weight(Empty()) = 0
addpattern def weight(Leaf(char, n)) = n
addpattern def weight(Node(left, right)) = weight(left) + weight(right)
| 21 | + |
def build_huffman_tree(message):
    """Build the Huffman tree for the symbols of `message`.

    Returns Empty() when the message contains no symbols, a single Leaf
    when it contains one distinct symbol, and a Node otherwise.
    """
    # One leaf per distinct symbol, ordered from least to most frequent.
    ascending = reversed(Counter(message).most_common())
    forest = deque(Leaf(char, count) for char, count in ascending)

    if not forest:
        return Empty()

    # Repeatedly merge the two lightest trees until a single tree remains.
    while len(forest) > 1:
        lightest = forest.popleft()
        next_lightest = forest.popleft()
        merged = Node(lightest, next_lightest)

        # bisect returns the position just past every remaining tree whose
        # weight is <= the merged weight (len(forest) if all of them are),
        # so inserting there keeps the forest sorted by weight.
        remaining_weights = [weight(tree) for tree in forest]
        position = bisect(remaining_weights, weight(merged))
        forest.insert(position, merged)

    return forest[0]
| 51 | + |
| 52 | + |
# build_codebook(tree, code=''): list of (symbol, code) pairs for a tree.
#   - an empty tree yields no entries
#   - a leaf yields its symbol paired with the path accumulated so far
#   - a node appends '0' when descending left and '1' when descending right,
#     listing the left subtree's entries before the right subtree's
def build_codebook(Empty(), code='') = []
addpattern def build_codebook(Leaf(symbol, _), code='') = [(symbol, code)]
addpattern def build_codebook(Node(zero_side, one_side), code='') = (
    build_codebook(zero_side, code + '0')
    + build_codebook(one_side, code + '1')
)
| 57 | + |
def huffman_encode(codebook, message):
    """Encode `message` as a string of '0'/'1' characters.

    `codebook` is a sequence of (char, code) pairs. A character of
    `message` that has no entry raises KeyError, except when the codebook
    has exactly one entry, in which case the characters are not looked up.
    """
    if len(codebook) != 1:
        # char -> code lookup table
        code_for = dict(codebook)
        return ''.join(code_for[char] for char in message)

    # Exactly one entry: emit one '0' per character of the message.
    return '0' * len(message)
| 67 | + |
def huffman_decode(codebook, encoded_message):
    """Decode a string of '0'/'1' characters back into a message.

    `codebook` is a sequence of (char, code) pairs and `encoded_message`
    is the bit string to decode. Returns the decoded string; an empty
    codebook or an empty `encoded_message` decodes to ''. Trailing bits
    that do not complete any code are ignored.
    """
    if not codebook:
        return ''
    if len(codebook) == 1:
        # With a single entry, huffman_encode emits one '0' per character,
        # so the message is that one symbol repeated once per bit.
        return codebook[0][0] * len(encoded_message)

    # code -> char lookup table
    inverse_dict = {code: char for char, code in codebook}

    # Extend the current bit run until it equals a complete code, emit the
    # matching character, then start a new run at the next bit. An empty
    # input never enters the loop and therefore decodes to ''.
    decoded_message = []
    key = ''
    for bit in encoded_message:
        key += bit
        if key in inverse_dict:
            decoded_message.append(inverse_dict[key])
            key = ''

    return ''.join(decoded_message)
| 95 | + |
| 96 | + |
if __name__ == '__main__':
    # Round-trip a sample message through the encoder and decoder.
    message = 'bibbity_bobbity'
    tree = build_huffman_tree(message)
    codebook = build_codebook(tree)
    encoded_message = huffman_encode(codebook, message)
    decoded_message = huffman_decode(codebook, encoded_message)

    report = (
        ('message:', message),
        ('huffman tree:', tree),
        ('codebook:', codebook),
        ('encoded message:', encoded_message),
        ('decoded message:', decoded_message),
    )
    for label, value in report:
        print(label, value)

    # prints the following (long lines wrapped here for readability):
    #
    # message: bibbity_bobbity
    # huffman tree: Node(Leaf(b, 6), Node(Node(Leaf(y, 2), Leaf(t, 2)),
    #     Node(Node(Leaf(o, 1), Leaf(_, 1)), Leaf(i, 3))))
    # codebook: [('b', '0'), ('y', '100'), ('t', '101'),
    #     ('o', '1100'), ('_', '1101'), ('i', '111')]
    # encoded message: 01110011110110011010110000111101100
    # decoded message: bibbity_bobbity
0 commit comments