algorithm-archivists · Butt4cak3 · May 1, 2018 · Apr 30, 2018 · May 1, 2018 · May 1, 2018
diff --git a/chapters/data_compression/huffman/code/python/huffman.py b/chapters/data_compression/huffman/code/python/huffman.py
@@ -0,0 +1,118 @@
+# Huffman Encoding
+# Python 2.7+
+# Submitted by Matthew Giallourakis 
+
+from collections import Counter
+
+# constructs the tree
+def build_huffman_tree(message):
+
+    # get sorted list of character and frequency pairs
+    frequencies = Counter(message)
+    trees = frequencies.most_common()
+
+    # while there is more than one tree
+    while len(trees) > 1:
+
+        # pop off the two trees of least weight from the trees list
+        tree_left,weight_left = trees.pop()
+        tree_right,weight_right = trees.pop()
+
+        # combine the nodes and add back to the nodes list
+        new_tree = [tree_left, tree_right]
+        new_weight = weight_left + weight_right
+
+        # find the first tree that has a weight smaller than new_weight and returns its index in the list
+        # If no such tree can be found, use len(trees) instead to append
+        index = next((i for i, tree in enumerate(trees) if tree[1] < new_weight), len(trees))
+
+        # insert the new tree there
+        trees.insert(index, (new_tree, new_weight))
+
+    huffman_tree = trees[0][0]
+    return huffman_tree
+
+# constructs the mapping with recursion
+def build_codebook(tree, code=''):
+
+    codebook = []
+
+    # split the tree
+    left_tree, right_tree = tree
+
+    # if the left node has children, find the mapping of those children
+    # else pair the character with the current code + 0 
+    if type(left_tree) is list:
+        codebook += build_codebook(left_tree, code+'0')
+    else:
+        codebook.append((left_tree, code+'0'))
+
+    # if the right node has children, find the mapping of those children
+    # else pair the character with the current code + 1 
+    if type(right_tree) is list:
+        codebook += build_codebook(right_tree, code+'1')
+    else:
+        codebook.append((right_tree, code+'1'))
+    return codebook
+
+# encodes the message
+def huffman_encode(codebook, message):
+
+    encoded_message = ''
+
+    # build a char -> code dictionary
+    forward_dict = dict(codebook)
+
+    # replace each character with its code
+    for char in message:
+        encoded_message += forward_dict[char]
+
+    return encoded_message
+
+# decodes a message
+def huffman_decode(codebook, encoded_message):
+
+    decoded_message = ''
+    key = ''
+
+    # build a code -> char dictionary
+    inverse_dict = dict([(v, k) for k, v in codebook])
+
+    # for each bit in the encoding
+    # if the bit is in the dictionary, replace the bit with the paired character
+    # else look at the bit and the following bits together until a match occurs
+    # move to the next bit not yet looked at
+    for index, bit in enumerate(encoded_message):
+        key += bit
+        if key in inverse_dict:
+            decoded_message += inverse_dict[key]
+            key = ''
+
+    return decoded_message
+
+def main():
+
+    # test example
+    message = 'bibbity_bobbity'
+    tree = build_huffman_tree(message)
+    codebook = build_codebook(tree)
+    encoded_message = huffman_encode(codebook, message)
+    decoded_message = huffman_decode(codebook, encoded_message)
+
+    print('message: ' + message)
+    print('huffman tree: ' + str(tree))
+    print('codebook: ' + str(codebook))
+    print('encoded message: ' + encoded_message)
+    print('decoded message: ' + decoded_message)
+
+    # prints the following:
+    #
+    #  message: bibbity_bobbity
+    #  huffman_tree: ['b', [[['_', 'o'], 'y'], ['t', 'i']]]
+    #  codebook: [('b', '0'), ('_', '1000'), ('o', '1001'),
+    #             ('y', '101'), ('t', '110'), ('i', '111')]
+    #  encoded_message: 01110011111010110000100100111110101
+    #  decoded_message: bibbity_bobbity
+
+if __name__ == '__main__':
+    main()