From 2bf034e715bc522986f84f2557e02b704d4e7d1f Mon Sep 17 00:00:00 2001
From: foldsters <37962412+foldsters@users.noreply.github.com>
Date: Mon, 30 Apr 2018 18:04:32 -0400
Subject: [PATCH 1/3] Add Python Implementation of Huffman Encoding

---
 .../huffman/code/python/huffman.py            | 116 ++++++++++++++++++
 1 file changed, 116 insertions(+)
 create mode 100644 chapters/data_compression/huffman/code/python/huffman.py

diff --git a/chapters/data_compression/huffman/code/python/huffman.py b/chapters/data_compression/huffman/code/python/huffman.py
new file mode 100644
index 000000000..1dfbcab31
--- /dev/null
+++ b/chapters/data_compression/huffman/code/python/huffman.py
@@ -0,0 +1,116 @@
+# Huffman Encoding
+# Python 2.7+
+# Submitted by Matthew Giallourakis 
+
+from collections import Counter
+
+# constructs the tree
+def build_tree(message):
+    
+    # get sorted list of character,frequency pairs
+    frequencies = Counter(message)
+    trees = frequencies.most_common()
+
+    # while there is more than one tree
+    while len(trees) > 1:
+        
+        # pop off the two trees of least weight from the trees list
+        tree_left,weight_left = trees.pop()
+        tree_right,weight_right = trees.pop()
+        
+        # combine the nodes and add back to the nodes list
+        new_tree = [tree_left,tree_right]
+        new_weight = weight_left+weight_right
+        trees.append((new_tree,new_weight))
+
+        # sort the trees list by weight
+        trees = sorted(trees, key=lambda n: n[1], reverse=True)
+
+    tree = trees[0][0]
+    return tree
+
+# constructs the mapping with recursion
+def build_mapping(tree,code=''):
+
+    results = []
+
+    # split the tree
+    left_tree,right_tree = tree
+
+    # if the left node has children, find the mapping of those children
+    # else pair the character with the current code + 0 
+    if type(left_tree) is list:
+        results += build_mapping(left_tree,code+'0')
+    else:
+        results.append((left_tree,code+'0'))
+
+    # if the right node has children, find the mapping of those children
+    # else pair the character with the current code + 1 
+    if type(right_tree) is list:
+        results += build_mapping(right_tree,code+'1')
+    else:
+        results.append((right_tree,code+'1'))
+
+    return results
+
+# encodes the message
+def encode(mapping,message):
+    
+    encoding = ""
+
+    # build a char -> code dictionary
+    forward_dict = dict(mapping)
+
+    # replace each character with its code
+    for char in message:
+        encoding += forward_dict[char]
+    
+    return encoding
+
+# decodes a message
+def decode(mapping,encoding):
+    
+    message = ""
+    key = ""
+
+    # build a code -> char dictionary
+    inverse_dict = dict([(v,k) for k,v in mapping])
+
+    # for each bit in the encoding
+    # if the bit is in the dictionary, replace the bit with the paired character
+    # else look at the bit and the following bits together until a match occurs
+    # move to the next bit not yet looked at
+    for index,bit in enumerate(encoding):
+        key += bit
+        if key in inverse_dict:
+            message += inverse_dict[key]
+            key = ""
+    
+    return message
+
+def main():
+
+    # test example
+    message = "bibbity_bobbity"
+    tree = build_tree(message)
+    mapping = build_mapping(tree)
+    encoding = encode(mapping,message)
+    decoding = decode(mapping,encoding)
+
+    print('message: '+message)
+    print('tree: '+str(tree))
+    print('mapping: '+str(mapping))
+    print('encoding: '+encoding)
+    print('decoding: '+decoding)
+
+    # prints the following:
+    #
+    #  message: bibbity_bobbity
+    #  tree: ['b', [[['_', 'o'], 'y'], ['t', 'i']]]
+    #  mapping: [('b', '0'), ('_', '1000'), ('o', '1001'),
+    #            ('y', '101'), ('t', '110'), ('i', '111')]
+    #  encoding: 01110011111010110000100100111110101
+    #  decoding: bibbity_bobbity
+    
+if __name__ == '__main__':
+    main()

From 9016746a909f9ebaea5486bbae00d882007bc93b Mon Sep 17 00:00:00 2001
From: foldsters <37962412+foldsters@users.noreply.github.com>
Date: Tue, 1 May 2018 16:14:13 -0400
Subject: [PATCH 2/3] Update huffman.py

---
 .../huffman/code/python/huffman.py            | 96 ++++++++++---------
 1 file changed, 49 insertions(+), 47 deletions(-)

diff --git a/chapters/data_compression/huffman/code/python/huffman.py b/chapters/data_compression/huffman/code/python/huffman.py
index 1dfbcab31..5030a9cf3 100644
--- a/chapters/data_compression/huffman/code/python/huffman.py
+++ b/chapters/data_compression/huffman/code/python/huffman.py
@@ -5,9 +5,9 @@
 from collections import Counter
 
 # constructs the tree
-def build_tree(message):
+def build_huffman_tree(message):
     
-    # get sorted list of character,frequency pairs
+    # get sorted list of character and frequency pairs
     frequencies = Counter(message)
     trees = frequencies.most_common()
 
@@ -19,98 +19,100 @@ def build_tree(message):
         tree_right,weight_right = trees.pop()
         
         # combine the nodes and add back to the nodes list
-        new_tree = [tree_left,tree_right]
-        new_weight = weight_left+weight_right
-        trees.append((new_tree,new_weight))
+        new_tree = [tree_left, tree_right]
+        new_weight = weight_left + weight_right
 
-        # sort the trees list by weight
-        trees = sorted(trees, key=lambda n: n[1], reverse=True)
+        # find the index of the first tree whose weight is smaller than new_weight
+        # if no such tree exists, fall back to len(trees) so that insert() appends
+        index = next((i for i, tree in enumerate(trees) if tree[1] < new_weight), len(trees))
+        
+        # insert the new tree there
+        trees.insert(index, (new_tree, new_weight))
 
-    tree = trees[0][0]
-    return tree
+    huffman_tree = trees[0][0]
+    return huffman_tree
 
 # constructs the mapping with recursion
-def build_mapping(tree,code=''):
+def build_codebook(tree, code=''):
 
-    results = []
+    codebook = []
 
     # split the tree
-    left_tree,right_tree = tree
+    left_tree, right_tree = tree
 
     # if the left node has children, find the mapping of those children
     # else pair the character with the current code + 0 
     if type(left_tree) is list:
-        results += build_mapping(left_tree,code+'0')
+        codebook += build_codebook(left_tree, code+'0')
     else:
-        results.append((left_tree,code+'0'))
+        codebook.append((left_tree, code+'0'))
 
     # if the right node has children, find the mapping of those children
     # else pair the character with the current code + 1 
     if type(right_tree) is list:
-        results += build_mapping(right_tree,code+'1')
+        codebook += build_codebook(right_tree, code+'1')
     else:
-        results.append((right_tree,code+'1'))
-
-    return results
+        codebook.append((right_tree, code+'1'))
+    return codebook
 
 # encodes the message
-def encode(mapping,message):
+def huffman_encode(codebook, message):
     
-    encoding = ""
+    encoded_message = ''
 
     # build a char -> code dictionary
-    forward_dict = dict(mapping)
+    forward_dict = dict(codebook)
 
     # replace each character with its code
     for char in message:
-        encoding += forward_dict[char]
+        encoded_message += forward_dict[char]
     
-    return encoding
+    return encoded_message
 
 # decodes a message
-def decode(mapping,encoding):
+def huffman_decode(codebook, encoded_message):
     
-    message = ""
-    key = ""
+    decoded_message = ''
+    key = ''
 
     # build a code -> char dictionary
-    inverse_dict = dict([(v,k) for k,v in mapping])
+    inverse_dict = dict([(v, k) for k, v in codebook])
 
     # for each bit in the encoding
     # if the bit is in the dictionary, replace the bit with the paired character
     # else look at the bit and the following bits together until a match occurs
     # move to the next bit not yet looked at
-    for index,bit in enumerate(encoding):
+    for index, bit in enumerate(encoded_message):
         key += bit
         if key in inverse_dict:
-            message += inverse_dict[key]
-            key = ""
+            decoded_message += inverse_dict[key]
+            key = ''
     
-    return message
+    return decoded_message
 
 def main():
 
     # test example
-    message = "bibbity_bobbity"
-    tree = build_tree(message)
-    mapping = build_mapping(tree)
-    encoding = encode(mapping,message)
-    decoding = decode(mapping,encoding)
-
-    print('message: '+message)
-    print('tree: '+str(tree))
-    print('mapping: '+str(mapping))
-    print('encoding: '+encoding)
-    print('decoding: '+decoding)
+    message = 'bibbity_bobbity'
+    tree = build_huffman_tree(message)
+    codebook = build_codebook(tree)
+    encoded_message = huffman_encode(codebook, message)
+    decoded_message = huffman_decode(codebook, encoded_message)
+
+    print('message: ' + message)
+    print('huffman tree: ' + str(tree))
+    print('codebook: ' + str(codebook))
+    print('encoded message: ' + encoded_message)
+    print('decoding: ' + decoded_message)
 
     # prints the following:
     #
     #  message: bibbity_bobbity
-    #  tree: ['b', [[['_', 'o'], 'y'], ['t', 'i']]]
-    #  mapping: [('b', '0'), ('_', '1000'), ('o', '1001'),
-    #            ('y', '101'), ('t', '110'), ('i', '111')]
-    #  encoding: 01110011111010110000100100111110101
-    #  decoding: bibbity_bobbity
+    #  huffman tree: ['b', [[['_', 'o'], 'y'], ['t', 'i']]]
+    #  codebook: [('b', '0'), ('_', '1000'), ('o', '1001'),
+    #             ('y', '101'), ('t', '110'), ('i', '111')]
+    #  encoded message: 01110011111010110000100100111110101
+    #  decoded message: bibbity_bobbity
     
 if __name__ == '__main__':
     main()

From 130b408f499b7c18c369b3a5c946f6857e12b7d0 Mon Sep 17 00:00:00 2001
From: foldsters <37962412+foldsters@users.noreply.github.com>
Date: Tue, 1 May 2018 16:17:31 -0400
Subject: [PATCH 3/3] Update huffman.py

---
 chapters/data_compression/huffman/code/python/huffman.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chapters/data_compression/huffman/code/python/huffman.py b/chapters/data_compression/huffman/code/python/huffman.py
index 5030a9cf3..700dc68b9 100644
--- a/chapters/data_compression/huffman/code/python/huffman.py
+++ b/chapters/data_compression/huffman/code/python/huffman.py
@@ -103,7 +103,7 @@ def main():
     print('huffman tree: ' + str(tree))
     print('codebook: ' + str(codebook))
     print('encoded message: ' + encoded_message)
-    print('decoding: ' + decoded_message)
+    print('decoded message: ' + decoded_message)
 
     # prints the following:
     #