# Huffman Encoding
# Python 2.7+
# Submitted by Matthew Giallourakis
#
# Provenance: reconstructed from a three-commit patch series by foldsters
# (2018-04-28). The file was added as
# chapters/data_compression/huffman/code/python.py and then renamed, without
# content changes, to chapters/data_compression/huffman/code/python/huffman.py.

from collections import Counter


def build_tree(message):
    """Build a Huffman tree for the symbols of *message*.

    The tree is represented with plain Python values:

    * a leaf is the symbol itself (for a string message, a 1-character string);
    * an internal node is a two-element list ``[left, right]``.

    Args:
        message: an iterable of hashable symbols, typically a string.

    Returns:
        The root of the tree.  Two special cases follow from the
        representation: an empty message yields ``[]`` (the empty tree), and a
        message with a single distinct symbol yields that bare symbol, since
        there is nothing to merge it with.
    """
    # (symbol, frequency) pairs, most frequent first.
    trees = Counter(message).most_common()

    # Nothing to encode: return an empty tree instead of failing on trees[0].
    if not trees:
        return []

    # Repeatedly merge the two lightest trees until only one remains.
    while len(trees) > 1:
        # The list is kept sorted heaviest-first, so the two lightest trees
        # are at the end.
        tree_left, weight_left = trees.pop()
        tree_right, weight_right = trees.pop()

        trees.append(([tree_left, tree_right], weight_left + weight_right))

        # Stable sort: trees of equal weight keep their relative order, which
        # is what makes the resulting tree deterministic.
        trees.sort(key=lambda n: n[1], reverse=True)

    return trees[0][0]


def build_mapping(tree, code=''):
    """Walk *tree* and return the list of ``(symbol, code)`` pairs.

    Going left appends ``'0'`` to the code and going right appends ``'1'``;
    pairs are produced in left-to-right leaf order.

    Args:
        tree: a tree as returned by ``build_tree``.
        code: the bit-string prefix accumulated so far (used by the
            recursion; callers normally leave the default).

    Returns:
        A list of ``(symbol, code)`` tuples.  The empty tree ``[]`` gives an
        empty list.  A tree that is a single bare leaf gives that symbol the
        one-bit code ``'0'``, because an empty code could not be decoded.

    Raises:
        ValueError: if an internal node does not have exactly two children.
    """
    if not isinstance(tree, list):
        # Leaf.  `code` is only empty when the whole tree is this one leaf.
        return [(tree, code or '0')]

    if not tree:
        # Empty tree: no symbols, no codes.
        return []

    left_tree, right_tree = tree
    return (build_mapping(left_tree, code + '0') +
            build_mapping(right_tree, code + '1'))


def encode(mapping, message):
    """Encode *message* as a string of ``'0'``/``'1'`` characters.

    Args:
        mapping: ``(symbol, code)`` pairs as returned by ``build_mapping``.
        message: an iterable of symbols, each of which must appear in
            *mapping*.

    Returns:
        The concatenation of the codes of every symbol in *message*.

    Raises:
        KeyError: if *message* contains a symbol that is not in *mapping*.
    """
    # symbol -> code lookup table
    forward_dict = dict(mapping)

    return ''.join(forward_dict[char] for char in message)


def decode(mapping, encoding):
    """Decode a bit string produced by ``encode`` back into a message.

    Args:
        mapping: ``(symbol, code)`` pairs as returned by ``build_mapping``.
        encoding: a string of ``'0'``/``'1'`` characters.

    Returns:
        The decoded message as a string.  Trailing bits that do not complete
        a code are ignored.
    """
    # code -> symbol lookup table
    inverse_dict = {code: symbol for symbol, code in mapping}

    decoded = []
    key = ''

    # Accumulate bits until they spell a known code, emit its symbol, and
    # start over.  Huffman codes are prefix-free, so the first match is the
    # only possible one.
    for bit in encoding:
        key += bit
        if key in inverse_dict:
            decoded.append(inverse_dict[key])
            key = ''

    return ''.join(decoded)


def main():
    """Run a round-trip example and print every intermediate result."""

    # test example
    message = "bibbity_bobbity"
    tree = build_tree(message)
    mapping = build_mapping(tree)
    encoding = encode(mapping, message)
    decoding = decode(mapping, encoding)

    print('message: '+message)
    print('tree: '+str(tree))
    print('mapping: '+str(mapping))
    print('encoding: '+encoding)
    print('decoding: '+decoding)

    # On CPython 3.7+ (Counter keeps insertion order for equal counts) this
    # prints the following:
    #
    #   message: bibbity_bobbity
    #   tree: ['b', [[['o', '_'], 'y'], ['t', 'i']]]
    #   mapping: [('b', '0'), ('o', '1000'), ('_', '1001'),
    #             ('y', '101'), ('t', '110'), ('i', '111')]
    #   encoding: 01110011111010110010100000111110101
    #   decoding: bibbity_bobbity
    #
    # Interpreters that order equal-frequency symbols differently may break
    # ties the other way (for example swapping '_' and 'o'), which changes
    # the individual codes but still round-trips.


if __name__ == '__main__':
    main()