# Huffman Encoding
# Python 2.7+
# Submitted by Matthew Giallourakis
#
# A Huffman tree is represented with plain Python objects:
#   * a leaf is the symbol itself (any hashable value that is not a list)
#   * an internal node is a two-element list [left_subtree, right_subtree]
# A codebook is a list of (symbol, code) pairs where code is a string of
# '0' and '1' characters.

from collections import Counter


# constructs the tree
def build_huffman_tree(message):
    """Build a Huffman tree for the symbols of ``message``.

    Args:
        message: an iterable of hashable symbols (typically a string).

    Returns:
        The root of the tree: a nested two-element list, or the bare
        symbol itself when ``message`` contains only one distinct symbol.

    Raises:
        ValueError: if ``message`` is empty (there is nothing to encode).
    """
    # list of (tree, weight) pairs, sorted by descending weight
    trees = Counter(message).most_common()
    if not trees:
        raise ValueError('cannot build a Huffman tree from an empty message')

    # while there is more than one tree
    while len(trees) > 1:

        # pop off the two trees of least weight from the end of the list
        tree_left, weight_left = trees.pop()
        tree_right, weight_right = trees.pop()

        # combine them into a new internal node
        new_tree = [tree_left, tree_right]
        new_weight = weight_left + weight_right

        # find the first tree that is lighter than the new one; inserting
        # there keeps the list sorted by descending weight without a full
        # re-sort. If no such tree exists, append at the end.
        index = next(
            (i for i, (_, weight) in enumerate(trees) if weight < new_weight),
            len(trees)
        )
        trees.insert(index, (new_tree, new_weight))

    return trees[0][0]


# constructs the mapping with recursion
def build_codebook(tree, code=''):
    """Walk ``tree`` and return its codebook.

    Args:
        tree: a tree as returned by ``build_huffman_tree``.
        code: the bit string accumulated on the path from the root to
            ``tree``; callers normally leave this at its default.

    Returns:
        A list of (symbol, code) pairs, left subtree before right subtree.
        A tree that is a single leaf gets the one-bit code '0', because an
        empty code could not be decoded.
    """
    # a leaf: pair the symbol with the code accumulated so far
    if not isinstance(tree, list):
        return [(tree, code or '0')]

    # an internal node: the left branch appends '0', the right branch '1'
    left_tree, right_tree = tree
    codebook = []
    for bit, subtree in (('0', left_tree), ('1', right_tree)):
        codebook.extend(build_codebook(subtree, code + bit))
    return codebook


# encodes the message
def huffman_encode(codebook, message):
    """Replace every symbol of ``message`` with its code.

    Args:
        codebook: a list of (symbol, code) pairs.
        message: an iterable of symbols, all present in ``codebook``.

    Returns:
        The concatenated codes as one string of '0' and '1' characters.

    Raises:
        KeyError: if ``message`` contains a symbol missing from ``codebook``.
    """
    # build a symbol -> code dictionary
    forward_dict = dict(codebook)

    # join once instead of growing a string inside the loop
    return ''.join(forward_dict[symbol] for symbol in message)


# decodes a message
def huffman_decode(codebook, encoded_message):
    """Turn a bit string produced by ``huffman_encode`` back into text.

    Args:
        codebook: a list of (symbol, code) pairs whose symbols are strings.
        encoded_message: a string of '0' and '1' characters.

    Returns:
        The decoded message as a string.

    Raises:
        ValueError: if ``encoded_message`` ends with bits that do not form
            a complete code.
    """
    # build a code -> symbol dictionary
    inverse_dict = {code: symbol for symbol, code in codebook}

    decoded_symbols = []
    key = ''

    # Huffman codes are prefix-free, so bits can be accumulated one at a
    # time and emitted as soon as they match a code.
    for bit in encoded_message:
        key += bit
        if key in inverse_dict:
            decoded_symbols.append(inverse_dict[key])
            key = ''

    # leftover bits mean the input was truncated or used another codebook
    if key:
        raise ValueError(
            'encoded message ends with %d bit(s) that do not form a '
            'complete code' % len(key)
        )

    return ''.join(decoded_symbols)


def main():
    """Run a round trip on a sample message and print every stage."""

    # test example
    message = 'bibbity_bobbity'
    tree = build_huffman_tree(message)
    codebook = build_codebook(tree)
    encoded_message = huffman_encode(codebook, message)
    decoded_message = huffman_decode(codebook, encoded_message)

    print('message: ' + message)
    print('huffman tree: ' + str(tree))
    print('codebook: ' + str(codebook))
    print('encoded message: ' + encoded_message)
    print('decoded message: ' + decoded_message)

    # prints the following on Python 3.7+ (symbols with equal frequency may
    # be ordered differently on older interpreters, which swaps some codes
    # but never changes the encoded length):
    #
    # message: bibbity_bobbity
    # huffman tree: ['b', [[['o', '_'], 'y'], ['t', 'i']]]
    # codebook: [('b', '0'), ('o', '1000'), ('_', '1001'),
    #            ('y', '101'), ('t', '110'), ('i', '111')]
    # encoded message: 01110011111010110010100000111110101
    # decoded message: bibbity_bobbity


if __name__ == '__main__':
    main()