-
-
Notifications
You must be signed in to change notification settings - Fork 359
Re-added Huffman encoding in Coconut #732
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
b8410d1
d64bf49
c4fb8c6
7cec66e
43d4f76
90f0c96
fe10711
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
# Huffman Encoding | ||
# Coconut | ||
# Submitted by Amaras | ||
# Inspired by Matthew Giallourakis | ||
|
||
from collections import Counter, deque | ||
from bisect import bisect | ||
|
||
class Tree | ||
|
||
data Empty() from Tree | ||
data Leaf(char, n is int) from Tree: | ||
def __str__(self): | ||
return f'Leaf({self.char}, {self.n})' | ||
|
||
__repr__ = __str__ | ||
|
||
data Node(left is Tree, right is Tree) from Tree: | ||
def __str__(self): | ||
return f'Node({str(self.left)}, {str(self.right)})' | ||
__repr__ = __str__ | ||
|
||
def weight(Tree()) = 0 | ||
addpattern def weight(Leaf(char, n)) = n | ||
addpattern def weight(Node(left, right)) = weight(left) + weight(right) | ||
|
||
# constructs the tree | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is one of the comments that I'd drop from the Python implementation. |
||
def build_huffman_tree(message): | ||
|
||
# get sorted list of character and frequency pairs | ||
frequencies = Counter(message) | ||
trees = frequencies.most_common() |> map$(t -> Leaf(*t)) |> reversed |> deque | ||
|
||
if not trees: | ||
return Empty() | ||
|
||
# while there is more than one tree | ||
while len(trees) > 1: | ||
|
||
# pop off the two trees of least weight from the trees list | ||
tree_left = trees.popleft() | ||
tree_right = trees.popleft() | ||
|
||
# combine the nodes and add back to the nodes list | ||
new_tree = Node(tree_left, tree_right) | ||
|
||
# find the first tree that has a weight smaller than new_weight | ||
# and returns its index in the list. | ||
# If no such tree can be found, use len(trees) instead to append | ||
index = bisect(trees |> map$(weight) |> list, weight(new_tree)) | ||
|
||
# insert the new tree there | ||
trees.insert(index, new_tree) | ||
|
||
huffman_tree = trees[0] | ||
return huffman_tree | ||
|
||
|
||
def build_codebook(Empty(), code='') = [] | ||
addpattern def build_codebook(Leaf(char, n), code='') = [(char, code)] | ||
addpattern def build_codebook(Node(left, right), code='') = | ||
build_codebook(left, code+'0') + build_codebook(right, code+'1') | ||
|
||
# encodes the message | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same. |
||
def huffman_encode(codebook, message): | ||
|
||
if len(codebook) == 1: | ||
return '0' * len(message) | ||
|
||
# build a char -> code dictionary | ||
forward_dict = dict(codebook) | ||
|
||
return ''.join(message |> map$(forward_dict[])) | ||
|
||
# decodes a message | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same. |
||
def huffman_decode(codebook, encoded_message): | ||
|
||
decoded_message = [] | ||
key = '' | ||
|
||
if not codebook: | ||
return '' | ||
elif len(codebook) == 1: | ||
return codebook[0][0] * len(encoded_message) | ||
|
||
# build a code -> char dictionary | ||
inverse_dict = dict((v, k) for k, v in codebook) | ||
|
||
# for each bit in the encoding | ||
# if the bit is in the dictionary, replace the bit with the paired | ||
# character else look at the bit and the following bits together | ||
# until a match occurs move to the next bit not yet looked at. | ||
if encoded_message == '': | ||
return inverse_dict[''] | ||
|
||
for bit in encoded_message: | ||
key += bit | ||
if key in inverse_dict: | ||
decoded_message.append(inverse_dict[key]) | ||
key = '' | ||
|
||
return ''.join(decoded_message) | ||
|
||
|
||
if __name__ == '__main__': | ||
# test example | ||
message = 'bibbity_bobbity' | ||
tree = build_huffman_tree(message) | ||
codebook = build_codebook(tree) | ||
encoded_message = huffman_encode(codebook, message) | ||
decoded_message = huffman_decode(codebook, encoded_message) | ||
|
||
print('message:', message) | ||
print('huffman tree:', tree) | ||
print('codebook:', codebook) | ||
print('encoded message:', encoded_message) | ||
print('decoded message:', decoded_message) | ||
|
||
# prints the following: | ||
# | ||
# message: bibbity_bobbity | ||
# huffman_tree: Node(Leaf(b, 6), Node(Node(Leaf(y, 2), Leaf(t, 2)), | ||
# Node(Node(Leaf(o, 1), Leaf(_, 1)), Leaf(i, 3)))) | ||
# codebook: [('b', '0'), ('y', '100'), ('t', '101'), | ||
# ('o', '1100'), ('_', '1101'), ('i', '111')] | ||
# encoded_message: 01110011110110011010110000111101100 | ||
# decoded_message: bibbity_bobbity |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we can get rid of this header, since some of it is redundant and
git blame
and the log is the source of truth.