Skip to content

Commit 122a5a8

Browse files
authored
Huffman encoding in Coconut (#732)
1 parent bb87921 commit 122a5a8

File tree

2 files changed

+121
-0
lines changed

2 files changed

+121
-0
lines changed
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
from collections import Counter, deque
2+
from bisect import bisect
3+
4+
# Common base class for every Huffman tree variant, so that patterns and
# type checks can refer to "any tree".
class Tree

# The tree produced for an empty message; it carries no fields.
data Empty() from Tree

# A single symbol (`char`) together with its integer count (`n`).
data Leaf(char, n is int) from Tree:
    def __str__(self):
        return 'Leaf({}, {})'.format(self.char, self.n)

    # The compact form is used for repr() as well.
    __repr__ = __str__

# An internal node joining two subtrees; both fields must be Tree instances.
data Node(left is Tree, right is Tree) from Tree:
    def __str__(self):
        return 'Node({}, {})'.format(self.left, self.right)

    # The compact form is used for repr() as well.
    __repr__ = __str__
17+
18+
# weight(tree): total of the counts stored in the leaves of `tree`.
#
#   Empty()           -> 0
#   Leaf(char, n)     -> n
#   Node(left, right) -> weight(left) + weight(right)
#
# The base case matches Empty() explicitly, the same base case that
# build_codebook uses. Leaf and Node are declared `from Tree`, so a bare
# `Tree()` pattern that is treated as an isinstance check would also accept
# them; being the first clause tried, it would make every weight 0.
def weight(Empty()) = 0
addpattern def weight(Leaf(char, n)) = n
addpattern def weight(Node(left, right)) = weight(left) + weight(right)
21+
22+
def build_huffman_tree(message):
    """Build the Huffman tree for `message`.

    Returns Empty() when `message` has no symbols, a lone Leaf when it has
    exactly one distinct symbol, and a Node otherwise.
    """
    # One leaf per distinct symbol. most_common() lists the most frequent
    # symbols first, so walking it backwards gives ascending counts.
    leaves = [Leaf(char, count)
              for char, count in reversed(Counter(message).most_common())]
    queue = deque(leaves)

    if not queue:
        return Empty()

    # Repeatedly merge the two lightest trees until one tree remains.
    while len(queue) > 1:
        lightest = queue.popleft()
        second_lightest = queue.popleft()
        merged = Node(lightest, second_lightest)

        # bisect gives the insertion point to the right of every queued tree
        # whose weight is <= the merged weight (len(queue) if all of them
        # are), so the queue stays in ascending order of weight.
        queued_weights = [weight(tree) for tree in queue]
        queue.insert(bisect(queued_weights, weight(merged)), merged)

    return queue[0]
51+
52+
53+
# build_codebook(tree, code=''): list of (char, code) pairs for `tree`.
# `code` is the bit string accumulated on the way down from the root.

# An empty tree has no symbols, hence no codebook entries.
def build_codebook(Empty(), code=''):
    return []

# A leaf contributes its symbol paired with the path that led to it.
addpattern def build_codebook(Leaf(symbol, _), code=''):
    return [(symbol, code)]

# Descending left appends '0' to the path, descending right appends '1';
# left-subtree entries come first in the result.
addpattern def build_codebook(Node(zero_branch, one_branch), code=''):
    zero_entries = build_codebook(zero_branch, code + '0')
    one_entries = build_codebook(one_branch, code + '1')
    return zero_entries + one_entries
57+
58+
def huffman_encode(codebook, message):
    """Encode `message` as a string of '0'/'1' characters.

    `codebook` is a sequence of (char, code) pairs. A KeyError is raised if
    `message` contains a character missing from a multi-entry codebook.
    """
    # With a single entry, emit one '0' per character of the message instead
    # of looking codes up.
    if len(codebook) == 1:
        return '0' * len(message)

    # char -> code lookup table
    code_for = dict(codebook)
    return ''.join(code_for[symbol] for symbol in message)
67+
68+
def huffman_decode(codebook, encoded_message):
    """Decode a string of '0'/'1' characters back into the original text.

    `codebook` is a sequence of (char, code) pairs. An empty
    `encoded_message` always decodes to the empty string.
    """
    if not codebook:
        return ''

    # With a single entry, every character of the encoding stands for one
    # occurrence of that entry's symbol.
    if len(codebook) == 1:
        return codebook[0][0] * len(encoded_message)

    # code -> char lookup table
    inverse_dict = {code: char for char, code in codebook}

    # Accumulate bits until they spell a known code, emit its character and
    # start over. An empty encoding skips the loop and yields '', so no
    # lookup of the '' key is needed (it is not among the codes read off a
    # tree with several symbols). Bits left over at the end that do not
    # complete a code are dropped.
    decoded_message = []
    key = ''
    for bit in encoded_message:
        key += bit
        if key in inverse_dict:
            decoded_message.append(inverse_dict[key])
            key = ''

    return ''.join(decoded_message)
95+
96+
97+
if __name__ == '__main__':
    # Round-trip a sample message through the encoder and decoder.
    message = 'bibbity_bobbity'
    tree = build_huffman_tree(message)
    codebook = build_codebook(tree)
    encoded_message = huffman_encode(codebook, message)
    decoded_message = huffman_decode(codebook, encoded_message)

    report = (
        ('message:', message),
        ('huffman tree:', tree),
        ('codebook:', codebook),
        ('encoded message:', encoded_message),
        ('decoded message:', decoded_message),
    )
    for label, value in report:
        print(label, value)

    # Expected output (long lines wrapped here for readability):
    #
    # message: bibbity_bobbity
    # huffman tree: Node(Leaf(b, 6), Node(Node(Leaf(y, 2), Leaf(t, 2)),
    #     Node(Node(Leaf(o, 1), Leaf(_, 1)), Leaf(i, 3))))
    # codebook: [('b', '0'), ('y', '100'), ('t', '101'),
    #     ('o', '1100'), ('_', '1101'), ('i', '111')]
    # encoded message: 01110011110110011010110000111101100
    # decoded message: bibbity_bobbity

contents/huffman_encoding/huffman_encoding.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@ The code snippet was taken from this [scratch project](https://scratch.mit.edu/p
9898
<p>
9999
<img class="center" src="code/scratch/huffman.svg" width="700" />
100100
</p>
101+
{% sample lang="coco" %}
102+
[import, lang:"coconut"](code/coconut/huffman.coco)
101103
{% endmethod %}
102104

103105
<script>

0 commit comments

Comments
 (0)