|
--- Count how often each character (byte) occurs in a string.
--
-- The result is a sequence of `{ character, count }` pairs ordered by count,
-- highest first. Entries with the same count are ordered by character so the
-- result is identical on every run: `pairs` visits keys in an unspecified
-- order and `table.sort` is not stable, so without the tie-break equal counts
-- could come out in any order.
--
-- @param str string to analyse
-- @return sequence of `{ character, count }` pairs, most frequent first
local function frequency_array(str)
  -- Tally occurrences: counts[ch] is the number of times ch was seen.
  local counts = {}
  for ch in str:gmatch(".") do
    counts[ch] = (counts[ch] or 0) + 1
  end

  -- Flatten the tally into a sequence so it can be sorted.
  local entries = {}
  for ch, count in pairs(counts) do
    entries[#entries + 1] = { ch, count }
  end

  table.sort(entries, function(a, b)
    if a[2] ~= b[2] then
      return a[2] > b[2] -- higher count first
    end
    return a[1] < b[1] -- deterministic order among equal counts
  end)

  return entries
end
| 17 | + |
--- Build a Huffman tree for the characters of `message`.
--
-- Leaves are one-character strings; internal nodes are arrays
-- `{ left, right }`. A message with a single distinct character yields the
-- one-child node `{ character }`, so that character receives the code "0"
-- instead of the empty code (which would encode the whole message to "").
--
-- @param message string to build the tree for
-- @return the root node, or nothing when `message` is empty
function build_huffman_tree(message)
  if #message == 0 then return end

  -- Work queue of { node, weight } pairs kept heaviest-first, so the two
  -- lightest entries are always at the tail and can be popped cheaply.
  local queue = frequency_array(message)

  if #queue == 1 then
    -- Only one distinct character: wrap the leaf so it sits below a root.
    return { queue[1][1] }
  end

  while #queue > 1 do -- Repeat until a single root remains
    -- Pop the two lightest entries.
    local first, second = table.remove(queue), table.remove(queue)

    -- Parent node: children in slot 1, combined weight in slot 2.
    local merged = { { first[1], second[1] }, first[2] + second[2] }

    -- Sorted insertion into the descending queue: skip every entry that is
    -- strictly heavier, then insert in front of the first one that is not.
    local pos = 1
    while pos <= #queue and queue[pos][2] > merged[2] do
      pos = pos + 1
    end
    table.insert(queue, pos, merged)
  end

  return queue[1][1] -- Node stored in the only remaining queue entry
end
| 42 | + |
--- Recursively record the bit code of every leaf below `node`.
--
-- @param node current tree node: a leaf string, a `{ left, right }` table,
--   or nil/false for an absent branch
-- @param codebook table filled in place, mapping leaf string -> code string
-- @param code bits accumulated on the path from the root to `node`
local function _create_codebook(node, codebook, code)
  -- A leaf: the path walked so far is its code.
  if type(node) == "string" then
    codebook[node] = code
    return
  end

  -- An absent branch contributes nothing.
  if not node then
    return
  end

  -- Internal node: going left appends "0", going right appends "1".
  local left_code, right_code = code .. "0", code .. "1"
  _create_codebook(node[1], codebook, left_code)
  _create_codebook(node[2], codebook, right_code)
end
| 53 | + |
--- Build the character -> bit-code lookup table for a Huffman tree.
--
-- @param tree root node as returned by `build_huffman_tree` (may be nil)
-- @return table mapping each leaf string to its code, a string of "0"/"1"
function create_codebook(tree)
  local codes = {}
  -- Start at the root with an empty path; the helper fills `codes` in place.
  _create_codebook(tree, codes, "")
  return codes
end
| 59 | + |
--- Encode `message` as a string of "0"/"1" characters using `codebook`.
--
-- @param codebook table mapping each character to its code string
-- @param message string to encode
-- @return the concatenated codes of every character in `message`
-- Raises an error when a character of `message` has no entry in `codebook`;
-- appending a nil code would silently drop that character and produce a
-- bitstring that decodes to a different message.
function huffman_encode(codebook, message)
  local pieces = {}
  for ch in message:gmatch(".") do -- Iterate over each character in message
    local code = codebook[ch]
    if code == nil then
      error(("huffman_encode: no code for character %q"):format(ch), 2)
    end
    pieces[#pieces + 1] = code
  end
  -- Join once at the end instead of growing a string inside the loop.
  return table.concat(pieces)
end
| 67 | + |
--- Decode a single symbol from `bitstring`, starting at bit `i`.
--
-- Walks down from `node` until a leaf (a string) is reached: a "0" bit
-- follows `node[1]`, a "1" bit follows `node[2]`.
--
-- @param node tree node to start from (leaf string or `{ left, right }` table)
-- @param bitstring string made of "0" and "1" characters
-- @param i 1-based index of the first bit to read
-- @return the leaf's string, and the index of the next unread bit
-- Raises an error when the bitstring ends in the middle of a code, contains a
-- character other than "0"/"1", or leads to a branch the tree does not have.
local function _huffman_decode(node, bitstring, i)
  while type(node) ~= "string" do
    if type(node) ~= "table" then
      error(("huffman decode: reached a missing tree branch at position %d"):format(i))
    end

    local bit = bitstring:sub(i, i)
    if bit == "0" then
      node = node[1] -- left side
    elseif bit == "1" then
      node = node[2] -- right side
    elseif bit == "" then
      -- string.sub past the end yields "", i.e. we ran out of bits.
      error(("huffman decode: bitstring ends in the middle of a code (position %d)"):format(i))
    else
      error(("huffman decode: invalid bit %q at position %d"):format(bit, i))
    end
    i = i + 1
  end

  return node, i
end
| 78 | + |
--- Decode a string of "0"/"1" characters back into the original message.
--
-- @param tree root node of the Huffman tree used for encoding
-- @param bitstring string made of "0" and "1" characters
-- @return the decoded message
-- Raises an error when a symbol cannot be decoded (invalid or truncated
-- input) or when decoding a symbol consumes no bits, which happens if `tree`
-- is a bare leaf and would otherwise loop forever.
function huffman_decode(tree, bitstring)
  -- `pos` is the index of the next unread bit; the helper returns the updated
  -- index together with each decoded symbol.
  local symbols, pos = {}, 1
  local total = #bitstring

  while pos <= total do
    local symbol, next_pos = _huffman_decode(tree, bitstring, pos)

    if symbol == nil or next_pos == nil then
      error(("huffman_decode: invalid or truncated code starting at bit %d"):format(pos), 2)
    end
    if next_pos <= pos then
      -- No progress was made, so the loop could never terminate.
      error("huffman_decode: tree is a bare leaf, so no bits can be consumed", 2)
    end

    symbols[#symbols + 1] = symbol
    pos = next_pos
  end

  return table.concat(symbols)
end
| 88 | + |
-- Demonstration: encode a sample string and decode it again.
local sample = "bibbity_bobbity"

-- The same tree drives both the codebook (encoding) and the decoder.
local huffman_tree = build_huffman_tree(sample)
local codes = create_codebook(huffman_tree)

local encoded = huffman_encode(codes, sample)
print("Encoded: " .. encoded)

local decoded = huffman_decode(huffman_tree, encoded)
print("Decoded: " .. decoded)
0 commit comments