|
defmodule SnappyEx do
  @moduledoc """
  Encoder/decoder implementation for the [Snappy framing format](https://github.com/google/snappy/blob/main/framing_format.txt)
  """
  import Bitwise

  @bit_mask_32 2 ** 32 - 1
  # Size in bytes of the masked CRC-32C that prefixes the data of every payload chunk.
  @checksum_size 4
  # Largest amount of uncompressed data the encoder puts in a single payload chunk.
  @max_uncompressed_chunk_size 65_536
  @chunk_size_limit 65_540

  @stream_identifier "sNaPpY"

  @id_compressed_data 0x00
  @id_uncompressed_data 0x01
  @id_padding 0xFE
  @id_stream_identifier 0xFF

  @ids_payload_chunks [@id_compressed_data, @id_uncompressed_data]
  @ids_reserved_unskippable_chunks 0x02..0x7F
  @ids_reserved_skippable_chunks 0x80..0xFD

  ##########################
  ### Public API
  ##########################

  @doc """
  Compresses the given data.
  Returns the compressed data.

  The output starts with the stream identifier chunk, followed by one payload
  chunk per block of at most 65_536 input bytes. A block is stored as a
  compressed chunk when compressing it makes it smaller, and as an
  uncompressed chunk otherwise.

  ## Examples

      iex> SnappyEx.compress("")
      <<0xFF, 6::little-size(24)>> <> "sNaPpY"
  """
  @spec compress(binary()) :: binary()
  def compress(data) when is_binary(data) do
    IO.iodata_to_binary([stream_identifier_chunk() | encode_blocks(data)])
  end

  @doc """
  Uncompresses a given stream.
  Returns a result tuple with the uncompressed data or an error message.

  The stream must begin with a stream identifier chunk; malformed or truncated
  input yields an `{:error, message}` tuple.

  ## Examples

      iex> SnappyEx.decompress(<<0xFF, 6::little-size(24)>> <> "sNaPpY")
      {:ok, ""}

      iex> SnappyEx.decompress("")
      {:error, "no stream identifier at beginning"}
  """
  @spec decompress(binary()) :: {:ok, binary()} | {:error, String.t()}
  def decompress(<<@id_stream_identifier>> <> _ = chunks), do: decompress_frames(chunks, <<>>)
  def decompress(chunks) when is_binary(chunks), do: {:error, "no stream identifier at beginning"}

  @doc """
  Computes the masked CRC-32C checksum of `data`, as stored in payload chunks.

  The CRC-32C is calculated with `Crc32c.calc!/1`, rotated and offset, and the
  result is truncated to 32 bits.
  """
  @spec compute_checksum(binary()) :: Types.uint32()
  def compute_checksum(data) when is_binary(data) do
    checksum = Crc32c.calc!(data)

    # the crc32c checksum of the uncompressed data is masked before inserted into the
    # frame using masked_checksum = ((checksum >> 15) | (checksum << 17)) + 0xa282ead8
    # Integers are unbounded here, so the final mask emulates 32-bit wrap-around.
    (checksum >>> 15 ||| checksum <<< 17) + 0xA282EAD8 &&& @bit_mask_32
  end

  ##########################
  ### Private Functions
  ##########################

  # The chunk that opens every stream: id, 3-byte little-endian length, magic string.
  defp stream_identifier_chunk,
    do: <<@id_stream_identifier, 6::little-size(24)>> <> @stream_identifier

  # Splits the input into blocks of at most @max_uncompressed_chunk_size bytes and
  # encodes each one as a payload chunk. Empty input produces no payload chunks.
  defp encode_blocks(<<>>), do: []

  defp encode_blocks(<<block::binary-size(@max_uncompressed_chunk_size), rest::binary>>),
    do: [encode_chunk(block) | encode_blocks(rest)]

  defp encode_blocks(block), do: [encode_chunk(block)]

  # Encodes one block as a payload chunk: header, masked checksum of the raw block,
  # then the block itself (compressed only when that actually saves space).
  defp encode_chunk(block) do
    checksum = compute_checksum(block)

    {id, payload} =
      case :snappyer.compress(block) do
        {:ok, compressed} when byte_size(compressed) < byte_size(block) ->
          {@id_compressed_data, compressed}

        # Incompressible block (or compressor failure): store the raw bytes instead.
        _ ->
          {@id_uncompressed_data, block}
      end

    # The chunk length covers the checksum as well as the payload.
    chunk_size = byte_size(payload) + @checksum_size
    <<id, chunk_size::little-size(24), checksum::little-size(32), payload::binary>>
  end

  defp decompress_frames("", acc), do: {:ok, acc}

  defp decompress_frames(chunks, acc) do
    # Any {:error, _} returned by a step falls through `with` unchanged.
    with {:ok, {id, data, remaining_chunks}} <- process_chunk_metadata(chunks),
         {:ok, new_acc} <- parse_chunk(acc, id, data) do
      decompress_frames(remaining_chunks, new_acc)
    end
  end

  # chunk layout: 1-byte chunk_id, 3-bytes chunk_size, remaining chunks if any data present.
  defp process_chunk_metadata(
         <<id::size(8), size::little-size(24), chunk::binary-size(size),
           remaining_chunks::binary>>
       ),
       do: {:ok, {id, chunk, remaining_chunks}}

  # The header is complete but fewer than `size` bytes follow it.
  defp process_chunk_metadata(<<_id::size(8), size::little-size(24), rest::binary>>),
    do: {:error, "missing data in chunk. expected: #{size}. got: #{byte_size(rest)}"}

  # Fewer than 4 bytes left: not even a full chunk header.
  defp process_chunk_metadata(_chunks), do: {:error, "header too small"}

  # Stream identifier
  # NOTE: it can appear more than once, and must be validated each time
  defp parse_chunk(acc, @id_stream_identifier, @stream_identifier), do: {:ok, acc}
  defp parse_chunk(_, @id_stream_identifier, _), do: {:error, "invalid stream identifier"}

  # Data-carrying chunks (compressed or uncompressed)
  # NOTE(review): the framing format states its 65536-byte cap in terms of
  # *uncompressed* data; for compressed chunks this bounds the compressed payload
  # instead. Confirm whether the decompressed size should be checked as well.
  defp parse_chunk(_acc, id, data)
       when id in @ids_payload_chunks and byte_size(data) > @chunk_size_limit,
       do: {:error, "chunk is bigger than limit"}

  defp parse_chunk(acc, id, <<checksum::little-size(32), payload::binary>>)
       when id in @ids_payload_chunks do
    # The checksum always covers the uncompressed data, so verify after decompressing.
    with {:ok, uncompressed_data} <- decompress_payload(id, payload),
         :ok <- verify_checksum(uncompressed_data, checksum) do
      {:ok, <<acc::binary, uncompressed_data::binary>>}
    end
  end

  # A payload chunk shorter than its checksum is malformed; report it instead of crashing.
  defp parse_chunk(_acc, id, _data) when id in @ids_payload_chunks,
    do: {:error, "chunk too small to contain a checksum"}

  # Skippable chunks (padding or reserved)
  defp parse_chunk(acc, id, _data)
       when id == @id_padding or id in @ids_reserved_skippable_chunks,
       do: {:ok, acc}

  # Reserved unskippable chunks
  defp parse_chunk(_acc, id, _data) when id in @ids_reserved_unskippable_chunks,
    do: {:error, "unskippable chunk of type: #{id}"}

  defp decompress_payload(@id_compressed_data, data), do: :snappyer.decompress(data)
  defp decompress_payload(@id_uncompressed_data, data), do: {:ok, data}

  defp verify_checksum(data, checksum) do
    if checksum == compute_checksum(data),
      do: :ok,
      else: {:error, "invalid checksum"}
  end
end
0 commit comments