Ugly? compression using huffman coding

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
{-# LANGUAGE OverloadedStrings #-}

module Compression (compress) where

import Control.Monad
import Control.Monad.ST

import Data.Bits
import Data.Vector (fromList, (!), Vector())
import Data.Word

import Data.Array.BitArray.ST hiding (map)
import Data.Array.BitArray.ByteString

import qualified Data.ByteString as B
import Data.ByteString (ByteString)

compress :: ByteString -> ByteString
compress bs = toByteString $ runST $ do
    arr <- newArray_ (0, (B.length bs + 1) * 11) -- worst case every Word8 is encoded as 11 bits
    (arr', len, bits) <- foldM writeWord (arr, 0, 7) $ map ((table !) . fromIntegral) $ B.unpack bs
    (arr'', len', bits') <- writeWord (arr', len, bits) $ table ! 256

    freeze =<< mapIndices (0, len' * 8 + bits') id arr''

-- (array, byte index, bit index) (bits remaining, word to encode bits from)
writeWord :: (STBitArray s Int, Int, Int) -> (Int, Word16)
    -> ST s (STBitArray s Int, Int, Int)
writeWord (a, i, j) (0, _) = return (a, i, j)
writeWord (a, i, j) (r, w) = do
    writeArray a (i * 8 + j) $ testBit w (r - 1)
    if j == 0
        then writeWord (a, succ i, 7) (pred r, w)
        else writeWord (a, i, pred j) (pred r, w)

-- (number of bits, data)
table :: Vector (Int, Word16)
table = fromList [
        (0x02, 0x00),   (0x05, 0x1F),   (0x06, 0x22),   (0x07, 0x34),
        (0x07, 0x75),   (0x06, 0x28),   (0x06, 0x3B),   (0x07, 0x32),
        (0x08, 0xE0),   (0x08, 0x62),   (0x07, 0x56),   (0x08, 0x79),
        (0x09, 0x19D),  (0x08, 0x97),   (0x06, 0x2A),   (0x07, 0x57),
        (0x08, 0x71),   (0x08, 0x5B),   (0x09, 0x1CC),  (0x08, 0xA7),
        (0x07, 0x25),   (0x07, 0x4F),   (0x08, 0x66),   (0x08, 0x7D),
        (0x09, 0x191),  (0x09, 0x1CE),  (0x07, 0x3F),   (0x09, 0x90),
        (0x08, 0x59),   (0x08, 0x7B),   (0x08, 0x91),   (0x08, 0xC6),
        (0x06, 0x2D),   (0x09, 0x186),  (0x08, 0x6F),   (0x09, 0x93),
        (0x0A, 0x1CC),  (0x08, 0x5A),   (0x0A, 0x1AE),  (0x0A, 0x1C0),
        (0x09, 0x148),  (0x09, 0x14A),  (0x09, 0x82),   (0x0A, 0x19F),
        (0x09, 0x171),  (0x09, 0x120),  (0x09, 0xE7),   (0x0A, 0x1F3),
        (0x09, 0x14B),  (0x09, 0x100),  (0x09, 0x190),  (0x06, 0x13),
        (0x09, 0x161),  (0x09, 0x125),  (0x09, 0x133),  (0x09, 0x195),
        (0x09, 0x173),  (0x09, 0x1CA),  (0x09, 0x86),   (0x09, 0x1E9),
        (0x09, 0xDB),   (0x09, 0x1EC),  (0x09, 0x8B),   (0x09, 0x85),
        (0x05, 0x0A),   (0x08, 0x96),   (0x08, 0x9C),   (0x09, 0x1C3),
        (0x09, 0x19C),  (0x09, 0x8F),   (0x09, 0x18F),  (0x09, 0x91),
        (0x09, 0x87),   (0x09, 0xC6),   (0x09, 0x177),  (0x09, 0x89),
        (0x09, 0xD6),   (0x09, 0x8C),   (0x09, 0x1EE),  (0x09, 0x1EB),
        (0x09, 0x84),   (0x09, 0x164),  (0x09, 0x175),  (0x09, 0x1CD),
        (0x08, 0x5E),   (0x09, 0x88),   (0x09, 0x12B),  (0x09, 0x172),
        (0x09, 0x10A),  (0x09, 0x8D),   (0x09, 0x13A),  (0x09, 0x11C),
        (0x0A, 0x1E1),  (0x0A, 0x1E0),  (0x09, 0x187),  (0x0A, 0x1DC),
        (0x0A, 0x1DF),  (0x07, 0x74),   (0x09, 0x19F),  (0x08, 0x8D),
        (0x08, 0xE4),   (0x07, 0x79),   (0x09, 0xEA),   (0x09, 0xE1),
        (0x08, 0x40),   (0x07, 0x41),   (0x09, 0x10B),  (0x09, 0xB0),
        (0x08, 0x6A),   (0x08, 0xC1),   (0x07, 0x71),   (0x07, 0x78),
        (0x08, 0xB1),   (0x09, 0x14C),  (0x07, 0x43),   (0x08, 0x76),
        (0x07, 0x66),   (0x07, 0x4D),   (0x09, 0x8A),   (0x06, 0x2F),
        (0x08, 0xC9),   (0x09, 0xCE),   (0x09, 0x149),  (0x09, 0x160),
        (0x0A, 0x1BA),  (0x0A, 0x19E),  (0x0A, 0x39F),  (0x09, 0xE5),
        (0x09, 0x194),  (0x09, 0x184),  (0x09, 0x126),  (0x07, 0x30),
        (0x08, 0x6C),   (0x09, 0x121),  (0x09, 0x1E8),  (0x0A, 0x1C1),
        (0x0A, 0x11D),  (0x0A, 0x163),  (0x0A, 0x385),  (0x0A, 0x3DB),
        (0x0A, 0x17D),  (0x0A, 0x106),  (0x0A, 0x397),  (0x0A, 0x24E),
        (0x07, 0x2E),   (0x08, 0x98),   (0x0A, 0x33C),  (0x0A, 0x32E),
        (0x0A, 0x1E9),  (0x09, 0xBF),   (0x0A, 0x3DF),  (0x0A, 0x1DD),
        (0x0A, 0x32D),  (0x0A, 0x2ED),  (0x0A, 0x30B),  (0x0A, 0x107),
        (0x0A, 0x2E8),  (0x0A, 0x3DE),  (0x0A, 0x125),  (0x0A, 0x1E8),
        (0x09, 0xE9),   (0x0A, 0x1CD),  (0x0A, 0x1B5),  (0x09, 0x165),
        (0x0A, 0x232),  (0x0A, 0x2E1),  (0x0B, 0x3AE),  (0x0B, 0x3C6),
        (0x0B, 0x3E2),  (0x0A, 0x205),  (0x0A, 0x29A),  (0x0A, 0x248),
        (0x0A, 0x2CD),  (0x0A, 0x23B),  (0x0B, 0x3C5),  (0x0A, 0x251),
        (0x0A, 0x2E9),  (0x0A, 0x252),  (0x09, 0x1EA),  (0x0B, 0x3A0),
        (0x0B, 0x391),  (0x0A, 0x23C),  (0x0B, 0x392),  (0x0B, 0x3D5),
        (0x0A, 0x233),  (0x0A, 0x2CC),  (0x0B, 0x390),  (0x0A, 0x1BB),
        (0x0B, 0x3A1),  (0x0B, 0x3C4),  (0x0A, 0x211),  (0x0A, 0x203),
        (0x09, 0x12A),  (0x0A, 0x231),  (0x0B, 0x3E0),  (0x0A, 0x29B),
        (0x0B, 0x3D7),  (0x0A, 0x202),  (0x0B, 0x3AD),  (0x0A, 0x213),
        (0x0A, 0x253),  (0x0A, 0x32C),  (0x0A, 0x23D),  (0x0A, 0x23F),
        (0x0A, 0x32F),  (0x0A, 0x11C),  (0x0A, 0x384),  (0x0A, 0x31C),
        (0x0A, 0x17C),  (0x0A, 0x30A),  (0x0A, 0x2E0),  (0x0A, 0x276),
        (0x0A, 0x250),  (0x0B, 0x3E3),  (0x0A, 0x396),  (0x0A, 0x18F),
        (0x0A, 0x204),  (0x0A, 0x206),  (0x0A, 0x230),  (0x0A, 0x265),
        (0x0A, 0x212),  (0x0A, 0x23E),  (0x0B, 0x3AC),  (0x0B, 0x393),
        (0x0B, 0x3E1),  (0x0A, 0x1DE),  (0x0B, 0x3D6),  (0x0A, 0x31D),
        (0x0B, 0x3E5),  (0x0B, 0x3E4),  (0x0A, 0x207),  (0x0B, 0x3C7),
        (0x0A, 0x277),  (0x0B, 0x3D4),  (0x08, 0xC0),   (0x0A, 0x162),
        (0x0A, 0x3DA),  (0x0A, 0x124),  (0x0A, 0x1B4),  (0x0A, 0x264),
        (0x0A, 0x33D),  (0x0A, 0x1D1),  (0x0A, 0x1AF),  (0x0A, 0x39E),
        (0x0A, 0x24F),  (0x0B, 0x373),  (0x0A, 0x249),  (0x0B, 0x372),
        (0x09, 0x167),  (0x0A, 0x210),  (0x0A, 0x23A),  (0x0A, 0x1B8),
        (0x0B, 0x3AF),  (0x0A, 0x18E),  (0x0A, 0x2EC),  (0x07, 0x62),
        (0x04, 0x0D) ]