compressed Data.Map

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
{-# LANGUAGE RecordWildCards #-}
{- |

'Data.CMap' is a wrapper around 'Data.Map' which transparently
compresses all the values (but not the keys) stored in the map.

To do this, the values must be an instance of the class 'Serialize'
from the 'cereal' package.

This implementation is just proof-of-concept. In practice, it will
probably require more RAM than Data.Map since each value is compressed
individually.

A better implementation would probably store a common dictionary in
the 'CMap' which would be used when compressing the values. This would
result in a higher overall compression ratio.

-}
module Data.CMap where

import Codec.Compression.GZip ( CompressParams, compressWith
                              , decompress, defaultCompressParams)
import Control.Arrow          (second)
import Data.ByteString.Lazy   (ByteString)
import qualified Data.Map     as Map
import Data.Serialize         (Serialize, encodeLazy, decodeLazy)

------------------------------------------------------------------------------
-- CMap
------------------------------------------------------------------------------

-- | A map from @key@ to values that are held in serialized, compressed
-- form.
--
-- The @value@ type parameter does not appear in any field; it only
-- records, at the type level, what the stored 'ByteString's decode to.
data CMap key value = CMap 
    { params :: CompressParams
      -- ^ compressor settings; 'insert' reuses them for each new value
    , c_map  :: Map.Map key ByteString 
      -- ^ underlying map; every value is a compressed 'ByteString'
    }

------------------------------------------------------------------------------
-- fromList
------------------------------------------------------------------------------

-- | Build a 'CMap' from a list of key/value pairs.
--
-- Every value is compressed with 'defaultCompressParams'; see
-- 'fromListWith' to choose different settings.
fromList :: (Ord key, Serialize value)
         => [(key, value)] -- ^ key/value pairs to store
         -> CMap key value
fromList kvs = fromListWith defaultCompressParams kvs


-- | Build a 'CMap' from a list of key/value pairs.
--
-- Like 'fromList', but the caller supplies the 'CompressParams' used to
-- compress each value. The same settings are kept in the resulting
-- 'CMap'.
fromListWith :: (Ord key, Serialize value)
             => CompressParams -- ^ compressor settings
             -> [(key, value)] -- ^ key/value pairs to store
             -> CMap key value
fromListWith p kvs =
    CMap { params = p, c_map = Map.fromList packed }
  where
    -- compress the value half of every pair, leaving the keys untouched
    packed = [ (k, compressor p v) | (k, v) <- kvs ]

------------------------------------------------------------------------------
-- toList
------------------------------------------------------------------------------

-- | Convert a 'CMap' back into a list of key/value pairs, decoding each
-- stored value.
toList :: (Serialize value)
       => CMap key value  -- ^ map to flatten
       -> [(key, value)]
toList cmap =
    [ (k, decoder packed) | (k, packed) <- Map.toList (c_map cmap) ]

------------------------------------------------------------------------------
-- insert
------------------------------------------------------------------------------

-- | Insert a key/value pair into a 'CMap'. The value is compressed with
-- the settings stored in the map.
insert :: (Ord key, Serialize value)
       => key             -- ^ key
       -> value           -- ^ value to compress and store
       -> CMap key value  -- ^ map to insert into
       -> CMap key value
insert k v cmap =
    cmap { c_map = Map.insert k packed (c_map cmap) }
  where
    -- the new value, serialized and compressed with the map's own settings
    packed = compressor (params cmap) v

------------------------------------------------------------------------------
-- lookup
------------------------------------------------------------------------------


-- | Look up a key, returning the decoded value when the key is present.
lookup :: (Ord key, Serialize value)
       => key             -- ^ key to look for
       -> CMap key value  -- ^ 'CMap' to look in
       -> Maybe value
lookup k cmap =
    case Map.lookup k (c_map cmap) of
      Nothing     -> Nothing
      Just packed -> Just (decoder packed)

------------------------------------------------------------------------------
-- internal helpers for compressing/decompressing
------------------------------------------------------------------------------

-- | Decompress and deserialize a stored value.
--
-- Calls 'error' if the decompressed bytes cannot be decoded as the
-- requested type; the message includes the reason reported by
-- 'decodeLazy'.
decoder :: (Serialize a) => ByteString -> a
decoder = either decodeFailed id . decodeLazy . decompress
    where
      -- This helper is shared by 'toList' and 'lookup', so the message
      -- names the helper itself rather than one particular caller.
      decodeFailed reason =
          error ("Data.CMap.decoder: decode failed: " ++ reason)

-- | Serialize a value with 'encodeLazy' and compress the resulting bytes
-- using the supplied settings.
compressor :: (Serialize a) => CompressParams -> a -> ByteString
compressor settings = compressWith settings . encodeLazy
32:1: Warning: Use camelCase
Found:
data CMap key value = CMap{params :: CompressParams,
c_map :: Map.Map key ByteString}
Why not:
data CMap key value = CMap{params :: CompressParams,
cMap :: Map.Map key ByteString}