pipes file parse II

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE OverloadedStrings  #-}
{-# LANGUAGE NoMonomorphismRestriction#-}
{-# LANGUAGE BangPatterns#-}

import           Blaze.ByteString.Builder  (Builder, fromByteString, toByteString)
import           Control.Exception         (Exception)
import           Control.Monad.Trans.Class (lift)
import           Data.ByteString           (ByteString)
import qualified Data.ByteString           as S
import qualified Data.ByteString.Lazy      as L
import           Data.Monoid
import           Data.Text                 (Text)
import qualified Data.Text                 as T
import qualified Data.Text.Encoding        as TEE
import qualified Data.Text.Lazy            as TL
import qualified Data.Text.Lazy.Encoding   as TLE
import           Data.Typeable             (Typeable)

import Pipes
import Pipes.Parse
import qualified Pipes.Prelude as PP
import qualified Pipes.ByteString as PB
import qualified Pipes.Text as PT
import Control.Lens
import Control.Monad
import qualified System.IO as IO
import Control.Monad.Trans.Maybe
import Control.Monad.State.Class

input :: [File]
input =
    [ File "utf8.txt" $ TEE.encodeUtf8 "This file is in UTF-8"
    , File "utf16.txt" $ TEE.encodeUtf16LE "This file is in UTF-16"
    , File "binary.dat" "we'll pretend to be binary"
    ]
 
data File = File
    { fileName     :: !Text
    , fileContents :: !ByteString
    }
    deriving Show

encodeFile :: File -> Builder
encodeFile (File name contents) =
    tellLen (T.length name) <>
    fromByteString (TEE.encodeUtf8 name) <>
    tellLen (S.length contents) <>
    fromByteString contents
  where
    tellLen i = fromByteString $ TEE.encodeUtf8 $ T.pack $ shows i ":"

encodeFiles :: [File] -> Builder
encodeFiles = mconcat . map encodeFile

parseText :: Monad m => MaybeT (StateT (Producer Text m x) m) (Text, Int)
parseText = do nameLength    <- parseNumber
               names         <- zoom (PT.splitAt nameLength) $ lift drawAll 
               contentLength <- parseNumber
               return (T.concat names, contentLength)

parseFile :: Monad m => MaybeT (StateT (Producer ByteString m x) m) File
parseFile  = do (name, len) <- zoom (PB.splitAt 50 . PT.decodeUtf8) parseText
                contents    <- zoom (PB.splitAt len) $ lift drawAll
                return (File name (S.concat contents))

parseNumber :: Monad m => MaybeT (StateT (Producer Text m x) m) Int
parseNumber = loop 0 where
   loop !n = do c <- MaybeT  PT.drawChar
                case c of ':' -> return n
                          _   -> do guard ('0' <= c && c <= '9') 
                                    loop $! n * 10 + (fromEnum c - fromEnum '0')

parseFiles p = do (m,p') <- lift (runStateT (runMaybeT parseFile) p)
                  case m of Nothing -> return p'
                            Just file -> do yield file
                                            parseFiles p'

main :: IO ()
main = IO.withBinaryFile fp IO.ReadMode $ \h ->
            do p' <- runEffect $ parseFiles (PB.fromHandle h) >-> PP.print
               runEffect $ p' >-> PP.print
 where fp = "encoded.pharse"