parsing proxy noodling

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
{-#LANGUAGE OverloadedStrings#-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}

import Control.Applicative
import Control.Monad.Trans.State
import Control.Proxy
import Data.Sequence hiding (empty, take, drop)
import qualified Data.Sequence as S
import Data.Text (Text)
import qualified Data.Text as T
import Prelude hiding (take, takeWhile)

type Input a = Seq (Maybe a)
type Responder p a m  = RespondT p () (Maybe a) (Input a) m 

newtype ParseT p a m r = ParseT
    { unParseT :: StateT (Input a)              -- state is Sequence of maybes 
                         (Responder p a m)      -- transformed monad
                         r }                    -- return type of parser
    deriving (Functor, Applicative, Monad)

-- newtype RespondT p a' a b' m b = RespondT {runRespondT :: p a' a b' b m b'}
-- i.e. the underlying pipe returns what it gets from downstream (b').
-- But here it is newtyped so that the return type is what the underlying pipe 
-- sends downstream (b)

-- The silly synonym Responder p a m r is thus
-- newtype RespondT p () (Maybe a) (Input a) m r = 
    --    RespondT {runRespondT :: p () (Maybe a) (Input a) r m (Input a)}
    
    --    RespondT {runRespondT :: p  ()      (Maybe a)     <- Upstream
    --                                  
    --                                ^          |
    --                                |          v
     
    --                               (Input a)   r          <- DownStream
    
    --                                                      m (Input a)}   <- return type

    -- So, finally, ParseT p a m r is basically
    -- Input a -> p () (Maybe a) (Input a) (r,Input a) m (Input a)  -- not quite?
    -- since StateT is acting on r, the return value of the newtyped proxy
    -- In the underlying pipe, respond puts (r,Input a) and  gets Input a , i.e. a Seq (Maybe a)
    -- the values r that are 'flowing downstream' are the intuitive parsed values
    -- why it recieves Maybe a from 'upstream' I'm not seeing at the moment


-- dubiously useful anti-boilerplate wrapper and unwrappers
parser stepper     = ParseT $ StateT $ \s -> RespondT $ runIdentityP (stepper s)
runParser p state  = IdentityP $ runRespondT $ runStateT  (unParseT p) state
evalParser p state = IdentityP $ runRespondT $ evalStateT (unParseT p) state


instance (Monad m, Proxy p) => Alternative (ParseT p a m) where
    empty     = parser $ \_ -> return S.empty 
    p1 <|> p2 = parser $ \s -> do d1 <- runParser p1 s
                                  d2 <- runParser p2 (s >< d1)
                                  return (d1 >< d2)

chunkMaybe :: (Monad m, Proxy p) => ParseT p a m (Maybe a)
chunkMaybe = parser chunking where 
  chunking input = case viewl input of 
     EmptyL ->  do ma <- request ()  -- request returns Maybe a in any case with ParseT p a
                   case ma of Nothing -> do more <- respond (ma, singleton ma)
                                -- respond yields up an (intuitive parsed, Seq (Maybe a))
                                -- (thus here Nothing and a singleton Seq)
                                -- it gets back an input i.e. Seq (Maybe a)
                                            return (ma <| more) 
                              _       -> do more <- respond (ma, input)
                                            return (ma <| more)

     ma:<mas ->    case ma of Nothing -> respond (ma, input)
                              _       -> respond (ma, mas)


returnChunk :: (Monad m, Proxy p) =>  a -> ParseT p a m ()
returnChunk a = ParseT $ modify (Just a <|)
returnChunk' :: (Monad m, Proxy p) =>  a -> ParseT p a m ()
returnChunk' a = parser $ \sequ ->  return (Just a <| sequ)


runParseT :: (Monad m, Proxy p) => ParseT p a m r -> () -> p () (Maybe a)
                                                             ()  r         m ()
runParseT p () = runIdentityP $ do
    evalParser p S.empty //> \r -> do
        respond r
        return S.empty
    return ()


-- chunk one chunk of input or fail with 'empty' if at end of file
chunk :: (Monad m, Proxy p) => ParseT p a m a  
chunk = do
    ma <- chunkMaybe
    case ma of
        Nothing -> empty -- i.e. parser $ \_ -> return S.empty
        Just a  -> return a


-- take exactly N characters of Text
take :: (Monad m, Proxy p) => Int -> ParseT p Text m Text
take n = do txt <- chunk
            let len = T.length txt
            if len >= n
            then case T.splitAt n txt of (prefix, suffix) -> do returnChunk suffix
                                                                return prefix
            else do txt' <- take (n - len)
                    return (T.append txt txt')


-- Take as many characters that satisfy a predicate as possible
takeWhile :: (Monad m, Proxy p) => (Char -> Bool) -> ParseT p Text m Text
takeWhile predicate = do
    txt <- chunk
    case T.span predicate txt of 
       (prefix, suffix) -> if T.null suffix
                              then do txt' <- takeWhile predicate
                                      return (T.append txt txt')
                              else do returnChunk suffix
                                      return prefix


-- Match a specific string
match :: (Monad m, Proxy p) => Text -> ParseT p Text m Text
match txt = do txt' <- take (T.length txt)
               if txt == txt' then return txt' else empty


-- Like 'many', except returns results in reverse
-- This is useful for incremental parsing
few :: (Alternative f) => f a -> f [a]
few fa = pure [] <|> ((:) <$> fa <*> few fa)


parseSomething :: (Monad m, Proxy p) => ParseT p Text m [Text]
parseSomething = many $ do
    match "<a>"
    x <- takeWhile (\c -> not (c == '<'))
    match "</a>"
    return x
    
-- ll we're missing is a sample incremental text source 
-- (I'd ordinarily use an incremental file reader, but I still haven't released a 
-- Text library for pipes yet):
-- Pretends to be an impure source of Text values
textSource :: (Proxy p) => () -> Producer p (Maybe Text) IO ()
textSource = fromListS
    [ Just "<a>"
    , Just "Element1</a"
    , Just "><a>Element2"
    , Just "</a><a>"
    , Just "Element3</a><a>Element4</a>"
    , Nothing
    ]
    
ex1 :: IO ()
ex1  =  runProxy $ textSource >-> printD >-> runParseT parseSomething >-> printD
ex1b =  runProxy $ textSource >-> printD >-> runParseT parseSome_a >-> printD

parseSome_a :: (Monad m, Proxy p) => ParseT p Text m [Text]
parseSome_a = do
    xs <- few a_element
    x  <- a_element
    let n = read $ drop 7 $ T.unpack x
    if even n then return (xs ++ [x]) else empty

a_element :: (Monad m, Proxy p) => ParseT p Text m Text
a_element = do
        match "<a>"
        x <- takeWhile (\c -> not (c == '<'))
        match "</a>"
        return x

userInput :: (Proxy p) => () -> Producer p (Maybe Text) IO ()
userInput () = runIdentityP $ do unlessQuitText ()
                                 respond Nothing   
  where unlessQuitText = stdinS >-> takeWhileD (/= "quit") >-> mapD (Just . T.pack)
  
ex2 :: IO ()
ex2 = runProxy $ userInput >-> runParseT parseSome_a >-> printD
137:5: Warning: Reduce duplication
Found:
match "<a>"
x <- takeWhile (\ c -> not (c == '<'))
match "</a>"
return x
Why not:
Combine with /tmp/87594.hs:169:9
138:27: Error: Use /=
Found:
not (c == '<')
Why not:
c /= '<'
Note: incorrect if either value is NaN
161:1: Warning: Use camelCase
Found:
parseSome_a = ...
Why not:
parseSomeA = ...
168:1: Warning: Use camelCase
Found:
a_element = ...
Why not:
aElement = ...
170:31: Error: Use /=
Found:
not (c == '<')
Why not:
c /= '<'
Note: incorrect if either value is NaN