SQL Parser

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
module SQLParser ( SqlStatement, p_sql ) where

import Text.Parsec.Token
import Text.Parsec.Language
import Text.Parsec.Expr
import Control.Applicative
import Control.Monad (MonadPlus(..), ap)
import Text.ParserCombinators.Parsec hiding (many, optional, (<|>), Column)


data ColumnIdent = ColumnIdent {
      colIdTable :: Maybe String
    , colIdName  :: String
    , colAlias   :: Maybe String
} deriving Show

data ColumnType = IntT
                | DoubleT
                | StringT
                | CharT
                | BoolT
  deriving Show

data ColumnDef = ColumnDef String ColumnType
  deriving Show

data TableIdent = TableIdent {
      tabName  :: String
    , tabAlias :: Maybe String
} deriving Show

data Star = Star deriving Show

data Expression = IntLiteral Integer
                | FloatLiteral Double
                | StringLiteral String
                | CharLiteral Char
                | BoolLiteral Bool
                | Null
                | Pow Expression Expression   -- ^
                | Mul Expression Expression   -- *
                | Div Expression Expression   -- /
                | Mod Expression Expression   -- %
                | Plus Expression Expression  -- +
                | Minus Expression Expression -- -
                | Lt Expression Expression    -- <
                | Gt Expression Expression    -- >
                | Lte Expression Expression   -- <=
                | Gte Expression Expression   -- >=
                | Eq Expression Expression    -- =
                | Is Expression Expression    -- IS
                | And Expression Expression   -- AND
                | Or Expression Expression    -- OR
                | Pos Expression              -- Unary positive
                | Neg Expression              -- Unary complement
                | Not Expression              -- Logical complement
                | ColumnVar ColumnIdent       -- column reference
  deriving Show

data SqlStatement = Select { selectColumns     :: Either Star [ColumnIdent]
                           , fromClause        :: [TableIdent]
                           , selectWhereClause :: Maybe Expression
                    }
                  | Update { updateTable       :: TableIdent
                           , setClause         :: [(String, Expression)]
                           , updateWhereClause :: Maybe Expression
                    }
                  | Insert { insertTable   :: String
                           , insertColumns :: Maybe [String]
                           , insertValues  :: [[Expression]]
                    }
                  | Delete { deleteTable       :: TableIdent
                           , deleteWhereClause :: Maybe Expression
                    }
                  | Create { createTable   :: String
                           , createColumns :: [ColumnDef]
                    }
                  | Drop { existCheck :: Bool
                         , dropTables :: [String]
                    }
  deriving Show

sqlStyle :: LanguageDef st
sqlStyle = emptyDef{ commentStart = "/*"
                   , commentEnd = "*/"
                   , nestedComments = True
                   , identStart = letter
                   , identLetter = alphaNum <|> char '_'
                   , reservedOpNames = [ ".", "+", "-", "^", "*"
                                       , "/", "%", "<", ">", "="]
                   , reservedNames = [ "SELECT", "FROM", "WHERE", "AS" -- Select Statement
                                     , "IS" , "NOT", "AND", "OR"       -- Operators
                                     , "TRUE", "FALSE", "NULL"         -- Literals
                                     , "CREATE", "TABLE"               -- Create Statement
                                     , "INT", "DOUBLE", "STRING"       -- Data Types
                                     , "CHAR" , "BOOL"
                                     , "UPDATE", "SET"                 -- Update Statement
                                     , "INSERT" , "INTO", "VALUES"     -- Insert Statement
                                     , "DROP", "TABLE", "IF", "EXISTS" -- Drop Statement
                                     ]
                   , caseSensitive = False
}

TokenParser{ identifier     = p_identifier
           , reserved       = p_reserved
           , reservedOp     = p_reservedOp
           , symbol         = p_symbol
           , lexeme         = p_lexeme
           , parens         = p_parens
           , naturalOrFloat = p_numeric
           , stringLiteral  = p_stringLiteral
           , charLiteral    = p_charLiteral
           , dot            = p_dot
           , comma          = p_comma
           , whiteSpace     = p_whiteSpace
           , semi           = p_semi
} = makeTokenParser sqlStyle

p_alias :: Parser String
p_alias = p_reserved "AS" *> p_identifier <?> "alias"

p_columnIdent :: Parser ColumnIdent
p_columnIdent = try p_tableAndColumn
            <|> p_columnOnly
            <?> "column"
    where p_tableAndColumn = ColumnIdent <$> (Just <$> p_identifier)
                            <*> (p_dot *> p_identifier)
                            <*> optionMaybe p_alias
          p_columnOnly = ColumnIdent Nothing <$> p_identifier
                        <*> optionMaybe p_alias

p_tableIdent :: Parser TableIdent
p_tableIdent = TableIdent <$> p_identifier
           <*> optionMaybe p_alias
           <?> "table"

{-
   | Operator  | Associativity | Description                        |
   | _________ | _____________ | __________________________________ |
   | + -       | right         | unary plus, unary minus            |
   | ^         | left          | exponentiation                     |
   | * / %     | left          | multiplication, division, modulo   |
   | + -       | left          | addition, subtraction              |
   | IS        | left          | IS TRUE, IS FALSE, IS NULL, etc.   |
   | < > <= >= | left          | less than, greater than            |
   | =         | right         | equality                           |
   | NOT       | right         | logical negation                   |
   | AND       | left          | logical conjunction                |
   | OR        | left          | logical disjunction                |
 -}

opTable = [ [ Prefix (Neg   <$ p_reservedOp    "-")
            , Prefix (Pos   <$ p_reservedOp    "+") ]
          , [ Infix  (Pow   <$ p_reservedOp    "^") AssocLeft ]
          , [ Infix  (Mul   <$ p_reservedOp    "*") AssocLeft
            , Infix  (Div   <$ p_reservedOp    "/") AssocLeft
            , Infix  (Mod   <$ p_reservedOp    "%") AssocLeft ]
          , [ Infix  (Plus  <$ p_reservedOp    "+") AssocLeft
            , Infix  (Minus <$ p_reservedOp    "-") AssocLeft ]
          , [ Infix  (Is    <$ p_reserved     "IS") AssocLeft ]
          , [ Infix  (Lt    <$ p_reservedOp    "<") AssocLeft
            , Infix  (Gt    <$ p_reservedOp    ">") AssocLeft
            , Infix  (Lte   <$ p_reservedOp   "<=") AssocLeft
            , Infix  (Gte   <$ p_reservedOp   ">=") AssocLeft ]
          , [ Infix  (Eq    <$ p_reservedOp    "=") AssocRight ]
          , [ Prefix (Not   <$ p_reserved    "NOT") ]
          , [ Infix  (And   <$ p_reserved    "AND") AssocLeft ]
          , [ Infix  (Or    <$ p_reserved     "Or") AssocLeft ]
        ]

p_expression :: Parser Expression
p_expression = buildExpressionParser opTable p_expression' <?> "expression"
    where p_expression' = p_parens p_expression
                      <|> ColumnVar <$> p_columnIdent
                      <|> extractNum <$> p_numeric
                      <|> StringLiteral <$> p_stringLiteral
                      <|> CharLiteral <$> p_charLiteral
                      <|> BoolLiteral True <$ p_reserved "TRUE"
                      <|> BoolLiteral False <$ p_reserved "FALSE"
                      <|> Null <$ p_reserved "NULL"
          extractNum (Left x) = IntLiteral x
          extractNum (Right x) = FloatLiteral x

-- Warning: here it doesn't handle the "inner/left outer [...] join [...] on [...]" clause 
-- which has been added to the SQL standard
-- Nonetheless it is a great start for SQL parsing as well as Parsec training. 
-- Many thanks to the original author.
p_select :: Parser SqlStatement
p_select = Select <$> (p_reserved "SELECT" *> p_selectColumns)
       <*> (p_reserved "FROM" *> sepBy1 p_tableIdent p_comma)
       <*> optionMaybe (p_reserved "WHERE" *> p_expression)
       <?> "select statement"
  where p_selectColumns :: Parser (Either Star [ColumnIdent])
        p_selectColumns = Left Star <$ p_reservedOp "*"
                      <|> Right <$> sepBy1 p_columnIdent p_comma
                      <?> "column list"

{- Note: p_columnType will have to left factor the parser or use the try combinator
         if data types are added that have common leading substrings.-}
p_columnType :: Parser ColumnType
p_columnType = IntT    <$ p_reserved "INT"
           <|> DoubleT <$ p_reserved "DOUBLE"
           <|> StringT <$ p_reserved "STRING"
           <|> CharT   <$ p_reserved "CHAR"
           <|> BoolT   <$ p_reserved "BOOL"
           <?> "column type"

p_columnDef :: Parser ColumnDef
p_columnDef = ColumnDef <$> p_identifier
          <*> p_columnType
          <?> "column name and type"

p_create :: Parser SqlStatement
p_create = Create <$> (p_reserved "CREATE" *> p_reserved "TABLE" *> p_identifier)
       <*> p_parens (sepBy1 p_columnDef p_comma)
       <?> "CREATE statement"

p_update :: Parser SqlStatement
p_update = Update <$> (p_reserved "UPDATE" *> p_tableIdent)
       <*> (p_reserved "SET" *> sepBy1 p_setStatement p_comma)
       <*> optionMaybe (p_reserved "WHERE" *> p_expression)
       <?> "update statement"

p_setStatement :: Parser (String, Expression)
p_setStatement = (,) <$> p_identifier <* p_reservedOp "="  <*> p_expression
             <?> "update statement"

p_delete :: Parser SqlStatement
p_delete = Delete <$> (p_reserved "DELETE" *> p_reserved "FROM" *> p_tableIdent)
       <*> optionMaybe (p_reserved "WHERE" *> p_expression)
       <?> "delete statement"

p_insert :: Parser SqlStatement
p_insert = Insert <$> (p_reserved "INSERT" *> p_reserved "INTO" *> p_identifier)
       <*> optionMaybe (p_parens $ sepBy1 p_identifier p_comma)
       <*> (p_reserved "VALUES" *>
           sepBy1 (p_parens $ sepBy1 p_expression p_comma) p_comma)
       <?> "insert statement"

p_drop :: Parser SqlStatement
p_drop = Drop <$> (p_reserved "DROP" *> p_reserved "TABLE"
      *> option False (True <$ p_reserved "IF" <* p_reserved "EXISTS"))
     <*> sepBy1 p_identifier p_comma
     <?> "drop statement"

p_sqlStatement :: Parser SqlStatement
p_sqlStatement = p_select
             <|> p_update
             <|> p_insert
             <|> p_delete
             <|> p_create
             <|> p_drop

p_sql = parse (p_whiteSpace *> many (p_sqlStatement <* p_semi) <* eof)
              "Error while parsing SQL statement"
120:1: Warning: Use camelCase
Found:
p_alias = ...
Why not:
pAlias = ...
123:1: Warning: Use camelCase
Found:
p_columnIdent = ...
Why not:
pColumnIdent = ...
133:1: Warning: Use camelCase
Found:
p_tableIdent = ...
Why not:
pTableIdent = ...
172:1: Warning: Use camelCase
Found:
p_expression = ...
Why not:
pExpression = ...
189:1: Warning: Use camelCase
Found:
p_select = ...
Why not:
pSelect = ...
201:1: Warning: Use camelCase
Found:
p_columnType = ...
Why not:
pColumnType = ...
209:1: Warning: Use camelCase
Found:
p_columnDef = ...
Why not:
pColumnDef = ...
214:1: Warning: Use camelCase
Found:
p_create = ...
Why not:
pCreate = ...
219:1: Warning: Use camelCase
Found:
p_update = ...
Why not:
pUpdate = ...
225:1: Warning: Use camelCase
Found:
p_setStatement = ...
Why not:
pSetStatement = ...
229:1: Warning: Use camelCase
Found:
p_delete = ...
Why not:
pDelete = ...
234:1: Warning: Use camelCase
Found:
p_insert = ...
Why not:
pInsert = ...
241:1: Warning: Use camelCase
Found:
p_drop = ...
Why not:
pDrop = ...
247:1: Warning: Use camelCase
Found:
p_sqlStatement = ...
Why not:
pSqlStatement = ...
254:1: Warning: Use camelCase
Found:
p_sql = ...
Why not:
pSql = ...