Haskell Array Index out of range

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import qualified Text.Parsec.Token as T
import Text.Parsec.Combinator
{--
import Text.Parsec.Char
import Text.Parsec.String
--}
import Text.ParserCombinators.Parsec
import Text.Parsec.Language
import Data.Char
import qualified Data.Map as M
import qualified Data.Vector as V
import Data.Vector hiding (forM_, foldl', (!), map, length)
import Data.Vector.Mutable as MV hiding (length)
import Data.Map hiding (map)
import qualified Data.Array as A
import qualified Data.Array.Unboxed as UA
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Generic.Mutable as GM
import Data.Set as S hiding (map)
import Data.List
import Control.Monad
import Control.Exception
import SVM

type User = Int
type Item = Int
type Rating = Double

type RTuple = (User, Item, Rating)

type UserRating = M.Map Item Rating
type ItemRating = M.Map User Rating

type Vec = V.Vector
--natural = T.natural haskell
--whiteSpace = T.whiteSpace haskell
natural :: Parser Int
natural = fmap (post 0) $ many1 digit
  where
    post ac []     = (ac * 10) 
    post ac [x]    = (ac * 10) + digitToInt x
    post ac (x:xs) = post ((ac * 10) + digitToInt x) xs
whiteSpace = many (oneOf " \t") >> return ()

tuples = do {ts <- sepEndBy tuple (char '\n'); return ts}
--tuples = many tuple
tuple = do
    u <- natural
    whiteSpace
    i <- natural
    whiteSpace
    r <- natural
    whiteSpace
    t <- many1 digit
    return (u, i, fromIntegral r::Double)
    --return (fromIntegral u::Int, fromIntegral i::Int, fromIntegral r::Double)

tuplesFromFile :: FilePath -> IO [RTuple]
tuplesFromFile file = do
    res <- parseFromFile tuples file
    case res of
        Left e -> error $ show e
        Right ts -> return ts

convert :: [RTuple] -> IO (V.Vector UserRating, V.Vector ItemRating)
convert ts = construct where
    (nu, ni) = foldl' maxTuple (0, 0) ts
    maxTuple :: (Int, Int) -> RTuple -> (Int, Int)
    maxTuple (a, b) (i, j, _) = (max a i, max b j)

    construct :: IO (V.Vector UserRating, V.Vector ItemRating)
    construct = do
        urs <- GM.replicate (nu + 1) M.empty
        irs <- GM.replicate (ni + 1) M.empty
        --urs :: MV.MVector m UserRating
        --irs :: MV.MVector m ItemRating
        forM_ ts $ \ (u, i, r) -> do
            ur <- MV.read urs u
            let ur' = M.insert i r ur
            MV.write urs u ur'

            ir <- MV.read irs i
            let ir' = M.insert u r ir
            MV.write irs i ir'
        urs' <- G.unsafeFreeze urs
        irs' <- G.unsafeFreeze irs
        return (urs', irs')

{--
instance HasSize Map where
    size = M.size
--}
--statistics :: Vec Map k v ->  Vec Map 
avg :: (Fractional v) => Map k v -> v
avg m = M.fold (+) 0 m / fromIntegral (M.size m)

similarity :: Vec (Map Int Double) -> Int -> Int -> Double
similarity v i j = go (S.elems $ S.union (keysSet mi)  (keysSet mj)) 0 0 0 where
    mi = v V.! i
    mj = v V.! j
    avgs = V.map avg v -- TODO: optimize this
    go [] ac1 ac2 ac3
        | ac2 == 0 || ac3 == 0 = 0
        | otherwise = ac1 / (sqrt ac2 * sqrt ac3)
    go (ck:cks) ac1 ac2 ac3 = go cks ac1' ac2' ac3' where
        ac1' = ac1 + (mi ! ck - avgs V.! i) * (mj ! ck - avgs V.! j)
        ac2' = ac2 + (mi ! ck - avgs V.! i) ^ 2
        ac3' = ac3 + (mj ! ck - avgs V.! j) ^ 2

newLSSVM :: Vec UserRating -> Vec ItemRating -> LSSVM [Int]
newLSSVM urs irs = LSSVM (KernelFunction kf) cost params where
    kf :: [Double] -> [Int] -> [Int] -> Double
    kf (uw:iw:_) (ua:ia:_) (ub:ib:_) = similarity urs ua ub * uw + similarity irs ia ib * iw

    cost = 0.5
    params = let
        nu = fromIntegral (V.length urs - 1) :: Double
        ni = fromIntegral (V.length irs - 1) :: Double
        s = nu + ni in [nu / s, ni / s]

runLSSVM :: LSSVM [Int] -> Double -> Int -> [RTuple] -> SVMSolution [Int]
runLSSVM svm eps iterNum rts = solve svm dataset eps iterNum where
    dataset = DataSet points values
    points :: A.Array Int [Int]
    points = assert (len > 0) $ A.listArray (1::Int, len) $ map (\(u,i,r) -> [u,i]) rts
    values :: UA.UArray Int Double
    values = UA.listArray (1::Int, len)  $ map (\(u,i,r) -> r) rts
    len = length rts :: Int


simLSSVM :: LSSVM [Int] -> SVMSolution [Int] -> [RTuple] -> Double
simLSSVM svm sol rts = simulate svm sol points `rmse` values where

    points = A.listArray (1::Int, length rts) $ map (\(u,i,r) -> [u,i]) rts
    values = map (\(u,i,r) -> r) rts

rmse :: [Double] -> [Double] -> Double
rmse as bs = assert (len == length bs) (go as bs 0) where
    len = length as
    go :: [Double] -> [Double] -> Double -> Double
    go [] [] acc = sqrt (acc / fromIntegral len)
    go (a:as') (b:bs') acc = go as' bs' (acc + (a - b)^2)

main = do
    train <- tuplesFromFile "ua.base"
    test <- tuplesFromFile "ua.test"
    (urs, irs) <- Main.convert train
    let svm = newLSSVM urs irs
        sol = runLSSVM svm 0.001 100 train
        res = simLSSVM svm sol test
    putStrLn $ show res
40:22: Warning: Redundant bracket
Found:
(ac * 10)
Why not:
ac * 10
43:14: Warning: Use void
Found:
many (oneOf " \t") >> return ()
Why not:
void (many (oneOf " \t"))
45:10: Error: Redundant return
Found:
do ts <- sepEndBy tuple (char '\n')
return ts
Why not:
do sepEndBy tuple (char '\n')
151:5: Error: Use print
Found:
putStrLn $ show res
Why not:
print res