GATTACA

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import Data.Deque
import Control.Monad
import Control.Arrow
import Data.List

-- imports for bonus solution:

import Data.Char

-- One letter of a strand. The constructor names double as the textual
-- form: the derived Show and Read instances print and parse exactly
-- "A", "C", "G" and "T".
data Base
   = A
   | C
   | G
   | T
   deriving (Show, Read, Eq)

-- A small strand for trying the exercises on, spelled out run by run:
-- AAAA C GG AA C TTTT.
sample :: [Base]
sample = concat
   [ replicate 4 A
   , [C]
   , replicate 2 G
   , replicate 2 A
   , [C]
   , replicate 4 T
   ]

-- Exercise 1: pack

-- Split a strand into its maximal runs of identical adjacent bases,
-- in strand order, e.g.  pack [A, A, C, A] ~> [[A, A], [C], [A]].
-- The empty strand packs to the empty list; every run is non-empty.
pack :: [Base] -> [[Base]]
pack [] = []
pack (base : rest) = (base : same) : pack others
   where (same, others) = span (== base) rest
         -- 'same' is the remainder of the current run,
         -- 'others' begins with the first base that differs.

-- 'span' hands the runs back in input order, so no auxiliary queue is
-- needed to preserve order, and each run is available as soon as it ends.

-- AHEM! Remember how 90% of Haskell hacking is reinventing the 
-- standard libraries? AHEM!

-- The library version of 'pack': group adjacent bases that compare equal.
pack' :: [Base] -> [[Base]]
pack' = groupBy (==)

-- Exercise 2: join ... oops, I meant 'unpack'

-- Flatten a list of strands into one strand, keeping their order.
-- (This is the same flattening 'join' performs on lists.)
unpack :: [[Base]] -> [Base]
unpack = concat

-- Exercise 3: encode

-- Run-length encode a strand: every run produced by 'pack' becomes a
-- (run length, base) pair, in strand order.
encode :: [Base] -> [(Int, Base)]
encode bases = [ (length grp, base) | grp@(base : _) <- pack bases ]
   -- Matching on the first element, rather than calling 'head', keeps
   -- this total: an empty group could only be skipped, never crash.

-- Exercise 4: decode

-- Expand run-length pairs back into a strand: each (count, base) pair
-- contributes 'count' copies of 'base', in the order the pairs appear.
decode :: [(Int, Base)] -> [Base]
decode runs = unpack [ replicate count base | (count, base) <- runs ]

-- Exercise 6: run -- we do ex6 first to make ex5's implementation trivial

-- Repeat every base of the strand n times in place: each base is paired
-- with the count n and the pairs are handed to 'decode'.
run :: [Base] -> Int -> [Base]
run bases n = decode [ (n, base) | base <- bases ]

-- Exercise 5: duplicate

-- Double every base of the strand; simply 'run' with a count of 2.
duplicate :: [Base] -> [Base]
duplicate bases = run bases 2

-------- Time to solve exercises 1-6 for moi: 15 minutes -------------------
-------- Lines of code (sans declarations): 6 lines ------------------------

-- p.s.: Apparently, I don’t know concatMap. Ooh! New things to learn!

-- BONUS: ------------------------------------------------------------------

{--

So, the problem here is that these input files, for example:

ccg
ga
gg
aagatgtct

are not in Read-able format. But this is a real-world problem: the
customer gives you the above, and you're not really in a position to say:

"Could you give me those files in the below format, instead?

[C,C,G]
[G,A]
[G,G]
[A,A,G,A,T,G,T,C,T]"

But we do have a recourse! Haskell, itself, can do these transformations!

 --}

--- 'Triple R': Readin' 'Ritin' 'Rithmetic
--
-- Read a snippets file holding one strand per line, written as bare
-- letters in either case (e.g. "ccg"), and return the strands in file
-- order. Each character is upper-cased and parsed with the derived Read
-- instance of Base. A character that is not a base letter raises an
-- error naming that character when the base is evaluated.
tripleR :: FilePath -> IO [[Base]]
tripleR filename = fmap (map (map toBase) . lines) (readFile filename)
   where toBase :: Char -> Base
         toBase c = case reads [toUpper c] of
            -- accept only a complete parse of the single letter
            [(base, "")] -> base
            _            -> error ("tripleR: not a base: " ++ show c)

{-- 

So, instead of asking your customer to do your job for you, you
just wrote two lines of code that saved all that headache and
recrimination! Sweet!

Now, with

tripleR "kmspdw.snippets" ~> [[C,C,G],[G,A],[G,G],[A,A,G,A,T,G,T,C,T]]

we can

(unpack >>> fn)

over the result, where fn is any of the functions we've defined above.

so:

tripleR "kmspdw.snippets" >>= return . (unpack >>> pack)

   ~> [[C,C],[G,G],[A],[G,G],[A,A],[G],[A],[T],[G],[T],[C],[T]]

and:

tripleR "etiwitwikngfmtr.snippets" >>= return . (unpack >>> encode)

gives us a list of tuples of length 94, where the highest 
repetition is (4, G) (twice).

We can test any of our [Base] -> a functions with the below:

 --}

-- Load the snippets in the given file with 'tripleR', splice them into
-- a single strand with 'unpack', and return the result of applying fn
-- to that strand.
testRun :: FilePath -> ([Base] -> a) -> IO a
testRun filename fn = do
   snippets <- tripleR filename
   return (fn (unpack snippets))

-- Cool story, bro!

----------------- Lines of code for Bonus solution: 3 lines ------------
-------- Time to solve bonus exercise for moi: 35 minutes --------------
95:20: Warning: Use liftM
Found:
readFile filename >>=
return . map (map $ read . return . toUpper) . lines
Why not:
liftM (map (map $ read . return . toUpper) . lines)
(readFile filename)
132:23: Warning: Use liftM
Found:
tripleR filename >>= return . (unpack >>> fn)
Why not:
liftM (unpack >>> fn) (tripleR filename)