Data.Binary performance problem (annotation)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
{-# LANGUAGE DoRec #-}
{-# LANGUAGE TemplateHaskell #-}

module Main where

import Data.Binary.Get
import Data.Binary.IEEE754
import Data.ByteString.UTF8 (toString)
import Data.Maybe
import Control.Monad
import Data.Word
import Control.Applicative
import Data.IntMap (IntMap)
import qualified Data.IntMap as IntMap
import Codec.Archive.LibZip
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString as BS
import System.Path
import Control.DeepSeq

import Data.Derive.NFData   (makeNFData)
import Data.DeriveTH        (derive)

data Constant = ConstantClass { constClass :: String }
              | ConstantString String
              | ConstantIgnored
              deriving Show

$( derive makeNFData ''Constant )

type ConstIdx = Word16    
getConstIdx = getWord16be

type ConstTable = IntMap Constant

getConstant :: ConstTable -> Get (ConstIdx, Constant)
getConstant constTable = do 
  tag <- getWord8
  case tag of
    1 -> noSkip <$> (ConstantString <$> getString)
    3 -> noSkip <$> (ConstantIgnored <$ getWord32be)
    4 -> noSkip <$> (ConstantIgnored <$ getFloat32be)
    5 -> skip <$> (ConstantIgnored <$ getWord64be)
    6 -> skip <$> (ConstantIgnored <$ getFloat64be)
    7 -> noSkip <$> (ConstantClass <$> getStringRef)
    8 -> noSkip <$> (ConstantIgnored <$ getStringRef)
    9 -> noSkip <$> (ConstantIgnored <$ getConstIdx <* getStringRef)
    10 -> noSkip <$> (ConstantIgnored <$ getConstIdx <* getStringRef)
    11 -> noSkip <$> (ConstantIgnored <$ getConstIdx <* getStringRef)
    12 -> noSkip <$> (ConstantIgnored <$ getConstIdx <* getConstIdx)
    tag -> fail $ unwords ["Unknown tag type:", show tag]
  where 
    getString = toString <$> (getByteString =<< (fromIntegral <$> getWord16be))
    
    lookupString idx = 
      case fromJust $ IntMap.lookup (fromIntegral idx) constTable of
        ConstantString s -> s
      
    getStringRef = lookupString <$> getConstIdx
      
    noSkip x = (1, x)
    skip x = (2, x)
                 
fromToM from to f | from > to = return []
                  | otherwise = do 
  (step, x) <- f from
  liftM (x:) $ fromToM (from + step) to f
     
getConstants :: Get ConstTable
getConstants = do
  count <- getWord16be
  rec 
    -- According to the Java .class spec, count is, for some reason, one larger than the actual number of constants
    consts <- IntMap.fromAscList <$> (fromToM 1 (count - 1) $ \i -> 
      do (step, const) <- getConstant consts
         return (step, (fromIntegral i, const)))
  return consts

getHeader = do 
  signature <- replicateM 4 getWord8
  unless (signature == [0xCA, 0xFE, 0xBA, 0xBE]) $ 
    fail "Not a class file"
  minor <- getWord16be
  major <- getWord16be
  return (major, minor)
     
getClass = do
  (_major, _minor) <- getHeader 
  constants <- getConstants
  _ <- getWord16be
  idx <- fromIntegral <$> getConstIdx
  return $ IntMap.lookup idx constants
  

main = do 
  output <- withArchive [CheckConsFlag] jarPath $ do
    classfiles <- filter isClassfile <$> fileNames []
    forM classfiles $ \classfile -> do 
      stream <- BL.pack <$> fileContents [] classfile
      let cls = runGet getClass stream
      rnf cls `seq` return cls
  mapM_ print output
  
  where       
    isClassfile path = snd (splitExt path) == ".class"
    
    jarPath = "/usr/lib/jvm/java-6-sun-1.6.0.22/jre/lib/rt.jar"