-
Notifications
You must be signed in to change notification settings - Fork 12
/
ProfFile.hs
159 lines (141 loc) · 5.88 KB
/
ProfFile.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
{-# LANGUAGE TupleSections #-}
-- | Parser for .prof files generated by GHC.
module ProfFile
( Time(..)
, Line(..)
, lIndividualTime
, lInheritedTime
, lIndividualAlloc
, lInheritedAlloc
, parse
, processLines
, findStart
) where
import Control.Arrow (second, left)
import Data.Char (isSpace)
import Data.List (isPrefixOf)
import Text.Read (readEither)
import Control.Monad (unless)
import Control.Applicative
import Prelude -- Quash AMP related warnings in GHC>=7.10
data Time = Time
{ tIndividual :: Double
, tInherited :: Double
} deriving (Show, Eq)
data Line = Line
{ lCostCentre :: String
, lModule :: String
, lNumber :: Int
, lEntries :: Int
, lTime :: Time
, lAlloc :: Time
, lTicks :: Int
, lBytes :: Int
, lChildren :: [Line]
} deriving (Show, Eq)
lIndividualTime :: Line -> Double
lIndividualTime = tIndividual . lTime
lInheritedTime :: Line -> Double
lInheritedTime = tInherited . lTime
lIndividualAlloc :: Line -> Double
lIndividualAlloc = tIndividual . lAlloc
lInheritedAlloc :: Line -> Double
lInheritedAlloc = tInherited . lAlloc
data ProfFormat = NoSources | IncludesSources
-- | Returns a function accepting the children and returning a fully
-- formed 'Line'.
parseLine :: ProfFormat -> String -> Either String ([Line] -> Line)
parseLine format s =
case format of
NoSources ->
case words s of
(costCentre:module_:no:entries:indTime:indAlloc:inhTime:inhAlloc:other) ->
parse' costCentre module_ no entries indTime indAlloc inhTime inhAlloc other
_ -> Left $ "Malformed .prof file line:\n" ++ s
IncludesSources ->
case words s of
(costCentre:module_:rest) | (no:entries:indTime:indAlloc:inhTime:inhAlloc:other) <- dropSRC rest ->
parse' costCentre module_ no entries indTime indAlloc inhTime inhAlloc other
_ -> Left $ "Malformed .prof file line:\n" ++ s
where
-- XXX: The SRC field can contain arbitrary characters (from the
-- subdirectory name)!
--
-- As a heuristic, assume SRC spans until the last word which:
--
-- * Ends with '>'
-- (for special values emitted by GHC like "<no location info>")
--
-- or
--
-- * Contains a colon eventually followed by another colon or a minus
-- (to identify the source span, e.g. ":69:55-64" or ":(36,1)-(38,30)",
-- or maybe for a single character ":30:3")
--
-- If there is no such word, assume SRC is just one word.
--
-- This heuristic will break if:
--
-- * In the future, columns to the right of SRC can match the above
-- condition (currently, they're all numeric)
--
-- or
--
-- * GHC doesn't add a source span formatted as assumed above, and the
-- SRC contains spaces
--
-- The implementation is not very efficient, but I suppose this is not
-- performance-critical.
dropSRC (_:rest) = reverse . takeWhile (not . isPossibleEndOfSRC) . reverse $ rest
dropSRC [] = []
isPossibleEndOfSRC w = last w == '>'
|| case break (==':') w of
(_, _:rest) -> any (`elem` ":-") rest
_ -> False
parse' costCentre module_ no entries indTime indAlloc inhTime inhAlloc other = do
pNo <- readEither' no
pEntries <- readEither' entries
pTime <- Time <$> readEither' indTime <*> readEither' inhTime
pAlloc <- Time <$> readEither' indAlloc <*> readEither' inhAlloc
(pTicks, pBytes) <-
case other of
(ticks:bytes:_) -> (,) <$> readEither' ticks <*> readEither' bytes
_ -> pure (0, 0)
return $ Line costCentre module_ pNo pEntries pTime pAlloc pTicks pBytes
readEither' str = left (("Could not parse value "++show str++": ")++)
(readEither str)
type LineNumber = Int
processLines :: ProfFormat -> [String] -> LineNumber -> Either String [Line]
processLines format lines0 lineNumber0 = do
((ss,_), lines') <- go 0 lines0 lineNumber0
unless (null ss) $
error "processLines: the impossible happened, not all strings were consumed."
return lines'
where
go :: Int -> [String] -> LineNumber -> Either String (([String], LineNumber), [Line])
go _depth [] lineNumber = do
return (([], lineNumber), [])
go depth0 (line : lines') lineNumber = do
let (spaces, rest) = break (not . isSpace) line
let depth = length spaces
if depth < depth0
then return ((line : lines', lineNumber), [])
else do
parsedLine <- left (("Parse error in line "++show lineNumber++": ")++) $
parseLine format rest
((lines'', lineNumber''), children) <- go (depth + 1) lines' (lineNumber + 1)
second (parsedLine children :) <$> go depth lines'' lineNumber''
firstLineNoSources :: [String]
firstLineNoSources = ["COST", "CENTRE", "MODULE", "no.", "entries", "%time", "%alloc", "%time", "%alloc"]
-- Since GHC 8.0.2 the cost centres include the src location
firstLineIncludesSources :: [String]
firstLineIncludesSources = ["COST", "CENTRE", "MODULE", "SRC", "no.", "entries", "%time", "%alloc", "%time", "%alloc"]
findStart :: [String] -> LineNumber -> Either String (ProfFormat, [String], [String], LineNumber)
findStart [] _ = Left "Malformed .prof file: couldn't find start line"
findStart (line : _empty : lines') lineNumber | (firstLineNoSources `isPrefixOf` words line) = return (NoSources, words line, lines', lineNumber + 2)
| (firstLineIncludesSources `isPrefixOf` words line) = return (IncludesSources, words line, lines', lineNumber + 2)
findStart (_line : lines') lineNumber = findStart lines' (lineNumber + 1)
parse :: String -> Either String ([String], [Line])
parse s = do
(format, names, ss, lineNumber) <- findStart (lines s) 1
return . (names,) =<< processLines format ss lineNumber