forked from cms-sw/cms-sw.github.io
-
Notifications
You must be signed in to change notification settings - Fork 0
/
process-job-reports
executable file
·76 lines (69 loc) · 2.37 KB
/
process-job-reports
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/env python
# Process job reports and create summary file with performance information.
from argparse import ArgumentParser
from os.path import exists, join
from xml.sax import parseString, ContentHandler
import re
import datetime, time
from cmsutils import getRelValsFiles, readRelValFile
# Schema of the output
class JobReportHandler(ContentHandler):
    """SAX handler collecting selected ``<Metric>`` values from a job report.

    ``what`` is an iterable of metric names to track.  Every tracked name
    starts at 0 in ``self.counters`` and is overwritten with the float value
    of each matching ``<Metric Name="..." Value="..."/>`` element seen, so the
    last occurrence in the document wins.
    """

    def __init__(self, what):
        # ContentHandler is an old-style class on Python 2, so call the base
        # initializer explicitly instead of going through super().
        ContentHandler.__init__(self)
        self.counters = dict((k, 0) for k in what)

    def startElement(self, name, attrs):
        """Record the value of a tracked ``<Metric>`` element.

        Elements that are not ``Metric``, that lack a ``Name`` or ``Value``
        attribute, that name an untracked metric, or whose value is NaN or
        not a number are ignored; the corresponding counter keeps its
        previous value.
        """
        if name != "Metric":
            return
        metric = attrs.get("Name")
        if metric not in self.counters:
            return
        raw = attrs.get("Value")
        # NaN would make the integer conversion done by the caller fail, so
        # drop it here regardless of how it is capitalised.
        if raw is None or "nan" in raw.lower():
            return
        try:
            self.counters[metric] = float(raw)
        except ValueError:
            # A single malformed value should not abort the whole report.
            return
def main():
    """Print a whitespace-separated performance summary of RelVal job reports.

    Each positional argument is handed to ``getRelValsFiles`` to locate the
    ``JobReport<N>.xml`` files it contains.  For every workflow found, one row
    is printed with the timestamp taken from the input path, the workflow id
    (the part of the workflow directory name before the first ``_``) and one
    column per ``<metric><step>`` combination.

    The header row is derived from the first input only; columns that a later
    workflow does not provide are printed as ``NA``.
    """
    parser = ArgumentParser()
    parser.add_argument("input", nargs="+", help="input file")
    args = parser.parse_args()

    metrics = ["PeakValueRss", "PeakValueVsize", "TotalJobCPU", "TotalJobTime"]
    report_re = re.compile(r"(.+/|)([^/]+)/JobReport([0-9]).xml")
    date_re = re.compile(r".*CMSSW_.*_(20\d\d-\d\d-\d\d-\d\d\d\d)/.*")

    # None until the header row has been printed for the first input.
    header = None
    for inFile in args.input:
        inFiles, zipFile = getRelValsFiles(inFile, "*/JobReport*.xml", ".*/JobReport([0-9]).xml")

        # workflow id -> {"<metric><step>": value}
        table = {}
        for path in inFiles:
            m = report_re.match(path)
            if not m:
                continue
            workflow, step = m.group(2), m.group(3)
            handler = JobReportHandler(what=metrics)
            try:
                parseString(readRelValFile(path, zipFile), handler)
            except Exception:
                # Best effort: an unreadable or malformed report is skipped,
                # but Ctrl-C and sys.exit() are no longer swallowed.
                continue
            row = table.setdefault(workflow.split("_", 1)[0], {})
            for counter, value in handler.counters.items():
                row[counter + str(step)] = value

        if header is None:
            header = sorted(set(k for r in table.values() for k in r))
            print(" ".join(["Timestamp", "WorkflowId"] + header))

        date_match = date_re.match(inFile)
        if date_match is None:
            parser.error("cannot find a CMSSW_*_YYYY-MM-DD-HHMM/ component in %r" % inFile)
        # NOTE: mktime() interprets the parsed date in the local timezone.
        timestamp = time.mktime(
            datetime.datetime.strptime(date_match.group(1), "%Y-%m-%d-%H%M").timetuple())

        for workflow_id in sorted(table):
            values = table[workflow_id]
            cells = [str(timestamp), workflow_id]
            for column in header:
                if column not in values:
                    cells.append("NA")
                elif column.startswith("PeakValue"):
                    # Memory peaks are coarsened by a factor of 10 because
                    # small changes are not interesting.
                    cells.append(str(int(values[column] / 10)))
                else:
                    # Times are kept with one extra decimal digit, stored as
                    # an integer.  NOTE(review): the original comment said
                    # 1/100 s, but the factor is 10 -- confirm the intended
                    # resolution before changing it.
                    cells.append(str(int(values[column] * 10)))
            print(" ".join(cells))


if __name__ == "__main__":
    main()