-
Notifications
You must be signed in to change notification settings - Fork 59
/
Copy pathlog-input-files
executable file
·96 lines (87 loc) · 3.19 KB
/
log-input-files
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/env python3
# Process job reports: collect the LFN of every file they reference, optionally
# publish each one to Elasticsearch, and print the resulting list of files.
from argparse import ArgumentParser
from xml.sax import parseString, ContentHandler
from hashlib import sha1
from datetime import datetime
import base64, json
from urllib.request import Request,urlopen
from os import getenv
from cmsutils import getRelValsFiles, readRelValFile
import re
# Pipes results to elasticsearch.
class ElasticSearchPrinter(object):
    """Publish one document per (release, architecture, workflow, step, file).

    The target host is read from the ``ES_HOSTNAME`` environment variable and
    optional HTTP basic-auth credentials (``user:password``) from ``ES_AUTH``.
    Instances are callable; each call issues one HTTP ``PUT``.
    """

    # The build timestamp embedded in the release name, e.g. ``2021-05-20-2300``.
    # Matching the full stamp (instead of splitting on the literal "201")
    # keeps this working for releases built in any decade.
    _RELEASE_STAMP_RE = re.compile(r"\d{4}-\d{2}-\d{2}-\d{2}00")

    def __init__(self):
        self.hostname = getenv("ES_HOSTNAME")
        self.auth = getenv("ES_AUTH")

    def _build_request(self, release, architecture, workflow, step, content):
        """Return the ``urllib`` PUT request describing one input file.

        The document id is the SHA-1 of all five fields concatenated, so
        publishing the same tuple twice addresses the same document. The
        index name carries the year and month parsed from the release name.

        Raises:
            ValueError: if ``release`` contains no ``YYYY-MM-DD-HH00`` stamp.
        """
        sha_str = release + architecture + workflow + step + content
        sha1_id = sha1(sha_str.encode()).hexdigest()

        stamp = self._RELEASE_STAMP_RE.search(release)
        if stamp is None:
            raise ValueError("no build timestamp found in release name %r" % release)
        d = datetime.strptime(stamp.group(0), "%Y-%m-%d-%H00")

        payload = {
            "release": release,
            "architecture": architecture,
            "workflow": workflow,
            "step": step,
            "file": content,
        }
        url = "https://%s/ib-files-%s/ib-file-data/%s" % (
            self.hostname,
            d.strftime("%Y.%m"),
            sha1_id,
        )
        # urllib requires the request body to be bytes under Python 3.
        body = json.dumps(payload).encode("utf-8")
        request = Request(url, data=body, method="PUT")
        # Without this urllib labels the body as form-encoded data.
        request.add_header("Content-Type", "application/json")
        if self.auth:
            base64string = base64.b64encode(self.auth.encode()).decode()
            request.add_header("Authorization", "Basic %s" % base64string)
        return request

    # We use the REST API because the python one does not seem to work
    # with our ssl setup.
    def __call__(self, release, architecture, workflow, step, content):
        """Send the document for one input file to Elasticsearch.

        Errors raised while building the request or by ``urlopen`` propagate
        to the caller.
        """
        request = self._build_request(release, architecture, workflow, step, content)
        # The response body is not used; the context manager just makes sure
        # the connection is closed.
        with urlopen(request):
            pass
class JobReportHandler(ContentHandler):
    """SAX handler that collects the text of every ``<LFN>`` element.

    Each completed LFN becomes a key of the ``files`` dict handed to the
    constructor (the dict is mutated in place, so one dict can be shared by
    several handlers). When ``ES_HOSTNAME`` is set in the environment, each
    LFN is also passed to an ``ElasticSearchPrinter`` together with the
    release, architecture, workflow and step given to the constructor.
    """

    def __init__(self, release, architecture, workflow, step, files):
        # Let the base class initialise its own state.
        super().__init__()
        self.release = release
        self.architecture = architecture
        self.workflow = workflow
        self.step = step
        self.printer = None
        if getenv("ES_HOSTNAME"):
            self.printer = ElasticSearchPrinter()
        self.inLFN = False
        self.files = files
        self.content = ''

    def startElement(self, name, attrs):
        """Start accumulating text when an ``<LFN>`` element opens."""
        if name != "LFN":
            return
        self.content = ''
        self.inLFN = True

    def characters(self, content):
        """Append character data to the current LFN.

        The text is accumulated rather than assigned, so it stays complete
        if the parser delivers it in more than one call.
        """
        if self.inLFN:
            self.content += content

    def endElement(self, name):
        """Record the finished LFN and, if configured, publish it."""
        if name != "LFN":
            return
        lfn = self.content
        # Update local state before publishing: if the printer raises, the
        # LFN is still recorded in ``files`` and the handler is no longer
        # stuck in the "inside LFN" state. The exception still propagates.
        self.inLFN = False
        self.files[lfn] = 1
        if self.printer:
            self.printer(self.release, self.architecture, self.workflow, self.step, lfn)
if __name__ == "__main__":
    # Local import: only the command-line entry point needs sys (for stderr).
    import sys

    parser = ArgumentParser()
    parser.add_argument("input", nargs=1, help="input file")
    args = parser.parse_args()

    # Every LFN found in any job report ends up as a key of this dict; the
    # handlers fill it in, so duplicates across reports collapse.
    files = {}
    for inFile in args.input:
        # The architecture and release names are taken from the input path:
        # the component before "/www/" and the next-to-last component.
        m = re.match(".*/([^/]+)/www/.*/([^/]+)/.*", inFile)
        if not m:
            parser.error("cannot extract architecture and release from path: %s" % inFile)
        architecture, release = m.groups()
        # NOTE(review): presumably returns the matching report paths plus a
        # handle for the archive containing them - confirm against cmsutils.
        inFiles, zipFile = getRelValsFiles(inFile, "*/JobReport*.xml", ".*/JobReport([0-9]).xml")
        for l in inFiles:
            # group(2) is the directory holding the report (used as the
            # workflow name), group(3) the single-digit step number.
            m = re.match("(.+/|)([^/]+)/JobReport([0-9]).xml", l)
            if not m:
                continue
            handler = JobReportHandler(release, architecture, m.group(2), m.group(3), files)
            try:
                parseString(readRelValFile(l, zipFile), handler)
            except Exception as e:
                # Best effort: one unreadable report must not abort the run.
                # Report it on stderr so stdout stays a clean list of files.
                print("Skipping %s: %s" % (l, e), file=sys.stderr)
                continue
    print("\n".join(files))