'''
Summarizes a set of results.
'''
from __future__ import print_function
import argparse
import copy
import glob
import json
import os
import re
import sys
from ..compliance_checker import mlp_compliance
from ..rcp_checker import rcp_checker
_ALLOWED_BENCHMARKS_V06 = [
    'resnet',
    'ssd',
    'maskrcnn',
    'gnmt',
    'transformer',
    'ncf',
    'minigo',
]

_ALLOWED_BENCHMARKS_V07 = [
    'bert',
    'dlrm',
    'gnmt',
    'maskrcnn',
    'minigo',
    'resnet',
    'ssd',
    'transformer',
]

_ALLOWED_BENCHMARKS_V10 = [
    'bert',
    'dlrm',
    'maskrcnn',
    'minigo',
    'resnet',
    'ssd',
    'rnnt',
    'unet3d',
]

_RUN_START_REGEX = r':::MLLOG (.*"run_start",.*)'
_RUN_STOP_REGEX = r':::MLLOG (.*"run_stop",.*)'
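# Illustrative only (not from the original file): the regexes above capture
# the JSON payload of MLLOG lines such as
#   :::MLLOG {"key": "run_start", "time_ms": 1620000000000, ...}
# The captured group is parsed with json.loads() in _read_mlperf_score() to
# pull out "time_ms"; the exact field order and extra fields vary by logger.
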
def _get_sub_folders(folder):
    sub_folders = [os.path.join(folder, sub_folder)
                   for sub_folder in os.listdir(folder)]
    return [sub_folder
            for sub_folder in sub_folders
            if os.path.isdir(sub_folder)]


def _read_json_file(json_file):
    with open(json_file, 'r') as f:
        return json.load(f)


def _pretty_system_name(system_desc):
    system_name = system_desc['system_name']
    if system_name == 'tpu-v3':
        chips = int(system_desc['accelerators_per_node']) * 2
        return 'TPUv3.{}'.format(chips)
    return system_name


def _linkable_system_name(system_desc):
    system_name = system_desc['system_name']
    if system_name == 'tpu-v3':
        chips = int(system_desc['accelerators_per_node']) * 2
        return 'tpu-v3-{}'.format(chips)
    return system_name


def _pretty_accelerator_model_name(system_desc):
    accelerator_model_name = system_desc['accelerator_model_name']
    if accelerator_model_name == 'tpu-v3':
        return 'TPUv3'
    return accelerator_model_name


def _pretty_framework(system_desc):
    framework = system_desc['framework']
    if 'TensorFlow' in framework:
        commit_hash = re.search(r' commit hash = .*', framework)
        if commit_hash:
            return framework.replace(commit_hash.group(0), '')
    return framework


def _benchmark_alias(benchmark):
    if benchmark == 'mask':
        return 'maskrcnn'
    return benchmark

def _ruleset_url_prefix(ruleset):
    # Strip only the trailing patch version, e.g. '0.7.0' -> '0.7' and
    # '1.0.0' -> '1.0' (a plain replace of '.0' would turn '1.0.0' into '1').
    short_ruleset = ruleset[:-2] if ruleset.endswith('.0') else ruleset
    return 'https://github.com/mlperf/training_results_v{}'.format(short_ruleset)

def _details_url(system_desc, ruleset):
    return '{ruleset_prefix}/blob/master/{submitter}/systems/{system}.json'.format(
        ruleset_prefix=_ruleset_url_prefix(ruleset),
        submitter=system_desc['submitter'],
        system=_linkable_system_name(system_desc))


def _code_url(system_desc, ruleset):
    return '{ruleset_prefix}/blob/master/{submitter}/benchmarks'.format(
        ruleset_prefix=_ruleset_url_prefix(ruleset),
        submitter=system_desc['submitter'])

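# Illustrative only (hypothetical submitter and system names): for ruleset
# '0.7.0', submitter 'ExampleOrg' and system 'example-system',
# _details_url() yields
#   https://github.com/mlperf/training_results_v0.7/blob/master/ExampleOrg/systems/example-system.json
# and _code_url() yields
#   https://github.com/mlperf/training_results_v0.7/blob/master/ExampleOrg/benchmarks
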
def _row_key(system_desc):
    return '-'.join([
        system_desc['division'],
        system_desc['system_name'],
        system_desc['accelerator_model_name'],
        system_desc['framework'],
        system_desc['accelerators_per_node'],
    ])

def _read_mlperf_score(result_file, ruleset):
    with open(result_file, 'r') as f:
        result = f.read()

    config_file = '{ruleset}/common.yaml'.format(ruleset=ruleset)
    checker = mlp_compliance.make_checker(
        ruleset=ruleset,
        quiet=True,
        werror=False)
    valid, _, _, _ = mlp_compliance.main(result_file, config_file, checker)

    if not valid:
        return None

    run_start = re.search(_RUN_START_REGEX, result)
    if run_start is None:
        raise Exception('Failed to match run_start!')
    run_start = json.loads(run_start.group(1))['time_ms']

    run_stop = re.search(_RUN_STOP_REGEX, result)
    if run_stop is None:
        raise Exception('Failed to match run_stop!')
    run_stop = json.loads(run_stop.group(1))['time_ms']

    duration_ms = float(run_stop) - float(run_start)
    minutes = duration_ms / 60 / 1000  # convert ms to minutes
    return minutes

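# Illustrative arithmetic (hypothetical values): if run_start reports
# time_ms = 1620000000000 and run_stop reports time_ms = 1620000600000, the
# run lasted 600000 ms, i.e. 600000 / 60 / 1000 = 10.0 minutes, which is the
# score returned by _read_mlperf_score().
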
def _compute_olympic_average(scores, dropped_scores, max_dropped_scores):
    """Computes an olympic average by dropping the top and bottom scores.

    If max_dropped_scores == 1, this is a normal olympic average.
    If max_dropped_scores > 1, more than one score is dropped from the
    top and bottom and the rest are averaged.
    When dropped_scores > 0, some scores have already been dropped
    (non-converged runs), so they should not be double counted.
    Precondition: dropped scores have a higher score value than the rest.
    """
    # Sort scores first
    scores.sort()
    # Remove top and bottom scores
    countable_scores = scores[max_dropped_scores:len(scores) - (max_dropped_scores - dropped_scores)]
    sum_of_scores = sum(countable_scores)
    return sum_of_scores * 1.0 / len(countable_scores)

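# Worked example (illustrative, not part of the original module): for
# scores [12.0, 10.0, 16.0, 20.0, 14.0] with dropped_scores=0 and
# max_dropped_scores=1, the sorted list is [10.0, 12.0, 14.0, 16.0, 20.0];
# the lowest (10.0) and highest (20.0) entries are excluded and the result
# is (12.0 + 14.0 + 16.0) / 3 = 14.0.
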
def _is_organization_folder(folder):
    if not os.path.isdir(folder):
        return False
    systems_folder = os.path.join(folder, 'systems')
    if not os.path.exists(systems_folder):
        return False
    results_folder = os.path.join(folder, 'results')
    if not os.path.exists(results_folder):
        return False
    return True

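# Expected submission layout, as implied by the checks above and by
# summarize_results() below (names other than 'systems' and 'results' are
# illustrative):
#   <organization>/
#       systems/<system>.json
#       results/<system>/<benchmark>/result_*.txt
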
def summarize_results(folder, ruleset, csv_file=None):
    """Summarizes a set of results.

    Args:
        folder: The folder for a submission package.
        ruleset: The ruleset such as 0.6.0, 0.7.0, or 1.0.0.
        csv_file: An optional open file object; if given, summary rows are
            also written to it as CSV.
    """
    systems_folder = os.path.join(folder, 'systems')
    results_folder = os.path.join(folder, 'results')

    rows = {}
    for system_folder in _get_sub_folders(results_folder):
        folder_parts = system_folder.split('/')
        system = folder_parts[-1]

        # Load corresponding system description.
        system_file = os.path.join(
            systems_folder, '{}.json'.format(system))
        if not os.path.exists(system_file):
            print('ERROR: Missing {}'.format(system_file))
            continue
        try:
            desc = _read_json_file(system_file)
        except:
            print('ERROR: Could not decode JSON struct in {}'.format(system_file))
            continue

        # Construct prefix portion of the row.
        row = ''
        if 'division' not in desc:
            print('ERROR: "division" field missing in {}'.format(system_file))
            continue
        csv_header = 'division,availability'
        row += '"{}",,'.format(desc['division'])
        if 'submitter' not in desc:
            print('ERROR: "submitter" field missing in {}'.format(system_file))
            continue
        csv_header += ',submitter'
        row += '"{}",'.format(desc['submitter'])
        if 'system_name' not in desc:
            print('ERROR: "system_name" field missing in {}'.format(system_file))
            continue
        csv_header += ',system'
        row += '"{}",'.format(_pretty_system_name(desc))
        if 'host_processor_model_name' not in desc:
            print('ERROR: "host_processor_model_name" field missing in {}'.format(system_file))
            continue
        csv_header += ',host_processor_model_name'
        row += '"{}",'.format(desc['host_processor_model_name'])
        if 'host_processor_core_count' not in desc:
            print('ERROR: "host_processor_core_count" field missing in {}'.format(system_file))
            continue
        csv_header += ',host_processor_core_count'
        row += '{},'.format(int(desc['host_processors_per_node']) * int(desc['number_of_nodes']))
        if 'accelerator_model_name' not in desc:
            print('ERROR: "accelerator_model_name" field missing in {}'.format(system_file))
            continue
        csv_header += ',accelerator_model_name'
        row += '"{}",'.format(_pretty_accelerator_model_name(desc))
        if 'accelerators_per_node' not in desc:
            print('ERROR: "accelerators_per_node" field missing in {}'.format(system_file))
            continue
        csv_header += ',accelerators_count'
        row += '{},'.format(int(desc['accelerators_per_node']) * int(desc['number_of_nodes']))
        if 'framework' not in desc:
            print('ERROR: "framework" field missing in {}'.format(system_file))
            continue
        csv_header += ',framework'
        row += '"{}",'.format(_pretty_framework(desc))
        # Collect scores for benchmarks.
        benchmark_scores = {}
        for benchmark_folder in _get_sub_folders(system_folder):
            folder_parts = benchmark_folder.split('/')
            benchmark = _benchmark_alias(folder_parts[-1])

            # Read scores from result files.
            pattern = '{folder}/result_*.txt'.format(folder=benchmark_folder)
            result_files = glob.glob(pattern, recursive=True)
            scores = []
            dropped_scores = 0
            for result_file in result_files:
                score = _read_mlperf_score(result_file, ruleset)
                if score is None:
                    dropped_scores += 1
                else:
                    scores.append(score)
            max_dropped_scores = 4 if benchmark == 'unet3d' else 1
            if dropped_scores > max_dropped_scores:
                print('CRITICAL ERROR: Too many non-converging runs for {} {}/{}'.
                      format(desc['submitter'], system, benchmark))
                print('** CRITICAL ERROR ** Results in the table for {} {}/{} are NOT correct'.
                      format(desc['submitter'], system, benchmark))
            elif dropped_scores >= 1:
                print('NOTICE: Dropping non-converged run(s) for {} {}/{} using olympic scoring.'
                      .format(desc['submitter'], system, benchmark))
            if dropped_scores <= max_dropped_scores:
                benchmark_scores[benchmark] = _compute_olympic_average(
                    scores, dropped_scores, max_dropped_scores)
        # Construct scores portion of the row.
        if ruleset == '0.6.0':
            allowed_benchmarks = _ALLOWED_BENCHMARKS_V06
        elif ruleset == '0.7.0':
            allowed_benchmarks = _ALLOWED_BENCHMARKS_V07
        elif ruleset == '1.0.0':
            allowed_benchmarks = _ALLOWED_BENCHMARKS_V10
        csv_header += "," + ",".join(allowed_benchmarks)
        for benchmark in allowed_benchmarks:
            if benchmark in benchmark_scores:
                row += '{:.2f},'.format(benchmark_scores[benchmark])
            else:
                row += ','

        # Construct postfix portion of the row.
        csv_header += ',details_url'
        row += '{},'.format(_details_url(desc, ruleset))
        csv_header += ',code_url'
        row += '{},'.format(_code_url(desc, ruleset))

        rows[_row_key(desc)] = row

    # Print rows in order of the sorted keys.
    for key in sorted(rows):
        print(rows[key])
        # Write the rows to csv if needed
        if csv_file is not None:
            # Add the header above the first results row
            if csv_file.tell() == 0:
                csv_file.write(csv_header)
            csv_file.write('\n' + rows[key])

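# Example usage (hypothetical path): summarize a single organization's
# submission package and also export the rows to CSV:
#     with open('summary.csv', 'w') as f:
#         summarize_results('training_results/ExampleOrg', '1.0.0', csv_file=f)
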
def get_parser():
    parser = argparse.ArgumentParser(
        prog='mlperf_logging.result_summarizer',
        description='Summarize a set of result files.',
    )

    parser.add_argument('folder', type=str,
                        help='the folder for a submission package')
    parser.add_argument('usage', type=str,
                        help='the usage such as training, inference_edge, inference_server')
    parser.add_argument('ruleset', type=str,
                        help='the ruleset such as 0.6.0, 0.7.0, or 1.0.0')
    parser.add_argument('--werror', action='store_true',
                        help='Treat warnings as errors')
    parser.add_argument('--quiet', action='store_true',
                        help='Suppress warnings. Does nothing if --werror is set')
    parser.add_argument('-csv', '--csv', type=str,
                        help='Exports a csv of the results to the path specified')

    return parser

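# Typical command line (illustrative; assumes the summarizer is invoked as a
# module, as suggested by the prog= value above):
#   python3 -m mlperf_logging.result_summarizer <submission_folder> training 1.0.0 --csv summary.csv
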
def main():
    parser = get_parser()
    args = parser.parse_args()

    if args.usage != 'training':
        print('Usage {} is not supported.'.format(args.usage))
        sys.exit(1)
    if args.ruleset not in ['0.6.0', '0.7.0', '1.0.0']:
        print('Ruleset {} is not supported.'.format(args.ruleset))
        sys.exit(1)

    # Setup a csv file if required
    csv_file = None
    if args.csv is not None:
        csv_file = open(args.csv, 'w')

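    # The folder argument may name several organizations at once using a
    # brace pattern, e.g. 'results_dir/{OrgA,OrgB}' or 'results_dir/{*}'
    # (paths here are illustrative); otherwise it is treated as a single
    # organization folder.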
    multiple_folders_regex = r'(.*)\{(.*)\}'
    multiple_folders = re.search(multiple_folders_regex, args.folder)
    if multiple_folders:
        # Parse results for multiple organizations.
        path_prefix = multiple_folders.group(1)
        path_suffix = multiple_folders.group(2)
        if ',' in path_suffix:
            orgs = multiple_folders.group(2).split(',')
        elif '*' == path_suffix:
            orgs = os.listdir(path_prefix)
            orgs = [org for org in orgs
                    if _is_organization_folder(os.path.join(path_prefix, org))]
        print('Detected organizations: {}'.format(', '.join(orgs)))
        for org in orgs:
            org_folder = path_prefix + org
            summarize_results(org_folder, args.ruleset, csv_file)
    else:
        # Parse results for single organization.
        summarize_results(args.folder, args.ruleset, csv_file)

    # Close csv file if required
    if args.csv is not None:
        csv_file.close()


if __name__ == '__main__':
    main()