@@ -200,21 +200,15 @@ object Compiler {
200
200
new TfFrontend (
201
201
graphDef = util.protoFromStream(GraphDef , modelStream),
202
202
arch = options.arch,
203
- inputBatchSize = options.inputBatchSize,
204
203
graphStream = graphStream,
205
- printSchedulerSummary = options.printSchedulerSummary,
206
- printLayersSummary = options.printLayersSummary,
207
- printProgress = options.printProgress
204
+ options = options
208
205
)
209
206
} else if (modelSourceType == CompilerSourceType .ONNX ) {
210
207
new OnnxFrontend (
211
208
modelProto = util.protoFromStream(ModelProto , modelStream),
212
209
arch = options.arch,
213
- inputBatchSize = options.inputBatchSize,
214
210
graphStream = graphStream,
215
- printSchedulerSummary = options.printSchedulerSummary,
216
- printLayersSummary = options.printLayersSummary,
217
- printProgress = options.printProgress
211
+ options = options
218
212
)
219
213
} else
220
214
throw new CompilerException (
@@ -257,11 +251,17 @@ object Compiler {
257
251
258
252
val flowNodeNames = frontend.traverse(outputNames)
259
253
260
// Report how many nodes the traversal found before starting the rewrite.
if (options.printProgress)
  Seq(
    s"Found ${flowNodeNames.size} node(s)",
    "Rewriting emitters ..."
  ).foreach(println)

val flowEmitters = frontend.rewrite(flowNodeNames)

// Report how many emitters the rewrite produced.
if (options.printProgress)
  println(s"Rewritten to ${flowEmitters.size} emitter(s)")
264
+
265
265
val context = EmitContext (backend, backendStats, mm, outputNames)
266
266
267
267
val emitResults = for (emitter <- flowEmitters) yield {
@@ -272,11 +272,11 @@ object Compiler {
272
272
273
273
layerSchedulerResults = emitResults.filter(_.isDefined).map(_.get).toList
274
274
macs = layerSchedulerResults.map(_.macs).sum
275
// MAC efficiency is derived from the backend statistics; when none were
// collected it is reported as 0.
macEfficiency = backendStats
  .map(stats => BackendStats.macEfficiency(stats, options.arch, macs))
  .getOrElse(0f)
280
280
281
281
// TODO: fix leaks
282
282
// mm.reportObjects()
@@ -354,93 +354,110 @@ object Compiler {
354
354
)
355
355
tb.addNamedLine(" True consts scalar size" , mm.constsScalarSize)
356
356
tb.addNamedLine(" Consts utilization (%)" , mm.constsUtilization * 100f )
357
// Scale the total MAC count by the unit letter/divisor chosen for it.
val (macsUnitLetter, macsUnitDivisor) =
  BackendStats.getUnitsLetterAndDivisor(macs)

tb.addNamedLine(
  s"True MACs (${macsUnitLetter}MAC)",
  macs.toFloat / macsUnitDivisor
)

// The efficiency line is only added when backend statistics are present.
if (backendStats.nonEmpty) {
  tb.addNamedLine("MAC efficiency (%)", macEfficiency * 100f)
}
359
365
print(tb)
366
+ }
360
367
361
// Per-layer breakdown: one table per group of at most 32 layers, with one
// column per layer (headed by the layer's index in layerSchedulerResults).
if (options.printLayersSummary) {
  val layerSchedulerResultsWithIndex = layerSchedulerResults.zipWithIndex

  for (groupResultsWithIndex <- layerSchedulerResultsWithIndex.grouped(32)) {
    val groupResults = groupResultsWithIndex.map(_._1)
    val groupMacs    = groupResults.map(_.macs)

    val tb = new TablePrinter(Some("LAYERS SUMMARY"), true)

    tb.addLine(
      new TableLine(
        List("Layer:") ++ groupResultsWithIndex.map(_._2)
      )
    )
    tb.addLine(
      new TableLine(
        List("Number of stages:") ++ groupResults.map(_.numberOfStages)
      )
    )
    tb.addLine(
      new TableLine(
        List("Number of combined stages:") ++
          groupResults.map(_.numberOfCombinedStages)
      )
    )
    tb.addLine(
      new TableLine(
        List("Number of partitions:") ++ groupResults.map(_.numberOfPartitions)
      )
    )

    // The display unit is chosen from the smallest non-zero MAC count in the
    // group. Calling `.min` on an empty collection throws, which would happen
    // for a group in which no layer has a positive MAC count; in that case
    // fall back to the group's largest value (`grouped` never yields an empty
    // group, so `.max` is safe).
    // NOTE(review): assumes getUnitsLetterAndDivisor accepts a zero count,
    // as it would have to for an all-zero total -- confirm.
    val positiveMacs = groupMacs.filter(_ > 0)
    val unitBasis =
      if (positiveMacs.nonEmpty) positiveMacs.min else groupMacs.max
    val (macsLetter, macsDivisor) =
      BackendStats.getUnitsLetterAndDivisor(unitBasis)

    tb.addLine(
      new TableLine(
        List(s"True MACs (${macsLetter}MAC):") ++ groupMacs
          .map(_.toFloat)
          .map(_ / macsDivisor)
          .map(f => f"$f%.3f")
      )
    )
    tb.addLine(
      new TableLine(
        List("MAC efficiency (%):") ++ groupResults
          .map(_.macEfficiency)
          .map(_ * 100f)
          .map(f => f"$f%.1f")
      )
    )
    tb.addLine(
      new TableLine(
        List("Accumulator utilization (%):") ++ groupResults
          .map(_.accumulatorUtilization)
          .map(_ * 100f)
          .map(f => f"$f%.1f")
      )
    )
    tb.addLine(
      new TableLine(
        List("Local utilization (%):") ++ groupResults
          .map(_.localUtilization)
          .map(_ * 100f)
          .map(f => f"$f%.1f")
      )
    )

    print(tb)
  }
}
438
452
439
- if (backendStats.isDefined) {
440
- BackendStats .printCompositionSummary(backendStats.get)
441
- BackendStats .printCyclesSummary(backendStats.get)
442
- BackendStats .printEnergySummary(backendStats.get)
453
+ if (backendStats.isDefined) {
454
+ if (options.printInstructionsSummary) {
455
+ BackendStats .printCompositionSummary(" TOTAL" , backendStats.get)
456
+ BackendStats .printCyclesSummary(" TOTAL" , backendStats.get)
457
+ BackendStats .printEnergySummary(" TOTAL" , backendStats.get)
458
+ }
443
459
460
+ if (options.printStridesSummary) {
444
461
def printStrideStats (
445
462
title : String ,
446
463
select : StrideStats => Any
@@ -456,10 +473,16 @@ object Compiler {
456
473
print(tb)
457
474
}
458
475
459
- printStrideStats(" STRIDES COUNT SUMMARY" , stats => stats.count)
460
- printStrideStats(" STRIDES MAX SIZE SUMMARY" , stats => stats.maxSize)
461
476
printStrideStats(
462
- " STRIDES AVERAGE SIZE SUMMARY" ,
477
+ " TOTAL STRIDES COUNT SUMMARY" ,
478
+ stats => stats.count
479
+ )
480
+ printStrideStats(
481
+ " TOTAL STRIDES MAX SIZE SUMMARY" ,
482
+ stats => stats.maxSize
483
+ )
484
+ printStrideStats(
485
+ " TOTAL STRIDES AVERAGE SIZE SUMMARY" ,
463
486
stats => Math .round(stats.totalSize.toFloat / stats.count.toFloat)
464
487
)
465
488
}
0 commit comments