From e830cba181d82686f861fa1154158c90ecaa18ff Mon Sep 17 00:00:00 2001 From: Alexander Wong Date: Sat, 12 Dec 2020 10:14:39 -0700 Subject: [PATCH] Squashed commit of the following: commit 5c7d73fad31204755fdbaf3f474654f527ddb1fc Author: Alexander Wong Date: Sat Dec 12 10:13:41 2020 -0700 Updated README examples to be consistent with new groupby commit 6c66306a482f89f1888953105e147bcff9e6102a Author: Alexander Wong Date: Sat Dec 12 10:03:20 2020 -0700 itertools.groupby -> custom group_by --- README.md | 120 +++++++++++++++++++++---------------------- torchprof/display.py | 13 ++++- 2 files changed, 71 insertions(+), 62 deletions(-) diff --git a/README.md b/README.md index 3f90599..9ecf1a0 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![PyPI version](https://badge.fury.io/py/torchprof.svg)](https://pypi.org/project/torchprof/) [![CircleCI](https://circleci.com/gh/awwong1/torchprof.svg?style=svg)](https://circleci.com/gh/awwong1/torchprof) -A minimal dependency library for layer-by-layer profiling of Pytorch models. +A minimal dependency library for layer-by-layer profiling of PyTorch models. All metrics are derived using the PyTorch autograd profiler. 
@@ -31,28 +31,28 @@ Module | Self CPU total | CPU total | Self CUDA total | CUDA total | Sel ---------------|----------------|-----------|-----------------|------------|--------------|---------|---------------|-----------|---------------- AlexNet | | | | | | | | | ├── features | | | | | | | | | -│├── 0 | 1.808ms | 7.171ms | 1.807ms | 7.133ms | 0 b | 0 b | 3.71 Mb | 756.50 Kb | 1 -│├── 1 | 49.693us | 72.366us | 49.152us | 72.608us | 0 b | 0 b | 0 b | 0 b | 1 -│├── 2 | 78.267us | 162.737us | 77.824us | 147.232us | 0 b | 0 b | 1.60 Mb | 547.00 Kb | 1 -│├── 3 | 281.690us | 1.226ms | 506.880us | 1.992ms | 0 b | 0 b | 2.68 Mb | 547.00 Kb | 1 -│├── 4 | 29.124us | 41.487us | 29.632us | 43.968us | 0 b | 0 b | 0 b | 0 b | 1 -│├── 5 | 56.055us | 121.788us | 55.296us | 108.544us | 0 b | 0 b | 1.11 Mb | 380.50 Kb | 1 -│├── 6 | 175.320us | 678.494us | 213.856us | 818.016us | 0 b | 0 b | 8.27 Mb | 253.50 Kb | 1 -│├── 7 | 28.434us | 40.487us | 28.672us | 40.960us | 0 b | 0 b | 0 b | 0 b | 1 -│├── 8 | 147.237us | 564.774us | 209.920us | 801.984us | 0 b | 0 b | 10.20 Mb | 169.00 Kb | 1 -│├── 9 | 28.043us | 40.005us | 27.648us | 40.928us | 0 b | 0 b | 0 b | 0 b | 1 -│├── 10 | 141.357us | 541.427us | 177.152us | 671.552us | 0 b | 0 b | 7.08 Mb | 169.00 Kb | 1 -│├── 11 | 28.503us | 40.405us | 28.672us | 41.152us | 0 b | 0 b | 0 b | 0 b | 1 -│└── 12 | 55.224us | 119.865us | 55.296us | 106.880us | 0 b | 0 b | 324.00 Kb | 108.00 Kb | 1 -├── avgpool | 55.585us | 110.217us | 57.344us | 106.464us | 0 b | 0 b | 108.00 Kb | 36.00 Kb | 1 +│├── 0 | 1.831ms | 7.260ms | 1.830ms | 7.230ms | 0 b | 0 b | 3.71 Mb | 756.50 Kb | 1 +│├── 1 | 46.768us | 68.950us | 46.976us | 70.528us | 0 b | 0 b | 0 b | 0 b | 1 +│├── 2 | 80.361us | 166.213us | 79.872us | 149.696us | 0 b | 0 b | 1.60 Mb | 547.00 Kb | 1 +│├── 3 | 277.412us | 1.205ms | 492.544us | 1.932ms | 0 b | 0 b | 2.68 Mb | 547.00 Kb | 1 +│├── 4 | 28.274us | 40.156us | 27.872us | 41.184us | 0 b | 0 b | 0 b | 0 b | 1 +│├── 5 | 57.138us | 124.176us | 
56.512us | 109.536us | 0 b | 0 b | 1.11 Mb | 380.50 Kb | 1 +│├── 6 | 173.517us | 674.434us | 210.880us | 809.824us | 0 b | 0 b | 8.27 Mb | 253.50 Kb | 1 +│├── 7 | 27.382us | 38.754us | 27.648us | 39.936us | 0 b | 0 b | 0 b | 0 b | 1 +│├── 8 | 144.863us | 556.345us | 207.872us | 798.368us | 0 b | 0 b | 10.20 Mb | 169.00 Kb | 1 +│├── 9 | 27.552us | 39.224us | 26.752us | 39.072us | 0 b | 0 b | 0 b | 0 b | 1 +│├── 10 | 138.752us | 531.703us | 173.056us | 661.568us | 0 b | 0 b | 7.08 Mb | 169.00 Kb | 1 +│├── 11 | 27.743us | 39.515us | 27.648us | 39.936us | 0 b | 0 b | 0 b | 0 b | 1 +│└── 12 | 60.333us | 133.099us | 59.392us | 116.768us | 0 b | 0 b | 324.00 Kb | 108.00 Kb | 1 +├── avgpool | 55.655us | 110.770us | 57.344us | 107.456us | 0 b | 0 b | 108.00 Kb | 36.00 Kb | 1 └── classifier | | | | | | | | | - ├── 0 | 78.037us | 165.510us | 76.896us | 142.432us | 0 b | 0 b | 171.00 Kb | 45.00 Kb | 1 - ├── 1 | 399.993us | 419.901us | 795.648us | 795.648us | 0 b | 0 b | 32.00 Kb | 16.00 Kb | 1 - ├── 2 | 29.937us | 43.122us | 29.664us | 42.944us | 0 b | 0 b | 0 b | 0 b | 1 - ├── 3 | 53.331us | 120.781us | 52.384us | 99.488us | 0 b | 0 b | 76.00 Kb | 20.00 Kb | 1 - ├── 4 | 64.231us | 79.479us | 232.448us | 232.448us | 0 b | 0 b | 32.00 Kb | 16.00 Kb | 1 - ├── 5 | 29.045us | 41.238us | 29.664us | 41.952us | 0 b | 0 b | 0 b | 0 b | 1 - └── 6 | 63.289us | 78.356us | 97.280us | 97.280us | 0 b | 0 b | 8.00 Kb | 4.00 Kb | 1 + ├── 0 | 77.746us | 165.089us | 77.696us | 144.064us | 0 b | 0 b | 171.00 Kb | 45.00 Kb | 1 + ├── 1 | 405.262us | 425.012us | 796.672us | 796.672us | 0 b | 0 b | 32.00 Kb | 16.00 Kb | 1 + ├── 2 | 29.455us | 42.329us | 29.472us | 42.976us | 0 b | 0 b | 0 b | 0 b | 1 + ├── 3 | 53.601us | 120.870us | 53.248us | 99.328us | 0 b | 0 b | 76.00 Kb | 20.00 Kb | 1 + ├── 4 | 63.981us | 79.811us | 232.448us | 232.448us | 0 b | 0 b | 32.00 Kb | 16.00 Kb | 1 + ├── 5 | 27.853us | 39.445us | 27.648us | 40.928us | 0 b | 0 b | 0 b | 0 b | 1 + └── 6 | 61.656us | 76.714us | 96.256us 
| 96.256us | 0 b | 0 b | 8.00 Kb | 4.00 Kb | 1 ``` To see the low level operations that occur within each layer, print the contents of `prof.display(show_events=True)`. @@ -63,20 +63,20 @@ Module | Self CPU total | CPU total | Self CUDA tot AlexNet | | | | | | | | | ├── features | | | | | | | | | │├── 0 | | | | | | | | | -││├── aten::conv2d | 16.481us | 1.808ms | 14.368us | 1.807ms | 0 b | 0 b | 756.50 Kb | 0 b | 1 -││├── aten::convolution | 10.450us | 1.792ms | 10.880us | 1.792ms | 0 b | 0 b | 756.50 Kb | 0 b | 1 -││├── aten::_convolution | 41.480us | 1.781ms | 34.240us | 1.782ms | 0 b | 0 b | 756.50 Kb | 0 b | 1 -││├── aten::contiguous | 2.514us | 2.514us | 2.304us | 2.304us | 0 b | 0 b | 0 b | 0 b | 1 -││├── aten::cudnn_convolution | 1.619ms | 1.657ms | 1.718ms | 1.723ms | 0 b | 0 b | 756.50 Kb | -18.00 Kb | 1 -││├── aten::empty | 9.859us | 9.859us | 0.000us | 0.000us | 0 b | 0 b | 18.00 Kb | 18.00 Kb | 1 -││├── aten::resize_ | 0.410us | 0.410us | 0.000us | 0.000us | 0 b | 0 b | 0 b | 0 b | 1 -││├── aten::stride | 1.773us | 1.773us | 0.000us | 0.000us | 0 b | 0 b | 0 b | 0 b | 4 -││├── aten::reshape | 6.101us | 17.853us | 1.024us | 1.024us | 0 b | 0 b | 0 b | 0 b | 1 -││├── aten::view | 11.752us | 11.752us | 0.000us | 0.000us | 0 b | 0 b | 0 b | 0 b | 1 -││└── aten::add_ | 61.024us | 61.024us | 19.456us | 19.456us | 0 b | 0 b | 0 b | 0 b | 1 +││├── aten::conv2d | 15.779us | 1.831ms | 14.336us | 1.830ms | 0 b | 0 b | 756.50 Kb | 0 b | 1 +││├── aten::convolution | 10.139us | 1.815ms | 8.512us | 1.816ms | 0 b | 0 b | 756.50 Kb | 0 b | 1 +││├── aten::_convolution | 45.115us | 1.805ms | 36.288us | 1.808ms | 0 b | 0 b | 756.50 Kb | 0 b | 1 +││├── aten::contiguous | 8.586us | 8.586us | 8.160us | 8.160us | 0 b | 0 b | 0 b | 0 b | 3 +││├── aten::cudnn_convolution | 1.646ms | 1.682ms | 1.745ms | 1.749ms | 0 b | 0 b | 756.50 Kb | -18.00 Kb | 1 +││├── aten::empty | 21.821us | 21.821us | 0.000us | 0.000us | 0 b | 0 b | 774.50 Kb | 774.50 Kb | 2 +││├── aten::resize_ | 7.324us 
| 7.324us | 0.000us | 0.000us | 0 b | 0 b | 0 b | 0 b | 2 +││├── aten::stride | 2.073us | 2.073us | 0.000us | 0.000us | 0 b | 0 b | 0 b | 0 b | 4 +││├── aten::reshape | 5.701us | 17.603us | 1.056us | 1.056us | 0 b | 0 b | 0 b | 0 b | 1 +││├── aten::view | 11.902us | 11.902us | 0.000us | 0.000us | 0 b | 0 b | 0 b | 0 b | 1 +││└── aten::add_ | 56.837us | 56.837us | 17.408us | 17.408us | 0 b | 0 b | 0 b | 0 b | 1 │├── 1 | | | | | | | | | -││├── aten::relu_ | 27.020us | 49.693us | 25.696us | 49.152us | 0 b | 0 b | 0 b | 0 b | 1 -││└── aten::threshold_ | 22.673us | 22.673us | 23.456us | 23.456us | 0 b | 0 b | 0 b | 0 b | 1 +││├── aten::relu_ | 24.586us | 46.768us | 23.424us | 46.976us | 0 b | 0 b | 0 b | 0 b | 1 +││└── aten::threshold_ | 22.182us | 22.182us | 23.552us | 23.552us | 0 b | 0 b | 0 b | 0 b | 1 │├── 2 | | | | | | | | | ... ``` @@ -95,27 +95,27 @@ print(event_lists_dict[trace[2].path][0]) --------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg CPU Mem Self CPU Mem CUDA Mem Self CUDA Mem # of Calls --------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - aten::conv2d 0.91% 16.481us 100.00% 1.808ms 1.808ms 14.368us 0.80% 1.807ms 1.807ms 0 b 0 b 756.50 Kb 0 b 1 - aten::convolution 0.58% 10.450us 99.09% 1.792ms 1.792ms 10.880us 0.60% 1.792ms 1.792ms 0 b 0 b 756.50 Kb 0 b 1 - aten::_convolution 2.29% 41.480us 98.51% 1.781ms 1.781ms 34.240us 1.90% 1.782ms 1.782ms 0 b 0 b 756.50 Kb 0 b 1 - aten::contiguous 0.21% 3.817us 0.21% 3.817us 3.817us 3.680us 0.20% 3.680us 3.680us 0 b 0 b 0 b 0 b 1 - aten::cudnn_convolution 89.53% 1.619ms 91.64% 1.657ms 1.657ms 
1.718ms 95.09% 1.723ms 1.723ms 0 b 0 b 756.50 Kb -18.00 Kb 1 - aten::empty 0.73% 13.125us 0.73% 13.125us 13.125us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 756.50 Kb 756.50 Kb 1 - aten::contiguous 0.15% 2.745us 0.15% 2.745us 2.745us 2.720us 0.15% 2.720us 2.720us 0 b 0 b 0 b 0 b 1 - aten::resize_ 0.43% 7.835us 0.43% 7.835us 7.835us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 1 - aten::contiguous 0.14% 2.514us 0.14% 2.514us 2.514us 2.304us 0.13% 2.304us 2.304us 0 b 0 b 0 b 0 b 1 - aten::resize_ 0.02% 0.410us 0.02% 0.410us 0.410us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 1 - aten::stride 0.05% 0.982us 0.05% 0.982us 0.982us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 1 - aten::stride 0.02% 0.281us 0.02% 0.281us 0.281us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 1 - aten::stride 0.01% 0.260us 0.01% 0.260us 0.260us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 1 - aten::stride 0.01% 0.250us 0.01% 0.250us 0.250us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 1 - aten::empty 0.55% 9.859us 0.55% 9.859us 9.859us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 18.00 Kb 18.00 Kb 1 - aten::reshape 0.34% 6.101us 0.99% 17.853us 17.853us 1.024us 0.06% 1.024us 1.024us 0 b 0 b 0 b 0 b 1 - aten::view 0.65% 11.752us 0.65% 11.752us 11.752us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 1 - aten::add_ 3.37% 61.024us 3.37% 61.024us 61.024us 19.456us 1.08% 19.456us 19.456us 0 b 0 b 0 b 0 b 1 + aten::conv2d 0.86% 15.779us 100.00% 1.831ms 1.831ms 14.336us 0.78% 1.830ms 1.830ms 0 b 0 b 756.50 Kb 0 b 1 + aten::convolution 0.55% 10.139us 99.14% 1.815ms 1.815ms 8.512us 0.47% 1.816ms 1.816ms 0 b 0 b 756.50 Kb 0 b 1 + aten::_convolution 2.46% 45.115us 98.58% 1.805ms 1.805ms 36.288us 1.98% 1.808ms 1.808ms 0 b 0 b 756.50 Kb 0 b 1 + aten::contiguous 0.20% 3.697us 0.20% 3.697us 3.697us 3.616us 0.20% 3.616us 3.616us 0 b 0 b 0 b 0 b 1 + aten::cudnn_convolution 89.88% 1.646ms 91.85% 1.682ms 1.682ms 1.745ms 95.31% 1.749ms 1.749ms 0 b 0 b 756.50 Kb -18.00 Kb 1 + aten::empty 0.67% 12.313us 0.67% 12.313us 
12.313us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 756.50 Kb 756.50 Kb 1 + aten::contiguous 0.14% 2.575us 0.14% 2.575us 2.575us 2.464us 0.13% 2.464us 2.464us 0 b 0 b 0 b 0 b 1 + aten::resize_ 0.37% 6.843us 0.37% 6.843us 6.843us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 1 + aten::contiguous 0.13% 2.314us 0.13% 2.314us 2.314us 2.080us 0.11% 2.080us 2.080us 0 b 0 b 0 b 0 b 1 + aten::resize_ 0.03% 0.481us 0.03% 0.481us 0.481us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 1 + aten::stride 0.07% 1.203us 0.07% 1.203us 1.203us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 1 + aten::stride 0.02% 0.300us 0.02% 0.300us 0.300us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 1 + aten::stride 0.02% 0.290us 0.02% 0.290us 0.290us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 1 + aten::stride 0.02% 0.280us 0.02% 0.280us 0.280us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 1 + aten::empty 0.52% 9.508us 0.52% 9.508us 9.508us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 18.00 Kb 18.00 Kb 1 + aten::reshape 0.31% 5.701us 0.96% 17.603us 17.603us 1.056us 0.06% 1.056us 1.056us 0 b 0 b 0 b 0 b 1 + aten::view 0.65% 11.902us 0.65% 11.902us 11.902us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 1 + aten::add_ 3.10% 56.837us 3.10% 56.837us 56.837us 17.408us 0.95% 17.408us 17.408us 0 b 0 b 0 b 0 b 1 --------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 1.808ms -CUDA time total: 1.807ms +Self CPU time total: 1.831ms +CUDA time total: 1.830ms ``` @@ -171,13 +171,13 @@ AlexNet | | | If this software is useful to your research, I would greatly appreciate a citation in your work. 
```tex -@misc{torchprof, - author = {Alexander William Wong}, +@misc{awwong1-torchprof, title = {torchprof}, - howpublished = {github.com}, - month = 4, + author = {Alexander William Wong}, + month = 12, year = 2020, - note = {A minimal dependency library for layer-by-layer profiling of Pytorch models.} + url = {https://github.com/awwong1/torchprof}, + note = {https://github.com/awwong1/torchprof} } ``` diff --git a/torchprof/display.py b/torchprof/display.py index 968566c..5c3aaf5 100644 --- a/torchprof/display.py +++ b/torchprof/display.py @@ -1,5 +1,4 @@ from collections import OrderedDict, namedtuple -from itertools import groupby import torch.autograd.profiler as torch_profiler @@ -118,6 +117,16 @@ def _format_measure_tuple(measure): ) +def group_by(events, keyfn): + event_groups = OrderedDict() + for event in events: + key = keyfn(event) + key_events = event_groups.get(key, []) + key_events.append(event) + event_groups[key] = key_events + return event_groups.items() + + def traces_to_display( traces, trace_events, @@ -144,7 +153,7 @@ def traces_to_display( ): # tree measurements have key None, avoiding name conflict if show_events: - for event_name, event_group in groupby(events, lambda e: e.name): + for event_name, event_group in group_by(events, lambda e: e.name): event_group = list(event_group) current_tree[name][event_name] = { None: _build_measure_tuple(event_group, len(event_group))