Skip to content

Commit

Permalink
[add] profiler reports stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
mohsenMahmoodzadeh committed Apr 11, 2023
1 parent c09b0ff commit 57eb197
Show file tree
Hide file tree
Showing 26 changed files with 38,192 additions and 0 deletions.
15,030 changes: 15,030 additions & 0 deletions starter/ProfilerReports/benchmark/profiler-report.html

Large diffs are not rendered by default.

4,055 changes: 4,055 additions & 0 deletions starter/ProfilerReports/benchmark/profiler-report.ipynb

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"RuleTriggered": 14, "Violations": 14, "Details": {"algo-1": {"cpu": {"p25": 39.54, "p50": 41.06125, "p75": 53.19499999999999, "p95": 76.699125, "upper": 73.67749999999998, "lower": 19.057500000000008}, "gpu0": {"p25": 0.0, "p50": 31.0, "p75": 44.0, "p95": 55.0, "upper": 56.0, "lower": 0.0}, "gpu0_memory": {"p25": 0.0, "p50": 22.0, "p75": 33.0, "p95": 41.0, "upper": 42.0, "lower": 0.0}}, "last_timestamp": 1681161660000000, "algo-2": {"cpu": {"p25": 39.619375, "p50": 41.208749999999995, "p75": 53.72, "p95": 80.23212499999998, "upper": 74.8709375, "lower": 18.468437499999997}, "gpu0": {"p25": 0.0, "p50": 31.0, "p75": 45.0, "p95": 55.0, "upper": 56.0, "lower": 0.0}, "gpu0_memory": {"p25": 0.0, "p50": 22.0, "p75": 33.0, "p95": 40.0, "upper": 41.0, "lower": 0.0}}}, "Datapoints": 1750, "RuleParameters": "cpu_threshold_p95:70\ngpu_threshold_p95:70\ngpu_memory_threshold_p95:70\npatience:1000\nwindow:500"}

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"RuleTriggered": 1, "Violations": 11, "Details": {"pin_memory": false, "num_workers": 0, "cores": 4, "dataloaders": 1, "dataloading_time": {"p25": 0.073428, "p50": 0.078887, "p95": 0.09545139999999998, "probs": [13, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 15, 58, 211, 441, 713, 945, 1052, 1046, 898, 836, 687, 526, 340, 223, 129, 80, 54, 31, 13, 9, 10, 14, 11, 2, 4, 2, 0, 0, 2, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], "binedges": [0.016458, 0.019209280000000002, 0.02196056, 0.024711840000000002, 0.02746312, 0.030214400000000002, 0.032965680000000004, 0.035716960000000006, 0.03846824, 0.04121952000000001, 0.043970800000000004, 0.046722080000000006, 0.04947336000000001, 0.05222464000000001, 0.054975920000000005, 0.057727200000000006, 0.06047848000000001, 0.06322976000000001, 0.06598104000000002, 0.06873232000000001, 0.07148360000000001, 0.07423488, 0.07698616000000001, 0.07973744, 0.08248872000000002, 0.08524000000000001, 0.08799128000000002, 0.09074256000000001, 0.09349384000000001, 0.09624512000000002, 0.09899640000000001, 0.10174768000000002, 0.10449896000000002, 0.10725024000000001, 0.11000152000000002, 0.11275280000000001, 0.11550408000000002, 0.11825536000000002, 0.12100664000000001, 0.12375792000000002, 0.12650920000000002, 0.12926048, 0.13201176000000003, 0.13476304, 0.13751432000000002, 0.14026560000000002, 0.14301688, 0.14576816000000004, 0.14851944000000003, 0.15127072000000003, 0.15402200000000002, 0.15677328000000001, 0.15952456000000004, 0.16227584000000003, 0.16502712000000003, 0.16777840000000002, 0.17052968000000002, 0.17328096000000004, 0.17603224000000003, 0.17878352000000003, 0.18153480000000002, 0.18428608000000002, 0.18703736000000004, 0.18978864000000004, 0.19253992000000003, 0.19529120000000003, 0.19804248000000002, 0.20079376000000004, 0.20354504000000004, 0.20629632000000003, 0.20904760000000003, 0.21179888000000002, 0.21455016000000005, 0.21730144000000004, 0.22005272000000003, 0.22280400000000003, 0.22555528000000002, 0.22830656000000005, 0.23105784000000004, 0.23380912000000004, 0.23656040000000003, 0.23931168000000003, 0.24206296000000005, 0.24481424000000004, 0.24756552000000004, 0.2503168, 0.25306808000000003, 0.25581936000000005, 0.2585706400000001, 0.26132192000000004, 0.2640732, 0.26682448000000003, 0.26957576000000005, 0.2723270400000001, 0.2750783200000001, 0.2778296, 0.28058088000000003, 0.28333216000000006, 0.2860834400000001, 0.2888347200000001, 0.291586]}}, "Datapoints": 8373, "RuleParameters": "min_threshold:70\nmax_threshold:200"}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"RuleTriggered": 0, "Violations": 0, "Details": {}, "Datapoints": 1751, "RuleParameters": "increase:5\npatience:1000\nwindow:10"}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"RuleTriggered": 0, "Violations": 10, "Details": {"low_gpu_utilization": 1240, "bottlenecks": {"1681161733.500243": {"GPUs": 1, "CPUs": 1}, "1681161734.500247": {"GPUs": 1, "CPUs": 1}, "1681161808.00034": {"GPUs": 1, "CPUs": 1}, "1681161873.000234": {"GPUs": 1, "CPUs": 1}, "1681161874.00023": {"GPUs": 1, "CPUs": 1}, "1681161878.500251": {"GPUs": 1, "CPUs": 1}, "1681161880.500233": {"GPUs": 1, "CPUs": 1}, "1681161881.50023": {"GPUs": 1, "CPUs": 1}, "1681161882.000251": {"GPUs": 1, "CPUs": 1}, "1681161878.000332": {"GPUs": 1, "CPUs": 1}}}, "Datapoints": 3514, "RuleParameters": "threshold:50\nio_threshold:50\ngpu_threshold:10\npatience:1000"}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"RuleTriggered": 0, "Violations": 0, "Details": {"algo-1": {"workloads": {"gpu0": [616, 2, 3, 3, 0, 0, 0, 1, 0, 0, 0, 0, 16, 27, 168, 75, 61, 66, 72, 67, 71, 57, 61, 47, 61, 77, 61, 87, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}}, "algo-2": {"workloads": {"gpu0": [608, 1, 4, 3, 0, 0, 0, 0, 0, 0, 0, 0, 17, 9, 171, 81, 70, 78, 53, 56, 59, 67, 62, 69, 83, 79, 50, 63, 68, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}}}, "Datapoints": 1751, "RuleParameters": "threshold:0.2\npatience:1000"}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"RuleTriggered": 14, "Violations": 14, "Details": {"algo-1": {"gpu0": {"gpu_max": 56.0, "gpu_95": 55.0, "gpu_5": 0.0, "p25": 0.0, "p50": 31.0, "p75": 44.0, "p95": 55.0, "upper": 56.0, "lower": 0.0}}, "last_timestamp": 1681162500000000, "algo-2": {"gpu0": {"gpu_max": 56.0, "gpu_95": 56.0, "gpu_5": 0.0, "p25": 0.0, "p50": 31.0, "p75": 45.0, "p95": 55.0, "upper": 56.0, "lower": 0.0}}}, "Datapoints": 1751, "RuleParameters": "threshold_p95:70\nthreshold_p5:10\nwindow:500\npatience:1000"}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"RuleTriggered": 0, "Violations": 0, "Details": {"step_num": {}, "job_start": 1681161683.500177, "job_end": 1681162559.500965}, "Datapoints": 0, "RuleParameters": "threshold:20"}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"RuleTriggered": 0, "Violations": 0, "Details": {"ratio": {}, "phase": {}, "phase_time": {}, "general": {"DataLoaderIterInitialize": 0.0013649255513454284, "DataLoaderIter": 99.99863507444864}}, "Datapoints": 0, "RuleParameters": ""}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"RuleTriggered": 0, "Violations": 0, "Details": {"Network": {"algo-1": {"max": 93836168.6, "p99": 52.28, "p95": 0, "p50": 0, "min": 0}, "algo-2": {"max": 97595785.97, "p99": 0, "p95": 0, "p50": 0, "min": 0}}, "GPU": {"algo-1": {"max": 56.0, "p99": 56.0, "p95": 55.0, "p50": 31.0, "min": 0}, "algo-2": {"max": 56.0, "p99": 56.0, "p95": 55.0, "p50": 31.0, "min": 0}}, "CPU": {"algo-1": {"max": 100.0, "p99": 96.7, "p95": 76.7, "p50": 41.06, "min": 0.49}, "algo-2": {"max": 100.0, "p99": 98.49, "p95": 80.23, "p50": 41.21, "min": 4.6}}, "CPU memory": {"algo-1": {"max": 33.06, "p99": 32.42, "p95": 31.57, "p50": 29.77, "min": 4.4}, "algo-2": {"max": 32.57, "p99": 31.69, "p95": 31.06, "p50": 29.71, "min": 4.46}}, "GPU memory": {"algo-1": {"max": 42.0, "p99": 41.0, "p95": 41.0, "p50": 22.0, "min": 0}, "algo-2": {"max": 41.0, "p99": 41.0, "p95": 40.0, "p50": 22.0, "min": 0}}, "I/O": {"algo-1": {"max": 38.15, "p99": 26.48, "p95": 18.68, "p50": 0, "min": 0}, "algo-2": {"max": 39.47, "p99": 27.54, "p95": 18.51, "p50": 0, "min": 0}}}, "Datapoints": 1752, "RuleParameters": ""}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"RuleTriggered": 0, "Violations": 0, "Details": {"step_details": {}}, "Datapoints": 0, "RuleParameters": "threshold:3\nmode:None\nn_outliers:10\nstddev:3"}
Loading

0 comments on commit 57eb197

Please sign in to comment.