From e7f35d494fee4af08d1e0a65d1f3ebcc5865154e Mon Sep 17 00:00:00 2001 From: setuc Date: Mon, 15 Apr 2019 20:33:58 +0800 Subject: [PATCH 01/15] 1.70637 hour run on Azure NDv2 with sgd on Resnet-50 --- ImageNet/train/setu_resnet50_azure_ndv2.json | 41 ++++++++++++++++++++ ImageNet/train/setu_resnet50_azure_ndv2.tsv | 28 +++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 ImageNet/train/setu_resnet50_azure_ndv2.json create mode 100644 ImageNet/train/setu_resnet50_azure_ndv2.tsv diff --git a/ImageNet/train/setu_resnet50_azure_ndv2.json b/ImageNet/train/setu_resnet50_azure_ndv2.json new file mode 100644 index 0000000..2002a85 --- /dev/null +++ b/ImageNet/train/setu_resnet50_azure_ndv2.json @@ -0,0 +1,41 @@ +{ + "version": "v1.0", + "author": "Setu Chokshi", + "authorEmail": "setuc@hotmail.com", + "framework": "PyTorch 1.0", + "codeURL": "https://github.com/diux-dev/cluster/blob/master/dawn/resnet.b512.baseline.py", + "model": "ResNet50", + "hardware": "Azure ND40s_v2", + "timestamp": "2019-04-13", + "costPerHour": 12.24, + "optimizer": "SGD with Momentum", + "momentum": 0.9, + "misc": { + "schedule": [{ + "learning_rate": 1.0, + "epochs": 1 + }, + { + "learning_rate": 2.0, + "epochs": 7 + }, + { + "learning_rate": 0.5, + "epochs": 5 + }, + { + "learning_rate": 0.4375, + "epochs": 9 + }, + { + "learning_rate": 0.004375, + "epochs": 2 + }, + { + "learning_rate": 0.0025, + "epochs": 3 + } + ] + }, + "usedBlacklist": true +} diff --git a/ImageNet/train/setu_resnet50_azure_ndv2.tsv b/ImageNet/train/setu_resnet50_azure_ndv2.tsv new file mode 100644 index 0000000..34dfdf8 --- /dev/null +++ b/ImageNet/train/setu_resnet50_azure_ndv2.tsv @@ -0,0 +1,28 @@ +epoch hours top1 top5 +0 0.04286 7.586 19.566 +1 0.06806 17.558 37.538 +2 0.09313 27.152 51.258 +3 0.11811 31.392 57.04 +4 0.14323 27.774 52.056 +5 0.16837 34.108 60.296 +6 0.19342 35.288 61.202 +7 0.21853 37.756 64.208 +8 0.24371 41.028 66.976 +9 0.26875 47.132 73.112 +10 0.2938 46.32 71.77 +11 0.3189 53.772 78.156 +12 0.34431 59.744 82.866 +13 0.5025 56.046 80.606 +14 0.57292 56.53 80.924 +15 0.64316 56.45 80.916 +16 0.7134 60.088 83.382 +17 0.78371 61.03 83.604 +18 0.85462 64.588 86.498 +19 0.9249 63.21 85.43 +20 0.99556 68.418 88.744 +21 1.0662 71.228 90.216 +22 1.13674 71.932 90.566 +23 1.20724 72.892 91.09 +24 1.27757 73.634 91.484 +25 1.58089 75.758 92.978 +26 1.70637 75.89 93.02 From 8de5a46c6c9efc8297e8d3eede5bbd5f81573303 Mon Sep 17 00:00:00 2001 From: Setu Chokshi Date: Mon, 15 Apr 2019 20:49:47 +0800 Subject: [PATCH 02/15] Update setu_resnet50_azure_ndv2.tsv Fix the TSV file header --- ImageNet/train/setu_resnet50_azure_ndv2.tsv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ImageNet/train/setu_resnet50_azure_ndv2.tsv b/ImageNet/train/setu_resnet50_azure_ndv2.tsv index 34dfdf8..908232e 100644 --- a/ImageNet/train/setu_resnet50_azure_ndv2.tsv +++ b/ImageNet/train/setu_resnet50_azure_ndv2.tsv @@ -1,4 +1,4 @@ -epoch hours top1 top5 +epoch hours top1Accuracy top5Accuracy 0 0.04286 7.586 19.566 1 0.06806 17.558 37.538 2 0.09313 27.152 51.258 From f9256dd5d60d5b1cf8fbfd4d0efbef440b23a170 Mon Sep 17 00:00:00 2001 From: Setu Chokshi Date: Mon, 15 Apr 2019 20:57:50 +0800 Subject: [PATCH 03/15] Update setu_resnet50_azure_ndv2.json Fix the code URL --- ImageNet/train/setu_resnet50_azure_ndv2.json | 1 - 1 file changed, 1 deletion(-) diff --git a/ImageNet/train/setu_resnet50_azure_ndv2.json b/ImageNet/train/setu_resnet50_azure_ndv2.json index 2002a85..16b235b 100644 --- a/ImageNet/train/setu_resnet50_azure_ndv2.json +++ b/ImageNet/train/setu_resnet50_azure_ndv2.json @@ -3,7 +3,6 @@ "author": "Setu Chokshi", "authorEmail": "setuc@hotmail.com", "framework": "PyTorch 1.0", - "codeURL": "https://github.com/diux-dev/cluster/blob/master/dawn/resnet.b512.baseline.py", "model": "ResNet50", "hardware": "Azure ND40s_v2", "timestamp": "2019-04-13", From 7e0dfc079292f97fea6f70c38349e318869ca49c Mon Sep 17 00:00:00 2001 From: Ajay Arasanipalai Date: Tue, 16 Apr 2019 10:44:18 +0530 Subject: [PATCH 04/15] Add files via upload --- CIFAR10/train/ajay_resnet9_1v100_log.tsv | 25 ++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 CIFAR10/train/ajay_resnet9_1v100_log.tsv diff --git a/CIFAR10/train/ajay_resnet9_1v100_log.tsv b/CIFAR10/train/ajay_resnet9_1v100_log.tsv new file mode 100644 index 0000000..7ad0c8e --- /dev/null +++ b/CIFAR10/train/ajay_resnet9_1v100_log.tsv @@ -0,0 +1,25 @@ +epoch hours top1Accuracy +1 0.00214595 46.34 +2 0.00304084 69.12 +3 0.00393691 68.75 +4 0.00476100 75.63 +5 0.00558586 78.06 +6 0.00641432 81.45 +7 0.00724628 83.23 +8 0.00807638 83.07 +9 0.00888925 84.97 +10 0.00969845 86.18 +11 0.01051212 83.69 +12 0.01132872 81.33 +13 0.01214695 86.69 +14 0.01296518 86.28 +15 0.01380158 87.65 +16 0.01462798 87.80 +17 0.01544107 87.78 +18 0.01625558 87.48 +19 0.01707385 91.43 +20 0.01789114 90.94 +21 0.01871133 92.11 +22 0.01952776 92.97 +23 0.02034553 93.79 +24 0.02116915 94.08 \ No newline at end of file From 584a535766600aaed38fe69096cd22736a623bb3 Mon Sep 17 00:00:00 2001 From: Ajay Arasanipalai Date: Tue, 16 Apr 2019 10:51:56 +0530 Subject: [PATCH 05/15] Create ajay_resnet9_1v100_pytorch.json --- CIFAR10/train/ajay_resnet9_1v100_pytorch.json | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 CIFAR10/train/ajay_resnet9_1v100_pytorch.json diff --git a/CIFAR10/train/ajay_resnet9_1v100_pytorch.json b/CIFAR10/train/ajay_resnet9_1v100_pytorch.json new file mode 100644 index 0000000..851d9d4 --- /dev/null +++ b/CIFAR10/train/ajay_resnet9_1v100_pytorch.json @@ -0,0 +1,25 @@ +{ + "version": "v1.0", + "author": "Ajay Uppili Arasanipalai, Elliptigon", + "authorEmail": "ajay@elliptigon.com", + "framework": "pytorch 0.4.0", + "codeURL": "https://github.com/iyaja/cifar10-fast-2", + "model": "Custom ResNet 9", + "hardware": "V100 (Nimbix np9g1)", + "costPerHour": 1.68, + "timestamp": "2019-04-14", + "misc": { + "optimizer": "SGD with Nesterov Momentum", + "momentum": 0.9, + "weightDecay": 5e-4, + "batchSize": 512, + "learningRate": { + "type": "piecewiseLinear", + "epochs": [0, 5, 24], + "values": [0, 0.4, 0] + }, + "CUDA version": "9.0", + "CuDNN version": "7.1.2", + "commandLine" : "python dawn.py" + } +} From 92cffe79eb0aaffb89f41b6ea14bee4c311d3dd1 Mon Sep 17 00:00:00 2001 From: Ajay Arasanipalai Date: Tue, 16 Apr 2019 10:52:38 +0530 Subject: [PATCH 06/15] Delete ajay_resnet9_1v100_log.tsv --- CIFAR10/train/ajay_resnet9_1v100_log.tsv | 25 ------------------------ 1 file changed, 25 deletions(-) delete mode 100644 CIFAR10/train/ajay_resnet9_1v100_log.tsv diff --git a/CIFAR10/train/ajay_resnet9_1v100_log.tsv b/CIFAR10/train/ajay_resnet9_1v100_log.tsv deleted file mode 100644 index 7ad0c8e..0000000 --- a/CIFAR10/train/ajay_resnet9_1v100_log.tsv +++ /dev/null @@ -1,25 +0,0 @@ -epoch hours top1Accuracy -1 0.00214595 46.34 -2 0.00304084 69.12 -3 0.00393691 68.75 -4 0.00476100 75.63 -5 0.00558586 78.06 -6 0.00641432 81.45 -7 0.00724628 83.23 -8 0.00807638 83.07 -9 0.00888925 84.97 -10 0.00969845 86.18 -11 0.01051212 83.69 -12 0.01132872 81.33 -13 0.01214695 86.69 -14 0.01296518 86.28 -15 0.01380158 87.65 -16 0.01462798 87.80 -17 0.01544107 87.78 -18 0.01625558 87.48 -19 0.01707385 91.43 -20 0.01789114 90.94 -21 0.01871133 92.11 -22 0.01952776 92.97 -23 0.02034553 93.79 -24 0.02116915 94.08 \ No newline at end of file From f45ac8509c5b5824838976c6475914c300478b53 Mon Sep 17 00:00:00 2001 From: Ajay Arasanipalai Date: Tue, 16 Apr 2019 10:53:14 +0530 Subject: [PATCH 07/15] Add files via upload --- CIFAR10/train/ajay_resnet9_1v100_pytorch.tsv | 25 ++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 CIFAR10/train/ajay_resnet9_1v100_pytorch.tsv diff --git a/CIFAR10/train/ajay_resnet9_1v100_pytorch.tsv b/CIFAR10/train/ajay_resnet9_1v100_pytorch.tsv new file mode 100644 index 0000000..7ad0c8e --- /dev/null +++ b/CIFAR10/train/ajay_resnet9_1v100_pytorch.tsv @@ -0,0 +1,25 @@ +epoch hours top1Accuracy +1 0.00214595 46.34 +2 0.00304084 69.12 +3 0.00393691 68.75 +4 0.00476100 75.63 +5 0.00558586 78.06 +6 0.00641432 81.45 +7 0.00724628 83.23 +8 0.00807638 83.07 +9 0.00888925 84.97 +10 0.00969845 86.18 +11 0.01051212 83.69 +12 0.01132872 81.33 +13 0.01214695 86.69 +14 0.01296518 86.28 +15 0.01380158 87.65 +16 0.01462798 87.80 +17 0.01544107 87.78 +18 0.01625558 87.48 +19 0.01707385 91.43 +20 0.01789114 90.94 +21 0.01871133 92.11 +22 0.01952776 92.97 +23 0.02034553 93.79 +24 0.02116915 94.08 \ No newline at end of file From b1621b87d243820683ba600a30e962da65a7d202 Mon Sep 17 00:00:00 2001 From: Ajay Arasanipalai Date: Wed, 17 Apr 2019 11:48:45 +0530 Subject: [PATCH 08/15] Update ajay_resnet9_1v100_pytorch.json --- CIFAR10/train/ajay_resnet9_1v100_pytorch.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CIFAR10/train/ajay_resnet9_1v100_pytorch.json b/CIFAR10/train/ajay_resnet9_1v100_pytorch.json index 851d9d4..5e9b87f 100644 --- a/CIFAR10/train/ajay_resnet9_1v100_pytorch.json +++ b/CIFAR10/train/ajay_resnet9_1v100_pytorch.json @@ -1,7 +1,7 @@ { "version": "v1.0", - "author": "Ajay Uppili Arasanipalai, Elliptigon", - "authorEmail": "ajay@elliptigon.com", + "author": "Ajay Uppili Arasanipalai", + "authorEmail": "ajayuppili@gmail.com", "framework": "pytorch 0.4.0", "codeURL": "https://github.com/iyaja/cifar10-fast-2", "model": "Custom ResNet 9", From 8727ebe8623666690748f8645608d39a8ed85897 Mon Sep 17 00:00:00 2001 From: Ajay Arasanipalai Date: Wed, 17 Apr 2019 21:40:29 +0530 Subject: [PATCH 09/15] Update ajay_resnet9_1v100_pytorch.json --- CIFAR10/train/ajay_resnet9_1v100_pytorch.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CIFAR10/train/ajay_resnet9_1v100_pytorch.json b/CIFAR10/train/ajay_resnet9_1v100_pytorch.json index 5e9b87f..fffa1bc 100644 --- a/CIFAR10/train/ajay_resnet9_1v100_pytorch.json +++ b/CIFAR10/train/ajay_resnet9_1v100_pytorch.json @@ -2,10 +2,10 @@ "version": "v1.0", "author": "Ajay Uppili Arasanipalai", "authorEmail": "ajayuppili@gmail.com", - "framework": "pytorch 0.4.0", + "framework": "PowerAI 1.6.0 + PyTorch 1.0.1", "codeURL": "https://github.com/iyaja/cifar10-fast-2", "model": "Custom ResNet 9", - "hardware": "V100 (Nimbix np9g1)", + "hardware": "IBM AC922 + Nvidia Tesla V100 (Nimbix np9g1)", "costPerHour": 1.68, "timestamp": "2019-04-14", "misc": { From 93b93126fa7ebebb5adc48f9a04892c7c7f9c13e Mon Sep 17 00:00:00 2001 From: Ajay Arasanipalai Date: Wed, 17 Apr 2019 22:13:27 +0530 Subject: [PATCH 10/15] Delete ajay_resnet9_1v100_pytorch.tsv --- CIFAR10/train/ajay_resnet9_1v100_pytorch.tsv | 25 -------------------- 1 file changed, 25 deletions(-) delete mode 100644 CIFAR10/train/ajay_resnet9_1v100_pytorch.tsv diff --git a/CIFAR10/train/ajay_resnet9_1v100_pytorch.tsv b/CIFAR10/train/ajay_resnet9_1v100_pytorch.tsv deleted file mode 100644 index 7ad0c8e..0000000 --- a/CIFAR10/train/ajay_resnet9_1v100_pytorch.tsv +++ /dev/null @@ -1,25 +0,0 @@ -epoch hours top1Accuracy -1 0.00214595 46.34 -2 0.00304084 69.12 -3 0.00393691 68.75 -4 0.00476100 75.63 -5 0.00558586 78.06 -6 0.00641432 81.45 -7 0.00724628 83.23 -8 0.00807638 83.07 -9 0.00888925 84.97 -10 0.00969845 86.18 -11 0.01051212 83.69 -12 0.01132872 81.33 -13 0.01214695 86.69 -14 0.01296518 86.28 -15 0.01380158 87.65 -16 0.01462798 87.80 -17 0.01544107 87.78 -18 0.01625558 87.48 -19 0.01707385 91.43 -20 0.01789114 90.94 -21 0.01871133 92.11 -22 0.01952776 92.97 -23 0.02034553 93.79 -24 0.02116915 94.08 \ No newline at end of file From 61f5c1113b2cc4328a15863f664470cd31dbeba3 Mon Sep 17 00:00:00 2001 From: Ajay Arasanipalai Date: Wed, 17 Apr 2019 22:14:52 +0530 Subject: [PATCH 11/15] Add latest results --- CIFAR10/train/ajay_resnet9_1v100_pytorch.tsv | 25 ++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 CIFAR10/train/ajay_resnet9_1v100_pytorch.tsv diff --git a/CIFAR10/train/ajay_resnet9_1v100_pytorch.tsv b/CIFAR10/train/ajay_resnet9_1v100_pytorch.tsv new file mode 100644 index 0000000..b593797 --- /dev/null +++ b/CIFAR10/train/ajay_resnet9_1v100_pytorch.tsv @@ -0,0 +1,25 @@ +epoch hours top1Accuracy +1 0.00162239 56.39 +2 0.00244334 71.19 +3 0.00325813 72.37 +4 0.00407593 76.11 +5 0.00489309 78.21 +6 0.00571937 72.58 +7 0.00653693 79.36 +8 0.00735919 80.13 +9 0.00817624 82.22 +10 0.00899263 84.93 +11 0.00980781 85.10 +12 0.01062468 86.27 +13 0.01144059 85.79 +14 0.01225792 87.94 +15 0.01307402 85.02 +16 0.01390910 89.27 +17 0.01473466 88.72 +18 0.01555417 89.18 +19 0.01637740 89.46 +20 0.01719761 91.16 +21 0.01802300 89.29 +22 0.01884320 92.86 +23 0.01966557 93.60 +24 0.02048723 94.06 \ No newline at end of file From 106d5cbcfe94976417d62b3f484caed9236398b2 Mon Sep 17 00:00:00 2001 From: Ajay Arasanipalai Date: Thu, 18 Apr 2019 00:03:14 +0530 Subject: [PATCH 12/15] Add commit hash --- CIFAR10/train/ajay_resnet9_1v100_pytorch.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CIFAR10/train/ajay_resnet9_1v100_pytorch.json b/CIFAR10/train/ajay_resnet9_1v100_pytorch.json index fffa1bc..5b74f67 100644 --- a/CIFAR10/train/ajay_resnet9_1v100_pytorch.json +++ b/CIFAR10/train/ajay_resnet9_1v100_pytorch.json @@ -3,7 +3,7 @@ "author": "Ajay Uppili Arasanipalai", "authorEmail": "ajayuppili@gmail.com", "framework": "PowerAI 1.6.0 + PyTorch 1.0.1", - "codeURL": "https://github.com/iyaja/cifar10-fast-2", + "codeURL": "https://github.com/iyaja/cifar10-fast-2/commit/eb3c84003d8ff8329214a71023fac81475202e28", "model": "Custom ResNet 9", "hardware": "IBM AC922 + Nvidia Tesla V100 (Nimbix np9g1)", "costPerHour": 1.68, From 0b04ec71a4b6cdd666f00a9b106f39f6a77fcd58 Mon Sep 17 00:00:00 2001 From: Setu Chokshi Date: Fri, 19 Apr 2019 18:16:23 +0800 Subject: [PATCH 13/15] Rename to appropriate filenames --- ...50_azure_ndv2.json => setu_resnet50_azure-ndv2_pytorch.json} | 2 +- ...et50_azure_ndv2.tsv => setu_resnet50_azure-ndv2_pytorch.tsv} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename ImageNet/train/{setu_resnet50_azure_ndv2.json => setu_resnet50_azure-ndv2_pytorch.json} (91%) rename ImageNet/train/{setu_resnet50_azure_ndv2.tsv => setu_resnet50_azure-ndv2_pytorch.tsv} (100%) diff --git a/ImageNet/train/setu_resnet50_azure_ndv2.json b/ImageNet/train/setu_resnet50_azure-ndv2_pytorch.json similarity index 91% rename from ImageNet/train/setu_resnet50_azure_ndv2.json rename to ImageNet/train/setu_resnet50_azure-ndv2_pytorch.json index 16b235b..49544fa 100644 --- a/ImageNet/train/setu_resnet50_azure_ndv2.json +++ b/ImageNet/train/setu_resnet50_azure-ndv2_pytorch.json @@ -1,6 +1,6 @@ { "version": "v1.0", - "author": "Setu Chokshi", + "author": "SSetu Chokshi (MS AI MVP | PropertyGuru)", "authorEmail": "setuc@hotmail.com", "framework": "PyTorch 1.0", "model": "ResNet50", diff --git a/ImageNet/train/setu_resnet50_azure_ndv2.tsv b/ImageNet/train/setu_resnet50_azure-ndv2_pytorch.tsv similarity index 100% rename from ImageNet/train/setu_resnet50_azure_ndv2.tsv rename to ImageNet/train/setu_resnet50_azure-ndv2_pytorch.tsv From 7e739465c3335a28f43ac45086a2ebf9716e09eb Mon Sep 17 00:00:00 2001 From: Setu Chokshi Date: Fri, 19 Apr 2019 18:18:13 +0800 Subject: [PATCH 14/15] Fix name --- ImageNet/train/setu_resnet50_azure-ndv2_pytorch.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ImageNet/train/setu_resnet50_azure-ndv2_pytorch.json b/ImageNet/train/setu_resnet50_azure-ndv2_pytorch.json index 49544fa..8bb7e19 100644 --- a/ImageNet/train/setu_resnet50_azure-ndv2_pytorch.json +++ b/ImageNet/train/setu_resnet50_azure-ndv2_pytorch.json @@ -1,6 +1,6 @@ { "version": "v1.0", - "author": "SSetu Chokshi (MS AI MVP | PropertyGuru)", + "author": "Setu Chokshi (MS AI MVP | PropertyGuru)", "authorEmail": "setuc@hotmail.com", "framework": "PyTorch 1.0", "model": "ResNet50", From f42e4f515f6e7430aebf47c870fa5f6c506dbc89 Mon Sep 17 00:00:00 2001 From: Ajay Arasanipalai Date: Sat, 20 Apr 2019 23:34:59 +0530 Subject: [PATCH 15/15] Update ajay_resnet9_1v100_pytorch.json --- CIFAR10/train/ajay_resnet9_1v100_pytorch.json | 1 - 1 file changed, 1 deletion(-) diff --git a/CIFAR10/train/ajay_resnet9_1v100_pytorch.json b/CIFAR10/train/ajay_resnet9_1v100_pytorch.json index 5b74f67..886219e 100644 --- a/CIFAR10/train/ajay_resnet9_1v100_pytorch.json +++ b/CIFAR10/train/ajay_resnet9_1v100_pytorch.json @@ -6,7 +6,6 @@ "codeURL": "https://github.com/iyaja/cifar10-fast-2/commit/eb3c84003d8ff8329214a71023fac81475202e28", "model": "Custom ResNet 9", "hardware": "IBM AC922 + Nvidia Tesla V100 (Nimbix np9g1)", - "costPerHour": 1.68, "timestamp": "2019-04-14", "misc": { "optimizer": "SGD with Nesterov Momentum",