From a05dcc8b41f5b84f2d8ac00afed305b4f776f28c Mon Sep 17 00:00:00 2001 From: Fan Date: Mon, 27 Feb 2023 13:19:48 -0500 Subject: [PATCH 1/2] Fix legacy import --- examples/notebook/fedscale_demo_client.ipynb | 235 ++++---------- examples/notebook/fedscale_demo_server.ipynb | 325 +++---------------- 2 files changed, 104 insertions(+), 456 deletions(-) diff --git a/examples/notebook/fedscale_demo_client.ipynb b/examples/notebook/fedscale_demo_client.ipynb index f2ce4e8d..c30cd55a 100644 --- a/examples/notebook/fedscale_demo_client.ipynb +++ b/examples/notebook/fedscale_demo_client.ipynb @@ -26,14 +26,30 @@ "name": "stderr", "output_type": "stream", "text": [ - "(10-05) 10:56:27 INFO [executor.py:61] (EXECUTOR:1) is setting up environ ...\n" + "(02-27) 12:24:33 INFO [fllibs.py:97] Initializing the model ...\n", + "(02-27) 12:24:33 INFO [executor.py:75] (EXECUTOR:1) is setting up environ ...\n", + "0.0%" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Files already downloaded and verified\n", + "Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /Users/fan/cifar10/cifar-10-python.tar.gz\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100.0%\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Extracting /Users/fan/cifar10/cifar-10-python.tar.gz to /Users/fan/cifar10/\n", "Files already downloaded and verified\n" ] }, @@ -41,174 +57,41 @@ "name": "stderr", "output_type": "stream", "text": [ - "(10-05) 10:56:29 INFO [executor.py:117] Data partitioner starts ...\n", - "(10-05) 10:56:29 INFO [divide_data.py:106] Randomly partitioning data, 50000 samples...\n", - "(10-05) 10:56:29 INFO [divide_data.py:106] Randomly partitioning data, 10000 samples...\n", - "(10-05) 10:56:29 INFO [executor.py:128] Data partitioner completes ...\n", - "(10-05) 10:56:29 INFO [channel_context.py:21] %%%%%%%%%% Opening grpc connection to 127.0.0.1 %%%%%%%%%%\n", - "(10-05) 10:56:29 INFO [executor.py:414] Start monitoring events ...\n", - "(10-05) 10:56:31 INFO [client.py:32] Start to train (CLIENT: 1) ...\n", - "(10-05) 10:56:38 INFO [client.py:67] Training of (CLIENT: 1) completes, {'clientId': 1, 'moving_loss': 6.385307352168408, 'trained_size': 600, 'success': True}\n", - "(10-05) 10:56:39 INFO [client.py:32] Start to train (CLIENT: 2) ...\n", - "(10-05) 10:56:46 INFO [client.py:67] Training of (CLIENT: 2) completes, {'clientId': 2, 'moving_loss': 5.383798982996671, 'trained_size': 600, 'success': True}\n", - "(10-05) 10:56:46 INFO [client.py:32] Start to train (CLIENT: 3) ...\n", - "(10-05) 10:56:53 INFO [client.py:67] Training of (CLIENT: 3) completes, {'clientId': 3, 'moving_loss': 5.385428038255249, 'trained_size': 600, 'success': True}\n", - "(10-05) 10:56:53 INFO [client.py:32] Start to train (CLIENT: 4) ...\n", - "(10-05) 10:57:00 INFO [client.py:67] Training of (CLIENT: 4) completes, {'clientId': 4, 'moving_loss': 5.255024715696292, 'trained_size': 600, 'success': True}\n", - "(10-05) 10:57:02 INFO [client.py:32] Start to train (CLIENT: 1) ...\n", - "(10-05) 10:57:09 INFO [client.py:67] Training of (CLIENT: 1) completes, {'clientId': 1, 'moving_loss': 4.171052567362976, 'trained_size': 600, 'success': True}\n", - "(10-05) 10:57:09 INFO [client.py:32] Start to train (CLIENT: 2) ...\n", - "(10-05) 10:57:16 INFO [client.py:67] Training of (CLIENT: 2) completes, {'clientId': 2, 'moving_loss': 5.34112758911072, 'trained_size': 600, 'success': True}\n", - "(10-05) 10:57:17 INFO [client.py:32] Start to train (CLIENT: 3) ...\n", 
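Note: aside from re-executed notebook outputs, the substantive change this patch carries for the client demo is the import fix named in the subject line: the demo cell stops importing Client from the legacy module fedscale.cloud.execution.client and imports TorchClient from fedscale.cloud.execution.torch_client instead. A minimal sketch of the patched demo cell as it appears later in this file (CPU-only run, per the notebook):

    import sys, os

    import fedscale.cloud.config_parser as parser
    # Legacy import was: from fedscale.cloud.execution.client import Client
    from fedscale.cloud.execution.torch_client import TorchClient
    from fedscale.cloud.execution.executor import Executor

    ### On CPU
    parser.args.use_cuda = "False"
    Demo_Executor = Executor(parser.args)
    Demo_Executor.run()
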
- "(10-05) 10:57:23 INFO [client.py:67] Training of (CLIENT: 3) completes, {'clientId': 3, 'moving_loss': 6.283451225568975, 'trained_size': 600, 'success': True}\n", - "(10-05) 10:57:24 INFO [client.py:32] Start to train (CLIENT: 4) ...\n", - "(10-05) 10:57:31 INFO [client.py:67] Training of (CLIENT: 4) completes, {'clientId': 4, 'moving_loss': 5.414917396137178, 'trained_size': 600, 'success': True}\n", - "(10-05) 10:57:33 INFO [client.py:32] Start to train (CLIENT: 1) ...\n", - "(10-05) 10:57:40 INFO [client.py:67] Training of (CLIENT: 1) completes, {'clientId': 1, 'moving_loss': 4.728196259173867, 'trained_size': 600, 'success': True}\n", - "(10-05) 10:57:40 INFO [client.py:32] Start to train (CLIENT: 2) ...\n", - "(10-05) 10:57:47 INFO [client.py:67] Training of (CLIENT: 2) completes, {'clientId': 2, 'moving_loss': 4.245132194605622, 'trained_size': 600, 'success': True}\n", - "(10-05) 10:57:47 INFO [client.py:32] Start to train (CLIENT: 3) ...\n", - "(10-05) 10:57:55 INFO [client.py:67] Training of (CLIENT: 3) completes, {'clientId': 3, 'moving_loss': 4.600091122132928, 'trained_size': 600, 'success': True}\n", - "(10-05) 10:57:55 INFO [client.py:32] Start to train (CLIENT: 4) ...\n", - "(10-05) 10:58:02 INFO [client.py:67] Training of (CLIENT: 4) completes, {'clientId': 4, 'moving_loss': 4.824014905330292, 'trained_size': 600, 'success': True}\n", - "(10-05) 10:58:04 INFO [client.py:32] Start to train (CLIENT: 1) ...\n", - "(10-05) 10:58:11 INFO [client.py:67] Training of (CLIENT: 1) completes, {'clientId': 1, 'moving_loss': 4.123896865647862, 'trained_size': 600, 'success': True}\n", - "(10-05) 10:58:11 INFO [client.py:32] Start to train (CLIENT: 2) ...\n", - "(10-05) 10:58:18 INFO [client.py:67] Training of (CLIENT: 2) completes, {'clientId': 2, 'moving_loss': 4.305955504712801, 'trained_size': 600, 'success': True}\n", - "(10-05) 10:58:18 INFO [client.py:32] Start to train (CLIENT: 3) ...\n", - "(10-05) 10:58:25 INFO [client.py:67] Training of (CLIENT: 3) completes, {'clientId': 3, 'moving_loss': 5.471528879133904, 'trained_size': 600, 'success': True}\n", - "(10-05) 10:58:25 INFO [client.py:32] Start to train (CLIENT: 4) ...\n", - "(10-05) 10:58:32 INFO [client.py:67] Training of (CLIENT: 4) completes, {'clientId': 4, 'moving_loss': 3.8462213926046678, 'trained_size': 600, 'success': True}\n", - "(10-05) 10:58:53 INFO [model_test_module.py:306] Rank 1: Test set: Average loss: 2.4887, Top-1 Accuracy: 1000.0/10000 (0.1), Top-5 Accuracy: 0.5142\n", - "(10-05) 10:58:53 INFO [executor.py:376] After aggregation round 5, CumulTime 146.3842, eval_time 18.7353, test_loss 2.4887, test_accuracy 10.00%, test_5_accuracy 51.42% \n", - "\n", - "(10-05) 10:58:54 INFO [client.py:32] Start to train (CLIENT: 1) ...\n", - "(10-05) 10:59:01 INFO [client.py:67] Training of (CLIENT: 1) completes, {'clientId': 1, 'moving_loss': 4.542471038460385, 'trained_size': 600, 'success': True}\n", - "(10-05) 10:59:01 INFO [client.py:32] Start to train (CLIENT: 2) ...\n", - "(10-05) 10:59:08 INFO [client.py:67] Training of (CLIENT: 2) completes, {'clientId': 2, 'moving_loss': 3.3273613208657293, 'trained_size': 600, 'success': True}\n", - "(10-05) 10:59:09 INFO [client.py:32] Start to train (CLIENT: 3) ...\n", - "(10-05) 10:59:15 INFO [client.py:67] Training of (CLIENT: 3) completes, {'clientId': 3, 'moving_loss': 4.458196635807365, 'trained_size': 600, 'success': True}\n", - "(10-05) 10:59:16 INFO [client.py:32] Start to train (CLIENT: 4) ...\n", - "(10-05) 10:59:22 INFO [client.py:67] Training of (CLIENT: 4) 
completes, {'clientId': 4, 'moving_loss': 3.2138776843070707, 'trained_size': 600, 'success': True}\n", - "(10-05) 10:59:25 INFO [client.py:32] Start to train (CLIENT: 1) ...\n", - "(10-05) 10:59:32 INFO [client.py:67] Training of (CLIENT: 1) completes, {'clientId': 1, 'moving_loss': 3.80448099041252, 'trained_size': 600, 'success': True}\n", - "(10-05) 10:59:32 INFO [client.py:32] Start to train (CLIENT: 2) ...\n", - "(10-05) 10:59:39 INFO [client.py:67] Training of (CLIENT: 2) completes, {'clientId': 2, 'moving_loss': 3.3631498863039644, 'trained_size': 600, 'success': True}\n", - "(10-05) 10:59:39 INFO [client.py:32] Start to train (CLIENT: 3) ...\n", - "(10-05) 10:59:46 INFO [client.py:67] Training of (CLIENT: 3) completes, {'clientId': 3, 'moving_loss': 3.59191329329352, 'trained_size': 600, 'success': True}\n", - "(10-05) 10:59:46 INFO [client.py:32] Start to train (CLIENT: 4) ...\n", - "(10-05) 10:59:53 INFO [client.py:67] Training of (CLIENT: 4) completes, {'clientId': 4, 'moving_loss': 2.848148092967869, 'trained_size': 600, 'success': True}\n", - "(10-05) 10:59:56 INFO [client.py:32] Start to train (CLIENT: 1) ...\n", - "(10-05) 11:00:03 INFO [client.py:67] Training of (CLIENT: 1) completes, {'clientId': 1, 'moving_loss': 2.872066663595638, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:00:03 INFO [client.py:32] Start to train (CLIENT: 2) ...\n", - "(10-05) 11:00:10 INFO [client.py:67] Training of (CLIENT: 2) completes, {'clientId': 2, 'moving_loss': 3.5066577600041056, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:00:10 INFO [client.py:32] Start to train (CLIENT: 3) ...\n", - "(10-05) 11:00:17 INFO [client.py:67] Training of (CLIENT: 3) completes, {'clientId': 3, 'moving_loss': 2.966638272894974, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:00:17 INFO [client.py:32] Start to train (CLIENT: 4) ...\n", - "(10-05) 11:00:24 INFO [client.py:67] Training of (CLIENT: 4) completes, {'clientId': 4, 'moving_loss': 2.885736187690472, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:00:26 INFO [client.py:32] Start to train (CLIENT: 1) ...\n", - "(10-05) 11:00:33 INFO [client.py:67] Training of (CLIENT: 1) completes, {'clientId': 1, 'moving_loss': 3.212822436472684, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:00:33 INFO [client.py:32] Start to train (CLIENT: 2) ...\n", - "(10-05) 11:00:40 INFO [client.py:67] Training of (CLIENT: 2) completes, {'clientId': 2, 'moving_loss': 2.6508591739685192, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:00:40 INFO [client.py:32] Start to train (CLIENT: 3) ...\n", - "(10-05) 11:00:47 INFO [client.py:67] Training of (CLIENT: 3) completes, {'clientId': 3, 'moving_loss': 2.887190611917579, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:00:47 INFO [client.py:32] Start to train (CLIENT: 4) ...\n", - "(10-05) 11:00:54 INFO [client.py:67] Training of (CLIENT: 4) completes, {'clientId': 4, 'moving_loss': 2.704264756492174, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:00:57 INFO [client.py:32] Start to train (CLIENT: 1) ...\n", - "(10-05) 11:01:04 INFO [client.py:67] Training of (CLIENT: 1) completes, {'clientId': 1, 'moving_loss': 3.152766867797803, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:01:04 INFO [client.py:32] Start to train (CLIENT: 2) ...\n", - "(10-05) 11:01:11 INFO [client.py:67] Training of (CLIENT: 2) completes, {'clientId': 2, 'moving_loss': 2.4352049035450603, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:01:11 INFO [client.py:32] Start to train 
(CLIENT: 3) ...\n", - "(10-05) 11:01:17 INFO [client.py:67] Training of (CLIENT: 3) completes, {'clientId': 3, 'moving_loss': 2.9751960506664665, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:01:18 INFO [client.py:32] Start to train (CLIENT: 4) ...\n", - "(10-05) 11:01:24 INFO [client.py:67] Training of (CLIENT: 4) completes, {'clientId': 4, 'moving_loss': 2.7658717371408987, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:01:45 INFO [model_test_module.py:306] Rank 1: Test set: Average loss: 2.3363, Top-1 Accuracy: 1766.0/10000 (0.1766), Top-5 Accuracy: 0.6799\n", - "(10-05) 11:01:45 INFO [executor.py:376] After aggregation round 10, CumulTime 318.6031, eval_time 18.3159, test_loss 2.3363, test_accuracy 17.66%, test_5_accuracy 67.99% \n", - "\n", - "(10-05) 11:01:46 INFO [client.py:32] Start to train (CLIENT: 1) ...\n", - "(10-05) 11:01:53 INFO [client.py:67] Training of (CLIENT: 1) completes, {'clientId': 1, 'moving_loss': 2.471355666196691, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:01:53 INFO [client.py:32] Start to train (CLIENT: 2) ...\n", - "(10-05) 11:02:00 INFO [client.py:67] Training of (CLIENT: 2) completes, {'clientId': 2, 'moving_loss': 2.4356494496458128, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:02:00 INFO [client.py:32] Start to train (CLIENT: 3) ...\n", - "(10-05) 11:02:07 INFO [client.py:67] Training of (CLIENT: 3) completes, {'clientId': 3, 'moving_loss': 2.576381177196222, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:02:07 INFO [client.py:32] Start to train (CLIENT: 4) ...\n", - "(10-05) 11:02:15 INFO [client.py:67] Training of (CLIENT: 4) completes, {'clientId': 4, 'moving_loss': 2.3470000365460093, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:02:17 INFO [client.py:32] Start to train (CLIENT: 1) ...\n", - "(10-05) 11:02:24 INFO [client.py:67] Training of (CLIENT: 1) completes, {'clientId': 1, 'moving_loss': 2.509514877440946, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:02:24 INFO [client.py:32] Start to train (CLIENT: 2) ...\n", - "(10-05) 11:02:31 INFO [client.py:67] Training of (CLIENT: 2) completes, {'clientId': 2, 'moving_loss': 2.2464047509950236, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:02:31 INFO [client.py:32] Start to train (CLIENT: 3) ...\n", - "(10-05) 11:02:38 INFO [client.py:67] Training of (CLIENT: 3) completes, {'clientId': 3, 'moving_loss': 2.4089918757961515, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:02:38 INFO [client.py:32] Start to train (CLIENT: 4) ...\n", - "(10-05) 11:02:45 INFO [client.py:67] Training of (CLIENT: 4) completes, {'clientId': 4, 'moving_loss': 2.35296483668845, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:02:48 INFO [client.py:32] Start to train (CLIENT: 1) ...\n", - "(10-05) 11:02:54 INFO [client.py:67] Training of (CLIENT: 1) completes, {'clientId': 1, 'moving_loss': 2.430718451410085, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:02:55 INFO [client.py:32] Start to train (CLIENT: 2) ...\n", - "(10-05) 11:03:01 INFO [client.py:67] Training of (CLIENT: 2) completes, {'clientId': 2, 'moving_loss': 2.4424235744065412, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:03:02 INFO [client.py:32] Start to train (CLIENT: 3) ...\n", - "(10-05) 11:03:09 INFO [client.py:67] Training of (CLIENT: 3) completes, {'clientId': 3, 'moving_loss': 2.3308809210055044, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:03:09 INFO [client.py:32] Start to train (CLIENT: 4) ...\n", - "(10-05) 11:03:16 INFO 
[client.py:67] Training of (CLIENT: 4) completes, {'clientId': 4, 'moving_loss': 2.302724640744112, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:03:18 INFO [client.py:32] Start to train (CLIENT: 1) ...\n", - "(10-05) 11:03:25 INFO [client.py:67] Training of (CLIENT: 1) completes, {'clientId': 1, 'moving_loss': 2.5397319132390055, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:03:25 INFO [client.py:32] Start to train (CLIENT: 2) ...\n", - "(10-05) 11:03:32 INFO [client.py:67] Training of (CLIENT: 2) completes, {'clientId': 2, 'moving_loss': 2.447979369095947, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:03:33 INFO [client.py:32] Start to train (CLIENT: 3) ...\n", - "(10-05) 11:03:39 INFO [client.py:67] Training of (CLIENT: 3) completes, {'clientId': 3, 'moving_loss': 2.412356183045489, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:03:39 INFO [client.py:32] Start to train (CLIENT: 4) ...\n", - "(10-05) 11:03:46 INFO [client.py:67] Training of (CLIENT: 4) completes, {'clientId': 4, 'moving_loss': 2.2552800768384884, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:03:49 INFO [client.py:32] Start to train (CLIENT: 1) ...\n", - "(10-05) 11:03:56 INFO [client.py:67] Training of (CLIENT: 1) completes, {'clientId': 1, 'moving_loss': 2.3954310684108044, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:03:56 INFO [client.py:32] Start to train (CLIENT: 2) ...\n", - "(10-05) 11:04:02 INFO [client.py:67] Training of (CLIENT: 2) completes, {'clientId': 2, 'moving_loss': 2.356739756125249, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:04:03 INFO [client.py:32] Start to train (CLIENT: 3) ...\n", - "(10-05) 11:04:10 INFO [client.py:67] Training of (CLIENT: 3) completes, {'clientId': 3, 'moving_loss': 2.3305684278581604, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:04:10 INFO [client.py:32] Start to train (CLIENT: 4) ...\n", - "(10-05) 11:04:17 INFO [client.py:67] Training of (CLIENT: 4) completes, {'clientId': 4, 'moving_loss': 2.2867733053916672, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:04:38 INFO [model_test_module.py:306] Rank 1: Test set: Average loss: 2.2923, Top-1 Accuracy: 2431.0/10000 (0.2431), Top-5 Accuracy: 0.7641\n", - "(10-05) 11:04:38 INFO [executor.py:376] After aggregation round 15, CumulTime 491.1768, eval_time 18.6568, test_loss 2.2923, test_accuracy 24.31%, test_5_accuracy 76.41% \n", + "(02-27) 12:24:53 INFO [executor.py:123] Data partitioner starts ...\n", + "(02-27) 12:24:53 INFO [divide_data.py:105] Randomly partitioning data, 50000 samples...\n", + "(02-27) 12:24:53 INFO [divide_data.py:105] Randomly partitioning data, 10000 samples...\n", + "(02-27) 12:24:53 INFO [executor.py:134] Data partitioner completes ...\n", + "(02-27) 12:24:53 INFO [channel_context.py:20] %%%%%%%%%% Opening grpc connection to 127.0.0.1 %%%%%%%%%%\n", + "(02-27) 12:24:53 INFO [executor.py:372] Start monitoring events ...\n", + "(02-27) 12:26:30 INFO [model_test_module.py:307] Rank 1: Test set: Average loss: 2.3027, Top-1 Accuracy: 1000.0/10000 (0.1), Top-5 Accuracy: 0.5\n", + "(02-27) 12:26:30 INFO [torch_client.py:264] Test results: Eval_time 94.6244, test_loss 2.3027, test_accuracy 10.00%, test_5_accuracy 50.00% \n", "\n", - "(10-05) 11:04:39 INFO [client.py:32] Start to train (CLIENT: 1) ...\n", - "(10-05) 11:04:46 INFO [client.py:67] Training of (CLIENT: 1) completes, {'clientId': 1, 'moving_loss': 2.3175615307159316, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:04:46 INFO [client.py:32] Start to train 
(CLIENT: 2) ...\n", - "(10-05) 11:04:53 INFO [client.py:67] Training of (CLIENT: 2) completes, {'clientId': 2, 'moving_loss': 2.198226276340273, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:04:53 INFO [client.py:32] Start to train (CLIENT: 3) ...\n", - "(10-05) 11:05:00 INFO [client.py:67] Training of (CLIENT: 3) completes, {'clientId': 3, 'moving_loss': 2.317745547848251, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:05:00 INFO [client.py:32] Start to train (CLIENT: 4) ...\n", - "(10-05) 11:05:07 INFO [client.py:67] Training of (CLIENT: 4) completes, {'clientId': 4, 'moving_loss': 2.2811246385331883, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:05:10 INFO [client.py:32] Start to train (CLIENT: 1) ...\n", - "(10-05) 11:05:16 INFO [client.py:67] Training of (CLIENT: 1) completes, {'clientId': 1, 'moving_loss': 2.200125896722598, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:05:17 INFO [client.py:32] Start to train (CLIENT: 2) ...\n", - "(10-05) 11:05:23 INFO [client.py:67] Training of (CLIENT: 2) completes, {'clientId': 2, 'moving_loss': 2.2880915137374966, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:05:23 INFO [client.py:32] Start to train (CLIENT: 3) ...\n", - "(10-05) 11:05:30 INFO [client.py:67] Training of (CLIENT: 3) completes, {'clientId': 3, 'moving_loss': 2.2059975263981326, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:05:30 INFO [client.py:32] Start to train (CLIENT: 4) ...\n", - "(10-05) 11:05:37 INFO [client.py:67] Training of (CLIENT: 4) completes, {'clientId': 4, 'moving_loss': 2.34091048582892, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:05:40 INFO [client.py:32] Start to train (CLIENT: 1) ...\n", - "(10-05) 11:05:46 INFO [client.py:67] Training of (CLIENT: 1) completes, {'clientId': 1, 'moving_loss': 2.149645458253341, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:05:47 INFO [client.py:32] Start to train (CLIENT: 2) ...\n", - "(10-05) 11:05:54 INFO [client.py:67] Training of (CLIENT: 2) completes, {'clientId': 2, 'moving_loss': 2.113924674635463, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:05:54 INFO [client.py:32] Start to train (CLIENT: 3) ...\n", - "(10-05) 11:06:01 INFO [client.py:67] Training of (CLIENT: 3) completes, {'clientId': 3, 'moving_loss': 2.2351507674495643, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:06:01 INFO [client.py:32] Start to train (CLIENT: 4) ...\n", - "(10-05) 11:06:08 INFO [client.py:67] Training of (CLIENT: 4) completes, {'clientId': 4, 'moving_loss': 2.2538349455169575, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:06:10 INFO [client.py:32] Start to train (CLIENT: 1) ...\n", - "(10-05) 11:06:17 INFO [client.py:67] Training of (CLIENT: 1) completes, {'clientId': 1, 'moving_loss': 2.1228796089115454, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:06:17 INFO [client.py:32] Start to train (CLIENT: 2) ...\n", - "(10-05) 11:06:25 INFO [client.py:67] Training of (CLIENT: 2) completes, {'clientId': 2, 'moving_loss': 2.079776040856923, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:06:25 INFO [client.py:32] Start to train (CLIENT: 3) ...\n", - "(10-05) 11:06:31 INFO [client.py:67] Training of (CLIENT: 3) completes, {'clientId': 3, 'moving_loss': 2.064791808439913, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:06:31 INFO [client.py:32] Start to train (CLIENT: 4) ...\n", - "(10-05) 11:06:38 INFO [client.py:67] Training of (CLIENT: 4) completes, {'clientId': 4, 'moving_loss': 2.107674873224351, 'trained_size': 
600, 'success': True}\n", - "(10-05) 11:06:41 INFO [client.py:32] Start to train (CLIENT: 1) ...\n", - "(10-05) 11:06:48 INFO [client.py:67] Training of (CLIENT: 1) completes, {'clientId': 1, 'moving_loss': 2.1673746026503267, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:06:48 INFO [client.py:32] Start to train (CLIENT: 2) ...\n", - "(10-05) 11:06:55 INFO [client.py:67] Training of (CLIENT: 2) completes, {'clientId': 2, 'moving_loss': 2.0539934263054516, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:06:55 INFO [client.py:32] Start to train (CLIENT: 3) ...\n", - "(10-05) 11:07:02 INFO [client.py:67] Training of (CLIENT: 3) completes, {'clientId': 3, 'moving_loss': 2.0785532106098277, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:07:02 INFO [client.py:32] Start to train (CLIENT: 4) ...\n", - "(10-05) 11:07:09 INFO [client.py:67] Training of (CLIENT: 4) completes, {'clientId': 4, 'moving_loss': 2.1002533861294888, 'trained_size': 600, 'success': True}\n", - "(10-05) 11:07:10 INFO [channel_context.py:33] %%%%%%%%%% Closing grpc connection to the aggregator %%%%%%%%%%\n" + "(02-27) 12:26:31 INFO [torch_client.py:49] Start to train (CLIENT: 1) ...\n", + "(02-27) 12:27:33 INFO [torch_client.py:84] Training of (CLIENT: 1) completes, {'client_id': 1, 'moving_loss': 6.559604595482319, 'trained_size': 600, 'success': True}\n", + "(02-27) 12:27:33 INFO [torch_client.py:49] Start to train (CLIENT: 2) ...\n", + "(02-27) 12:28:36 INFO [torch_client.py:84] Training of (CLIENT: 2) completes, {'client_id': 2, 'moving_loss': 5.693940820821615, 'trained_size': 600, 'success': True}\n", + "(02-27) 12:28:36 INFO [torch_client.py:49] Start to train (CLIENT: 3) ...\n", + "(02-27) 12:29:37 INFO [torch_client.py:84] Training of (CLIENT: 3) completes, {'client_id': 3, 'moving_loss': 5.515216224300269, 'trained_size': 600, 'success': True}\n", + "(02-27) 12:29:38 INFO [torch_client.py:49] Start to train (CLIENT: 4) ...\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 15\u001b[0m\n\u001b[1;32m 13\u001b[0m parser\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39muse_cuda \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFalse\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 14\u001b[0m Demo_Executor \u001b[38;5;241m=\u001b[39m Executor(parser\u001b[38;5;241m.\u001b[39margs)\n\u001b[0;32m---> 15\u001b[0m \u001b[43mDemo_Executor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/anaconda3/envs/fedscale/lib/python3.10/site-packages/fedscale/cloud/execution/executor.py:144\u001b[0m, in \u001b[0;36mExecutor.run\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 142\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtraining_sets, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtesting_sets \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minit_data()\n\u001b[1;32m 143\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msetup_communication()\n\u001b[0;32m--> 144\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mevent_monitor\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File 
\u001b[0;32m~/anaconda3/envs/fedscale/lib/python3.10/site-packages/fedscale/cloud/execution/executor.py:385\u001b[0m, in \u001b[0;36mExecutor.event_monitor\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 383\u001b[0m train_config[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m train_model\n\u001b[1;32m 384\u001b[0m train_config[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mclient_id\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mint\u001b[39m(train_config[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mclient_id\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[0;32m--> 385\u001b[0m client_id, train_res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mTrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtrain_config\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 387\u001b[0m \u001b[38;5;66;03m# Upload model updates\u001b[39;00m\n\u001b[1;32m 388\u001b[0m future_call \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maggregator_communicator\u001b[38;5;241m.\u001b[39mstub\u001b[38;5;241m.\u001b[39mCLIENT_EXECUTE_COMPLETION\u001b[38;5;241m.\u001b[39mfuture(\n\u001b[1;32m 389\u001b[0m job_api_pb2\u001b[38;5;241m.\u001b[39mCompleteRequest(client_id\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mstr\u001b[39m(client_id), executor_id\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mexecutor_id,\n\u001b[1;32m 390\u001b[0m event\u001b[38;5;241m=\u001b[39mcommons\u001b[38;5;241m.\u001b[39mUPLOAD_MODEL, status\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, msg\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 391\u001b[0m meta_result\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, data_result\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mserialize_response(train_res)\n\u001b[1;32m 392\u001b[0m ))\n", + "File \u001b[0;32m~/anaconda3/envs/fedscale/lib/python3.10/site-packages/fedscale/cloud/execution/executor.py:204\u001b[0m, in \u001b[0;36mExecutor.Train\u001b[0;34m(self, config)\u001b[0m\n\u001b[1;32m 202\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m object must be a non-null value in the training config.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 203\u001b[0m client_conf \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moverride_conf(train_config)\n\u001b[0;32m--> 204\u001b[0m train_res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtraining_handler\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 205\u001b[0m \u001b[43m \u001b[49m\u001b[43mclient_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mclient_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconf\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mclient_conf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mmodel\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 207\u001b[0m \u001b[38;5;66;03m# Report execution completion meta information\u001b[39;00m\n\u001b[1;32m 208\u001b[0m 
response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maggregator_communicator\u001b[38;5;241m.\u001b[39mstub\u001b[38;5;241m.\u001b[39mCLIENT_EXECUTE_COMPLETION(\n\u001b[1;32m 209\u001b[0m job_api_pb2\u001b[38;5;241m.\u001b[39mCompleteRequest(\n\u001b[1;32m 210\u001b[0m client_id\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mstr\u001b[39m(client_id), executor_id\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mexecutor_id,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 213\u001b[0m )\n\u001b[1;32m 214\u001b[0m )\n", + "File \u001b[0;32m~/anaconda3/envs/fedscale/lib/python3.10/site-packages/fedscale/cloud/execution/executor.py:309\u001b[0m, in \u001b[0;36mExecutor.training_handler\u001b[0;34m(self, client_id, conf, model)\u001b[0m\n\u001b[1;32m 303\u001b[0m client_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtraining_sets \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mtask \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrl\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m \\\n\u001b[1;32m 304\u001b[0m select_dataset(client_id, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtraining_sets,\n\u001b[1;32m 305\u001b[0m batch_size\u001b[38;5;241m=\u001b[39mconf\u001b[38;5;241m.\u001b[39mbatch_size, args\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs,\n\u001b[1;32m 306\u001b[0m collate_fn\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcollate_fn\n\u001b[1;32m 307\u001b[0m )\n\u001b[1;32m 308\u001b[0m client \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_client_trainer(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs)\n\u001b[0;32m--> 309\u001b[0m train_res \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 310\u001b[0m \u001b[43m \u001b[49m\u001b[43mclient_data\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mclient_data\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_adapter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconf\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconf\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 312\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m train_res\n", + "File \u001b[0;32m~/anaconda3/envs/fedscale/lib/python3.10/site-packages/fedscale/cloud/execution/torch_client.py:71\u001b[0m, in \u001b[0;36mTorchClient.train\u001b[0;34m(self, client_data, model, conf)\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcompleted_steps \u001b[38;5;241m<\u001b[39m conf\u001b[38;5;241m.\u001b[39mlocal_steps:\n\u001b[1;32m 70\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 71\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain_step\u001b[49m\u001b[43m(\u001b[49m\u001b[43mclient_data\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moptimizer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcriterion\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 72\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m ex:\n\u001b[1;32m 73\u001b[0m error_type \u001b[38;5;241m=\u001b[39m ex\n", + "File \u001b[0;32m~/anaconda3/envs/fedscale/lib/python3.10/site-packages/fedscale/cloud/execution/torch_client.py:235\u001b[0m, in \u001b[0;36mTorchClient.train_step\u001b[0;34m(self, client_data, conf, model, optimizer, criterion)\u001b[0m\n\u001b[1;32m 233\u001b[0m \u001b[38;5;66;03m# ========= Define the backward loss ==============\u001b[39;00m\n\u001b[1;32m 234\u001b[0m optimizer\u001b[38;5;241m.\u001b[39mzero_grad()\n\u001b[0;32m--> 235\u001b[0m \u001b[43mloss\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackward\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 236\u001b[0m optimizer\u001b[38;5;241m.\u001b[39mstep()\n\u001b[1;32m 238\u001b[0m \u001b[38;5;66;03m# ========= Weight handler ========================\u001b[39;00m\n", + "File \u001b[0;32m~/anaconda3/envs/fedscale/lib/python3.10/site-packages/torch/_tensor.py:488\u001b[0m, in \u001b[0;36mTensor.backward\u001b[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[1;32m 478\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m has_torch_function_unary(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 479\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m handle_torch_function(\n\u001b[1;32m 480\u001b[0m Tensor\u001b[38;5;241m.\u001b[39mbackward,\n\u001b[1;32m 481\u001b[0m (\u001b[38;5;28mself\u001b[39m,),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 486\u001b[0m inputs\u001b[38;5;241m=\u001b[39minputs,\n\u001b[1;32m 487\u001b[0m )\n\u001b[0;32m--> 488\u001b[0m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mautograd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackward\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 489\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgradient\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mretain_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs\u001b[49m\n\u001b[1;32m 490\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/anaconda3/envs/fedscale/lib/python3.10/site-packages/torch/autograd/__init__.py:197\u001b[0m, in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m 192\u001b[0m retain_graph \u001b[38;5;241m=\u001b[39m create_graph\n\u001b[1;32m 194\u001b[0m \u001b[38;5;66;03m# The reason we repeat same the comment below is that\u001b[39;00m\n\u001b[1;32m 195\u001b[0m \u001b[38;5;66;03m# some Python versions print out the first line of a multi-line function\u001b[39;00m\n\u001b[1;32m 196\u001b[0m \u001b[38;5;66;03m# calls in the traceback and some print out the last line\u001b[39;00m\n\u001b[0;32m--> 197\u001b[0m \u001b[43mVariable\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_execution_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_backward\u001b[49m\u001b[43m(\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Calls into the C++ engine to run the backward 
pass\u001b[39;49;00m\n\u001b[1;32m 198\u001b[0m \u001b[43m \u001b[49m\u001b[43mtensors\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgrad_tensors_\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mretain_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 199\u001b[0m \u001b[43m \u001b[49m\u001b[43mallow_unreachable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maccumulate_grad\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], @@ -222,18 +105,26 @@ "import sys, os\n", "\n", "import fedscale.cloud.config_parser as parser\n", - "from fedscale.cloud.execution.client import Client\n", + "from fedscale.cloud.execution.torch_client import TorchClient\n", "from fedscale.cloud.execution.executor import Executor\n", "### On CPU\n", "parser.args.use_cuda = \"False\"\n", "Demo_Executor = Executor(parser.args)\n", "Demo_Executor.run()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "227bece8", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3.7.13 ('fedscale': conda)", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -247,7 +138,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.13" + "version": "3.10.9" }, "vscode": { "interpreter": { diff --git a/examples/notebook/fedscale_demo_server.ipynb b/examples/notebook/fedscale_demo_server.ipynb index bc5ff058..e0efac07 100644 --- a/examples/notebook/fedscale_demo_server.ipynb +++ b/examples/notebook/fedscale_demo_server.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 6, "id": "a1e48395", "metadata": {}, "outputs": [ @@ -26,287 +26,37 @@ "name": "stderr", "output_type": "stream", "text": [ - "(10-05) 10:56:16 INFO [aggregator.py:33] Job args Namespace(adam_epsilon=1e-08, arrival_interval=3, async_buffer=10, async_mode=False, backbone='./resnet50.pth', backend='gloo', batch_size=30, bidirectional=True, blacklist_max_len=0.3, blacklist_rounds=-1, block_size=64, cfg_file='./utils/rcnn/cfgs/res101.yml', checkin_period=50, clf_block_size=32, clip_bound=0.9, clip_threshold=3.0, clock_factor=1.1624548736462095, conf_path='~/dataset/', connection_timeout=60, cuda_device=None, cut_off_util=0.05, data_cache='', data_dir='~/cifar10/', data_map_file=None, data_set='cifar10', decay_factor=0.98, decay_round=10, device_avail_file=None, device_conf_file='/tmp/client.cfg', dump_epoch=10000000000.0, embedding_file='glove.840B.300d.txt', engine='pytorch', epsilon=0.9, eval_interval=5, executor_configs='127.0.0.1:[1]', experiment_mode='simulation', exploration_alpha=0.3, exploration_decay=0.98, exploration_factor=0.9, exploration_min=0.3, filter_less=32, filter_more=1000000000000000.0, finetune=False, gamma=0.9, gradient_policy=None, hidden_layers=7, hidden_size=256, input_dim=0, job_name='demo_job', labels_path='labels.json', learning_rate=0.05, line_by_line=False, local_steps=20, log_path='./', loss_decay=0.2, malicious_factor=1000000000000000.0, max_concurrency=100, max_staleness=5, memory_capacity=2000, min_learning_rate=5e-05, mlm=False, mlm_probability=0.15, model='shufflenet_v2_x2_0', 
model_size=65536, model_zoo='torchcv', n_actions=2, n_states=4, noise_dir=None, noise_factor=0.1, noise_max=0.5, noise_min=0.0, noise_prob=0.4, num_class=10, num_classes=35, num_executors=1, num_loaders=2, num_participants=4, output_dim=0, overcommitment=1.3, overwrite_cache=False, pacer_delta=5, pacer_step=20, proxy_mu=0.1, ps_ip='127.0.0.1', ps_port='29501', rnn_type='lstm', round_penalty=2.0, round_threshold=30, rounds=20, sample_mode='random', sample_rate=16000, sample_seed=233, sample_window=5.0, spec_augment=False, speed_volume_perturb=False, target_delta=0.0001, target_replace_iter=15, task='cv', test_bsz=128, test_manifest='data/test_manifest.csv', test_output_dir='./logs/server', test_ratio=1.0, test_size_file='', this_rank=1, time_stamp='logs', train_manifest='data/train_manifest.csv', train_size_file='', train_uniform=False, upload_step=20, use_cuda=True, vocab_tag_size=500, vocab_token_size=10000, weight_decay=0, window='hamming', window_size=0.02, window_stride=0.01, yogi_beta=0.9, yogi_beta2=0.99, yogi_eta=0.003, yogi_tau=1e-08)\n", - "2022-10-05 10:56:16.673184: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", - "2022-10-05 10:56:16.673223: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", - "2022-10-05 10:56:16.732819: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", - "2022-10-05 10:56:17.784849: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", - "2022-10-05 10:56:17.784957: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", - "2022-10-05 10:56:17.784973: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", - "(10-05) 10:56:19 INFO [aggregator.py:129] Initiating control plane communication ...\n", - "(10-05) 10:56:19 INFO [aggregator.py:153] %%%%%%%%%% Opening aggregator sever using port [::]:29501 %%%%%%%%%%\n", - "(10-05) 10:56:19 INFO [fllibs.py:120] Initializing the model ...\n", - "(10-05) 10:56:19 INFO [aggregator.py:871] Start monitoring events ...\n", - "(10-05) 10:56:29 INFO [aggregator.py:263] Received executor 1 information, 1/1\n", - "(10-05) 10:56:29 INFO [aggregator.py:234] Loading 4 client traces ...\n", - "(10-05) 10:56:29 INFO [aggregator.py:251] Info of all feasible clients {'total_feasible_clients': 4, 'total_num_samples': 50000}\n", - "(10-05) 10:56:29 INFO [aggregator.py:529] Wall clock: 0 s, round: 1, Planned participants: 0, Succeed participants: 0, Training loss: 0.0\n", - "(10-05) 10:56:29 INFO [client_manager.py:202] Wall clock time: 0, 4 clients online, 0 clients offline\n", - "(10-05) 10:56:29 INFO [aggregator.py:541] Selected participants to run: [1, 2, 3, 4]\n", - "(10-05) 10:56:30 INFO [aggregator.py:804] ====event queue 1, deque(['update_model', 'client_train'])\n", - "(10-05) 10:56:30 INFO [aggregator.py:827] Issue EVENT (update_model) to EXECUTOR (1)\n", - "(10-05) 10:56:31 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 10:56:31 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 10:56:38 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 10:56:38 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 10:56:46 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 10:56:46 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 10:56:53 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 10:56:53 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 10:57:00 INFO [aggregator.py:529] Wall clock: 339847 s, round: 2, Planned participants: 4, Succeed participants: 4, Training loss: 5.602389772279155\n", - "(10-05) 10:57:00 INFO [client_manager.py:202] Wall clock time: 339847, 4 clients online, 0 clients offline\n", - "(10-05) 10:57:00 INFO [aggregator.py:541] Selected participants to run: [1, 2, 3, 4]\n", - "(10-05) 10:57:01 INFO [aggregator.py:804] ====event queue 1, deque(['update_model', 'client_train'])\n", - "(10-05) 10:57:01 INFO [aggregator.py:827] Issue EVENT (update_model) to EXECUTOR (1)\n", - "(10-05) 10:57:02 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 10:57:02 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 10:57:09 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 10:57:09 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 10:57:16 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 10:57:16 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 10:57:23 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 10:57:23 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 10:57:31 INFO [aggregator.py:529] Wall clock: 679693 s, round: 3, Planned participants: 4, Succeed participants: 4, Training loss: 5.302637194544962\n", - "(10-05) 
10:57:31 INFO [client_manager.py:202] Wall clock time: 679693, 4 clients online, 0 clients offline\n", - "(10-05) 10:57:31 INFO [aggregator.py:541] Selected participants to run: [1, 2, 3, 4]\n", - "(10-05) 10:57:32 INFO [aggregator.py:804] ====event queue 1, deque(['update_model', 'client_train'])\n", - "(10-05) 10:57:32 INFO [aggregator.py:827] Issue EVENT (update_model) to EXECUTOR (1)\n", - "(10-05) 10:57:33 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 10:57:33 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 10:57:40 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 10:57:40 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 10:57:47 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 10:57:47 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 10:57:55 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 10:57:55 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 10:58:02 INFO [aggregator.py:529] Wall clock: 1019540 s, round: 4, Planned participants: 4, Succeed participants: 4, Training loss: 4.599358620310677\n", - "(10-05) 10:58:02 INFO [client_manager.py:202] Wall clock time: 1019540, 4 clients online, 0 clients offline\n", - "(10-05) 10:58:02 INFO [aggregator.py:541] Selected participants to run: [1, 2, 3, 4]\n", - "(10-05) 10:58:03 INFO [aggregator.py:804] ====event queue 1, deque(['update_model', 'client_train'])\n", - "(10-05) 10:58:03 INFO [aggregator.py:827] Issue EVENT (update_model) to EXECUTOR (1)\n", - "(10-05) 10:58:04 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 10:58:04 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 10:58:11 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 10:58:11 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 10:58:18 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 10:58:18 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 10:58:25 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 10:58:25 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 10:58:32 INFO [aggregator.py:529] Wall clock: 1359387 s, round: 5, Planned participants: 4, Succeed participants: 4, Training loss: 4.436900660524809\n", - "(10-05) 10:58:32 INFO [client_manager.py:202] Wall clock time: 1359387, 4 clients online, 0 clients offline\n", - "(10-05) 10:58:32 INFO [aggregator.py:541] Selected participants to run: [1, 2, 3, 4]\n", - "(10-05) 10:58:33 INFO [aggregator.py:804] ====event queue 1, deque(['update_model', 'model_test'])\n", - "(10-05) 10:58:33 INFO [aggregator.py:827] Issue EVENT (update_model) to EXECUTOR (1)\n", - "(10-05) 10:58:34 INFO [aggregator.py:804] ====event queue 1, deque(['model_test'])\n", - "(10-05) 10:58:34 INFO [aggregator.py:827] Issue EVENT (model_test) to EXECUTOR (1)\n", - "(10-05) 10:58:53 INFO [aggragation.py:64] FL Testing in round: 5, virtual_clock: 1359386.95, top_1: 10.0 %, top_5: 51.42 %, test loss: 2.4887, test len: 10000\n", - "(10-05) 10:58:54 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 10:58:54 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 
10:59:01 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 10:59:01 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 10:59:08 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 10:59:08 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 10:59:15 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 10:59:15 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 10:59:23 INFO [aggregator.py:529] Wall clock: 1699234 s, round: 6, Planned participants: 4, Succeed participants: 4, Training loss: 3.8854766698601377\n", - "(10-05) 10:59:23 INFO [client_manager.py:202] Wall clock time: 1699234, 4 clients online, 0 clients offline\n", - "(10-05) 10:59:23 INFO [aggregator.py:541] Selected participants to run: [1, 2, 3, 4]\n", - "(10-05) 10:59:23 INFO [aggregator.py:804] ====event queue 1, deque(['update_model', 'client_train'])\n", - "(10-05) 10:59:23 INFO [aggregator.py:827] Issue EVENT (update_model) to EXECUTOR (1)\n", - "(10-05) 10:59:25 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 10:59:25 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 10:59:32 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 10:59:32 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 10:59:39 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 10:59:39 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 10:59:46 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 10:59:46 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 10:59:53 INFO [aggregator.py:529] Wall clock: 2039080 s, round: 7, Planned participants: 4, Succeed participants: 4, Training loss: 3.4019230657444686\n", - "(10-05) 10:59:53 INFO [client_manager.py:202] Wall clock time: 2039080, 4 clients online, 0 clients offline\n", - "(10-05) 10:59:53 INFO [aggregator.py:541] Selected participants to run: [1, 2, 3, 4]\n", - "(10-05) 10:59:54 INFO [aggregator.py:804] ====event queue 1, deque(['update_model', 'client_train'])\n", - "(10-05) 10:59:54 INFO [aggregator.py:827] Issue EVENT (update_model) to EXECUTOR (1)\n", - "(10-05) 10:59:56 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 10:59:56 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:00:03 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:00:03 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:00:10 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:00:10 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:00:17 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:00:17 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:00:24 INFO [aggregator.py:529] Wall clock: 2378927 s, round: 8, Planned participants: 4, Succeed participants: 4, Training loss: 3.0577747210462976\n", - "(10-05) 11:00:24 INFO [client_manager.py:202] Wall clock time: 2378927, 4 clients online, 0 clients offline\n", - "(10-05) 11:00:24 INFO [aggregator.py:541] Selected participants to run: [1, 2, 3, 4]\n", - "(10-05) 11:00:25 
INFO [aggregator.py:804] ====event queue 1, deque(['update_model', 'client_train'])\n", - "(10-05) 11:00:25 INFO [aggregator.py:827] Issue EVENT (update_model) to EXECUTOR (1)\n", - "(10-05) 11:00:26 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:00:26 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:00:33 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:00:33 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:00:40 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:00:40 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:00:47 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:00:47 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:00:54 INFO [aggregator.py:529] Wall clock: 2718774 s, round: 9, Planned participants: 4, Succeed participants: 4, Training loss: 2.8637842447127393\n", - "(10-05) 11:00:54 INFO [client_manager.py:202] Wall clock time: 2718774, 4 clients online, 0 clients offline\n", - "(10-05) 11:00:54 INFO [aggregator.py:541] Selected participants to run: [1, 2, 3, 4]\n", - "(10-05) 11:00:55 INFO [aggregator.py:804] ====event queue 1, deque(['update_model', 'client_train'])\n", - "(10-05) 11:00:55 INFO [aggregator.py:827] Issue EVENT (update_model) to EXECUTOR (1)\n", - "(10-05) 11:00:56 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:00:56 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:01:04 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:01:04 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:01:11 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:01:11 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:01:17 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:01:17 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:01:25 INFO [aggregator.py:529] Wall clock: 3058621 s, round: 10, Planned participants: 4, Succeed participants: 4, Training loss: 2.832259889787557\n", - "(10-05) 11:01:25 INFO [client_manager.py:202] Wall clock time: 3058621, 4 clients online, 0 clients offline\n", - "(10-05) 11:01:25 INFO [aggregator.py:541] Selected participants to run: [1, 2, 3, 4]\n", - "(10-05) 11:01:25 INFO [aggregator.py:804] ====event queue 1, deque(['update_model', 'model_test'])\n", - "(10-05) 11:01:26 INFO [aggregator.py:827] Issue EVENT (update_model) to EXECUTOR (1)\n", - "(10-05) 11:01:27 INFO [aggregator.py:804] ====event queue 1, deque(['model_test'])\n", - "(10-05) 11:01:27 INFO [aggregator.py:827] Issue EVENT (model_test) to EXECUTOR (1)\n", - "(10-05) 11:01:45 INFO [aggragation.py:64] FL Testing in round: 10, virtual_clock: 3058620.6374999997, top_1: 17.66 %, top_5: 67.99 %, test loss: 2.3363, test len: 10000\n", - "(10-05) 11:01:46 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:01:46 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:01:53 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:01:53 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:02:00 INFO 
[aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:02:00 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:02:07 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:02:07 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:02:15 INFO [aggregator.py:529] Wall clock: 3398467 s, round: 11, Planned participants: 4, Succeed participants: 4, Training loss: 2.4575965823961834\n", - "(10-05) 11:02:15 INFO [client_manager.py:202] Wall clock time: 3398467, 4 clients online, 0 clients offline\n", - "(10-05) 11:02:15 INFO [aggregator.py:541] Selected participants to run: [1, 2, 3, 4]\n", - "(10-05) 11:02:16 INFO [aggregator.py:804] ====event queue 1, deque(['update_model', 'client_train'])\n", - "(10-05) 11:02:16 INFO [aggregator.py:827] Issue EVENT (update_model) to EXECUTOR (1)\n", - "(10-05) 11:02:17 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:02:17 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:02:24 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:02:24 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:02:31 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:02:31 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:02:38 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:02:38 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:02:45 INFO [aggregator.py:529] Wall clock: 3738314 s, round: 12, Planned participants: 4, Succeed participants: 4, Training loss: 2.3794690852301428\n", - "(10-05) 11:02:45 INFO [client_manager.py:202] Wall clock time: 3738314, 4 clients online, 0 clients offline\n", - "(10-05) 11:02:45 INFO [aggregator.py:541] Selected participants to run: [1, 2, 3, 4]\n", - "(10-05) 11:02:46 INFO [aggregator.py:804] ====event queue 1, deque(['update_model', 'client_train'])\n", - "(10-05) 11:02:46 INFO [aggregator.py:827] Issue EVENT (update_model) to EXECUTOR (1)\n", - "(10-05) 11:02:47 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:02:47 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:02:54 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:02:54 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:03:01 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:03:01 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:03:09 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:03:09 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:03:16 INFO [aggregator.py:529] Wall clock: 4078161 s, round: 13, Planned participants: 4, Succeed participants: 4, Training loss: 2.376686896891561\n", - "(10-05) 11:03:16 INFO [client_manager.py:202] Wall clock time: 4078161, 4 clients online, 0 clients offline\n", - "(10-05) 11:03:16 INFO [aggregator.py:541] Selected participants to run: [1, 2, 3, 4]\n", - "(10-05) 11:03:17 INFO [aggregator.py:804] ====event queue 1, deque(['update_model', 'client_train'])\n", - "(10-05) 11:03:17 INFO [aggregator.py:827] Issue EVENT (update_model) to EXECUTOR (1)\n", - "(10-05) 
11:03:18 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:03:18 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:03:25 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:03:25 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:03:32 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:03:32 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:03:39 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:03:39 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:03:46 INFO [aggregator.py:529] Wall clock: 4418008 s, round: 14, Planned participants: 4, Succeed participants: 4, Training loss: 2.4138368855547325\n", - "(10-05) 11:03:46 INFO [client_manager.py:202] Wall clock time: 4418008, 4 clients online, 0 clients offline\n", - "(10-05) 11:03:46 INFO [aggregator.py:541] Selected participants to run: [1, 2, 3, 4]\n", - "(10-05) 11:03:47 INFO [aggregator.py:804] ====event queue 1, deque(['update_model', 'client_train'])\n", - "(10-05) 11:03:47 INFO [aggregator.py:827] Issue EVENT (update_model) to EXECUTOR (1)\n", - "(10-05) 11:03:49 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:03:49 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:03:56 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:03:56 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:04:02 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:04:02 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:04:10 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:04:10 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:04:17 INFO [aggregator.py:529] Wall clock: 4757854 s, round: 15, Planned participants: 4, Succeed participants: 4, Training loss: 2.3423781394464704\n", - "(10-05) 11:04:17 INFO [client_manager.py:202] Wall clock time: 4757854, 4 clients online, 0 clients offline\n", - "(10-05) 11:04:17 INFO [aggregator.py:541] Selected participants to run: [1, 2, 3, 4]\n", - "(10-05) 11:04:18 INFO [aggregator.py:804] ====event queue 1, deque(['update_model', 'model_test'])\n", - "(10-05) 11:04:18 INFO [aggregator.py:827] Issue EVENT (update_model) to EXECUTOR (1)\n", - "(10-05) 11:04:19 INFO [aggregator.py:804] ====event queue 1, deque(['model_test'])\n", - "(10-05) 11:04:19 INFO [aggregator.py:827] Issue EVENT (model_test) to EXECUTOR (1)\n", - "(10-05) 11:04:38 INFO [aggragation.py:64] FL Testing in round: 15, virtual_clock: 4757854.324999999, top_1: 24.31 %, top_5: 76.41 %, test loss: 2.2923, test len: 10000\n", - "(10-05) 11:04:39 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:04:39 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:04:46 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:04:46 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:04:53 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:04:53 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:05:00 INFO 
[aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:05:00 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:05:07 INFO [aggregator.py:529] Wall clock: 5097701 s, round: 16, Planned participants: 4, Succeed participants: 4, Training loss: 2.2786644983594107\n", - "(10-05) 11:05:08 INFO [client_manager.py:202] Wall clock time: 5097701, 4 clients online, 0 clients offline\n", - "(10-05) 11:05:08 INFO [aggregator.py:541] Selected participants to run: [1, 2, 3, 4]\n", - "(10-05) 11:05:08 INFO [aggregator.py:804] ====event queue 1, deque(['update_model', 'client_train'])\n", - "(10-05) 11:05:08 INFO [aggregator.py:827] Issue EVENT (update_model) to EXECUTOR (1)\n", - "(10-05) 11:05:10 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:05:10 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:05:16 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:05:16 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:05:23 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:05:23 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:05:30 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:05:30 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:05:37 INFO [aggregator.py:529] Wall clock: 5437548 s, round: 17, Planned participants: 4, Succeed participants: 4, Training loss: 2.2587813556717866\n", - "(10-05) 11:05:37 INFO [client_manager.py:202] Wall clock time: 5437548, 4 clients online, 0 clients offline\n", - "(10-05) 11:05:37 INFO [aggregator.py:541] Selected participants to run: [1, 2, 3, 4]\n", - "(10-05) 11:05:38 INFO [aggregator.py:804] ====event queue 1, deque(['update_model', 'client_train'])\n", - "(10-05) 11:05:38 INFO [aggregator.py:827] Issue EVENT (update_model) to EXECUTOR (1)\n", - "(10-05) 11:05:40 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:05:40 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:05:46 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:05:46 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:05:54 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:05:54 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:06:01 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:06:01 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:06:08 INFO [aggregator.py:529] Wall clock: 5777395 s, round: 18, Planned participants: 4, Succeed participants: 4, Training loss: 2.1881389614638316\n", - "(10-05) 11:06:08 INFO [client_manager.py:202] Wall clock time: 5777395, 4 clients online, 0 clients offline\n", - "(10-05) 11:06:08 INFO [aggregator.py:541] Selected participants to run: [1, 2, 3, 4]\n", - "(10-05) 11:06:09 INFO [aggregator.py:804] ====event queue 1, deque(['update_model', 'client_train'])\n", - "(10-05) 11:06:09 INFO [aggregator.py:827] Issue EVENT (update_model) to EXECUTOR (1)\n", - "(10-05) 11:06:10 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:06:10 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 
11:06:17 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:06:17 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:06:25 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:06:25 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:06:31 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:06:31 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:06:39 INFO [aggregator.py:529] Wall clock: 6117241 s, round: 19, Planned participants: 4, Succeed participants: 4, Training loss: 2.093780582858183\n", - "(10-05) 11:06:39 INFO [client_manager.py:202] Wall clock time: 6117241, 4 clients online, 0 clients offline\n", - "(10-05) 11:06:39 INFO [aggregator.py:541] Selected participants to run: [1, 2, 3, 4]\n", - "(10-05) 11:06:39 INFO [aggregator.py:804] ====event queue 1, deque(['update_model', 'client_train'])\n", - "(10-05) 11:06:40 INFO [aggregator.py:827] Issue EVENT (update_model) to EXECUTOR (1)\n", - "(10-05) 11:06:41 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:06:41 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:06:48 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:06:48 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:06:55 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:06:55 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:07:02 INFO [aggregator.py:804] ====event queue 1, deque(['client_train'])\n", - "(10-05) 11:07:02 INFO [aggregator.py:827] Issue EVENT (client_train) to EXECUTOR (1)\n", - "(10-05) 11:07:09 INFO [aggregator.py:529] Wall clock: 6457088 s, round: 20, Planned participants: 4, Succeed participants: 4, Training loss: 2.1000436564237734\n", - "(10-05) 11:07:09 INFO [client_manager.py:202] Wall clock time: 6457088, 4 clients online, 0 clients offline\n", - "(10-05) 11:07:09 INFO [aggregator.py:541] Selected participants to run: [1, 2, 3, 4]\n" + "(02-27) 12:24:08 INFO [aggregator.py:44] Job args Namespace(job_name='demo_job', log_path='./', wandb_token='', ps_ip='127.0.0.1', ps_port='29500', this_rank=1, connection_timeout=60, experiment_mode='simulation', engine='pytorch', num_executors=1, executor_configs='127.0.0.1:[1]', num_participants=4, data_map_file=None, use_cuda=True, cuda_device=None, time_stamp='logs', task='cv', device_avail_file=None, clock_factor=1.1624548736462095, model_zoo='torchcv', data_dir='~/cifar10/', device_conf_file='/tmp/client.cfg', model='shufflenet_v2_x2_0', data_set='cifar10', sample_mode='random', filter_less=32, filter_more=1000000000000000.0, train_uniform=False, conf_path='~/dataset/', overcommitment=1.3, model_size=65536, round_threshold=30, round_penalty=2.0, clip_bound=0.9, blacklist_rounds=-1, blacklist_max_len=0.3, embedding_file='glove.840B.300d.txt', input_shape=[1, 3, 28, 28], save_checkpoint=False, rounds=50, local_steps=20, batch_size=30, test_bsz=128, backend='gloo', learning_rate=0.05, min_learning_rate=5e-05, input_dim=0, output_dim=0, dump_epoch=10000000000.0, decay_factor=0.98, decay_round=10, num_loaders=2, eval_interval=5, sample_seed=233, test_ratio=1.0, loss_decay=0.2, exploration_min=0.3, cut_off_util=0.05, gradient_policy=None, yogi_eta=0.003, yogi_tau=1e-08, yogi_beta=0.9, yogi_beta2=0.99, 
proxy_mu=0.1, cfg_file='./utils/rcnn/cfgs/res101.yml', test_output_dir='./logs/server', train_size_file='', test_size_file='', data_cache='', backbone='./resnet50.pth', malicious_factor=1000000000000000.0, max_concurrency=10, max_staleness=5, noise_factor=0.1, clip_threshold=3.0, target_delta=0.0001, pacer_delta=5, pacer_step=20, exploration_alpha=0.3, exploration_factor=0.9, exploration_decay=0.98, sample_window=5.0, line_by_line=False, clf_block_size=32, mlm=False, mlm_probability=0.15, overwrite_cache=False, block_size=64, weight_decay=0, adam_epsilon=1e-08, vocab_token_size=10000, vocab_tag_size=500, epsilon=0.9, gamma=0.9, memory_capacity=2000, target_replace_iter=15, n_actions=2, n_states=4, num_classes=35, train_manifest='data/train_manifest.csv', test_manifest='data/test_manifest.csv', sample_rate=16000, labels_path='labels.json', window_size=0.02, window_stride=0.01, window='hamming', hidden_size=256, hidden_layers=7, rnn_type='lstm', finetune=False, speed_volume_perturb=False, spec_augment=False, noise_dir=None, noise_prob=0.4, noise_min=0.0, noise_max=0.5, bidirectional=True, num_class=10)\n", + "(02-27) 12:24:08 INFO [aggregator.py:156] Initiating control plane communication ...\n", + "(02-27) 12:24:08 INFO [aggregator.py:180] %%%%%%%%%% Opening aggregator sever using port [::]:29500 %%%%%%%%%%\n", + "(02-27) 12:24:08 INFO [fllibs.py:97] Initializing the model ...\n", + "(02-27) 12:24:08 INFO [aggregator.py:869] Start monitoring events ...\n", + "(02-27) 12:24:53 INFO [aggregator.py:298] Received executor 1 information, 1/1\n", + "(02-27) 12:24:53 INFO [aggregator.py:264] Loading 4 client traces ...\n", + "(02-27) 12:24:53 INFO [aggregator.py:285] Info of all feasible clients {'total_feasible_clients': 4, 'total_num_samples': 50000}\n", + "(02-27) 12:24:53 INFO [aggregator.py:517] Wall clock: 0 s, round: 1, Planned participants: 0, Succeed participants: 0, Training loss: 0.0\n", + "(02-27) 12:24:53 INFO [client_manager.py:194] Wall clock time: 0, 4 clients online, 0 clients offline\n", + "(02-27) 12:24:53 INFO [aggregator.py:531] Selected participants to run: [1, 2, 3, 4]\n", + "(02-27) 12:24:54 INFO [aggregator.py:825] Issue EVENT (update_model) to EXECUTOR (1)\n", + "(02-27) 12:24:55 INFO [aggregator.py:825] Issue EVENT (model_test) to EXECUTOR (1)\n", + "(02-27) 12:26:30 INFO [aggregator.py:490] FL Testing in round: 1, virtual_clock: 0.0, results: {'round': 1, 'clock': 0.0, 'top_1': 0.1, 'top_5': 0.5, 'loss': 2.3026571062546743}\n", + "(02-27) 12:26:31 INFO [aggregator.py:825] Issue EVENT (client_train) to EXECUTOR (1)\n", + "(02-27) 12:27:33 INFO [aggregator.py:825] Issue EVENT (client_train) to EXECUTOR (1)\n", + "(02-27) 12:28:36 INFO [aggregator.py:825] Issue EVENT (client_train) to EXECUTOR (1)\n", + "(02-27) 12:29:38 INFO [aggregator.py:825] Issue EVENT (client_train) to EXECUTOR (1)\n" ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "(10-05) 11:07:10 INFO [aggregator.py:804] ====event queue 1, deque(['terminate_executor'])\n", - "(10-05) 11:07:10 INFO [aggregator.py:827] Issue EVENT (terminate_executor) to EXECUTOR (1)\n" + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[6], line 9\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m### On CPU\u001b[39;00m\n\u001b[1;32m 8\u001b[0m 
parser\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39muse_cuda \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFalse\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 9\u001b[0m \u001b[43mDemo_Aggregator\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Documents/Research/code/FedScale/fedscale/cloud/aggregation/aggregator.py:390\u001b[0m, in \u001b[0;36mAggregator.run\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 386\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minit_model()\n\u001b[1;32m 387\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_update_size \u001b[38;5;241m=\u001b[39m sys\u001b[38;5;241m.\u001b[39mgetsizeof(\n\u001b[1;32m 388\u001b[0m pickle\u001b[38;5;241m.\u001b[39mdumps(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_wrapper)) \u001b[38;5;241m/\u001b[39m \u001b[38;5;241m1024.0\u001b[39m \u001b[38;5;241m*\u001b[39m \u001b[38;5;241m8.\u001b[39m \u001b[38;5;66;03m# kbits\u001b[39;00m\n\u001b[0;32m--> 390\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mevent_monitor\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 391\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstop()\n", + "File \u001b[0;32m~/Documents/Research/code/FedScale/fedscale/cloud/aggregation/aggregator.py:906\u001b[0m, in \u001b[0;36mAggregator.event_monitor\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 902\u001b[0m logging\u001b[38;5;241m.\u001b[39merror(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mEvent \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcurrent_event\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m is not defined\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 904\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 905\u001b[0m \u001b[38;5;66;03m# execute every 100 ms\u001b[39;00m\n\u001b[0;32m--> 906\u001b[0m \u001b[43mtime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msleep\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m0.1\u001b[39;49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], @@ -314,7 +64,7 @@ "import sys, os\n", "\n", "import fedscale.cloud.config_parser as parser\n", - "from fedscale.cloud.execution.client import Client\n", + "from fedscale.cloud.execution.torch_client import TorchClient\n", "from fedscale.cloud.aggregation.aggregator import Aggregator\n", "Demo_Aggregator = Aggregator(parser.args)\n", "### On CPU\n", @@ -332,24 +82,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "TensorFlow installation not found - running with reduced feature set.\n", "\n", "NOTE: Using experimental fast data loading logic. To disable, pass\n", " \"--load_fast=false\" and report issues on GitHub. 
More details:\n",
      "  https://github.com/tensorflow/tensorboard/issues/4784\n",
      "\n",
-     "TensorBoard 2.8.0 at http://clnode219.clemson.cloudlab.us:6007/ (Press CTRL+C to quit)\n"
+     "TensorBoard 2.11.2 at http://localhost:6007/ (Press CTRL+C to quit)\n"
     ]
    }
   ],
   "source": [
    "!tensorboard --logdir=./logs/demo_job --port=6007 --bind_all"
   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f2a40035",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-    "display_name": "Python 3.7.13 ('fedscale': conda)",
+    "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -363,7 +120,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-    "version": "3.7.13"
+    "version": "3.10.9"
   },
   "vscode": {
    "interpreter": {

From 93c7548d8efd0ce58801948858409e4a9fc92dca Mon Sep 17 00:00:00 2001
From: fanlai0990
Date: Sat, 11 Mar 2023 11:48:36 -0500
Subject: [PATCH 2/2] [Doc] Deployment Readme

---
 fedscale/edge/android/README-App.md |  30 +++++++++
 fedscale/edge/android/README.md     | 100 +++++++++++++++-------------
 2 files changed, 85 insertions(+), 45 deletions(-)
 create mode 100644 fedscale/edge/android/README-App.md

diff --git a/fedscale/edge/android/README-App.md b/fedscale/edge/android/README-App.md
new file mode 100644
index 00000000..51f39469
--- /dev/null
+++ b/fedscale/edge/android/README-App.md
@@ -0,0 +1,30 @@
+## FedScale Example Mobile App
+
+We provide a sample app in which you can choose to
+- Train/test models with TFLite or Alibaba MNN.
+- Fine-tune models locally **after** receiving the model from the cloud.
+
+Please follow these steps to download and build the sample Android app.
+
+1. Download and unzip the [sample dataset (TrainTest.zip)](https://drive.google.com/file/d/1nfi3SVzjaE0LPxwj_5DNdqi6rK7BU8kb/view?usp=sharing) into the `assets/` directory. Remove `TrainTest.zip` after unzipping to save space on your mobile device. After unzipping, you should see 3 files and 2 directories under `assets/`:
+    1. `TrainSet`: Training set directory, containing 320 images.
+    2. `TestSet`: Testing set directory, containing 32 images.
+    3. `conf.json`: Configuration file for the mobile app.
+    4. `train_labels.txt`: Training label file with format `