Skip to content

Commit 3d4006a

Browse files
authored
Merge pull request #294 from leondavi/nerlplanner_fix
[NerlnetApp] Critical bugs in distributed running
2 parents e83e7ed + f8c4989 commit 3d4006a

27 files changed

+475
-231
lines changed
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"connectionsMap":
3+
{
4+
"r1":["mainServer", "r2"],
5+
"r2":["r3", "s1"],
6+
"r3":["r4", "c1","s2"],
7+
"r4":["r5", "c2","s3"],
8+
"r5":["r6", "c3"],
9+
"r6":["r3", "r4"]
10+
}
11+
}
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
{
2+
"nerlnetSettings": {
3+
"frequency": "50",
4+
"batchSize": "100"
5+
},
6+
"mainServer": {
7+
"port": "8844",
8+
"args": ""
9+
},
10+
"apiServer": {
11+
"port": "8845",
12+
"args": ""
13+
},
14+
"devices": [
15+
{
16+
"name": "mac",
17+
"ipv4": "192.168.0.248",
18+
"entities": "c3,r1,r4,r5,r6,s1,mainServer,apiServer"
19+
},
20+
{
21+
"name": "jet1",
22+
"ipv4": "192.168.0.226",
23+
"entities": "c1,r2,s2"
24+
},
25+
{
26+
"name": "jet2",
27+
"ipv4": "192.168.0.228",
28+
"entities": "c2,r3,s3"
29+
}
30+
],
31+
"routers": [
32+
{
33+
"name": "r1",
34+
"port": "8900",
35+
"policy": "0"
36+
},
37+
{
38+
"name": "r2",
39+
"port": "8902",
40+
"policy": "0"
41+
},
42+
{
43+
"name": "r3",
44+
"port": "8903",
45+
"policy": "0"
46+
},
47+
{
48+
"name": "r4",
49+
"port": "8904",
50+
"policy": "0"
51+
},
52+
{
53+
"name": "r5",
54+
"port": "8905",
55+
"policy": "0"
56+
},
57+
{
58+
"name": "r6",
59+
"port": "8906",
60+
"policy": "0"
61+
}
62+
],
63+
"sources": [
64+
{
65+
"name": "s1",
66+
"port": "8853",
67+
"frequency": "50",
68+
"policy": "0",
69+
"epochs": "1",
70+
"type": "0"
71+
},
72+
{
73+
"name": "s2",
74+
"port": "8854",
75+
"frequency": "50",
76+
"policy": "0",
77+
"epochs": "1",
78+
"type": "0"
79+
},
80+
{
81+
"name": "s3",
82+
"port": "8855",
83+
"frequency": "50",
84+
"policy": "0",
85+
"epochs": "1",
86+
"type": "0"
87+
}
88+
],
89+
"clients": [
90+
{
91+
"name": "c1",
92+
"port": "8846",
93+
"workers": "w1,w2"
94+
},
95+
{
96+
"name": "c2",
97+
"port": "8847",
98+
"workers": "w3,w4"
99+
},
100+
{
101+
"name": "c3",
102+
"port": "8851",
103+
"workers": "w5"
104+
}
105+
],
106+
"workers": [
107+
{
108+
"name": "w1",
109+
"model_sha": "1f3078160da415bda29a65e6c854f938c291d38166d0cb6e89cec2fd81678613"
110+
},
111+
{
112+
"name": "w2",
113+
"model_sha": "1f3078160da415bda29a65e6c854f938c291d38166d0cb6e89cec2fd81678613"
114+
},
115+
{
116+
"name": "w3",
117+
"model_sha": "1f3078160da415bda29a65e6c854f938c291d38166d0cb6e89cec2fd81678613"
118+
},
119+
{
120+
"name": "w4",
121+
"model_sha": "1f3078160da415bda29a65e6c854f938c291d38166d0cb6e89cec2fd81678613"
122+
},
123+
{
124+
"name": "w5",
125+
"model_sha": "1f3078160da415bda29a65e6c854f938c291d38166d0cb6e89cec2fd81678613"
126+
}
127+
],
128+
"model_sha": {
129+
"1f3078160da415bda29a65e6c854f938c291d38166d0cb6e89cec2fd81678613": {
130+
"modelType": "0",
131+
"_doc_modelType": " nn:0 | approximation:1 | classification:2 | forecasting:3 | image_classification:4 | text_classification:5 | text_generation:6 | auto_association:7 | autoencoder:8 | ae_classifier:9 |",
132+
"layersSizes": "5,16,8,3",
133+
"_doc_layersSizes": "List of positive integers [L0, L1, ..., LN]",
134+
"layerTypesList": "1,3,3,3",
135+
"_doc_LayerTypes": " Default:0 | Scaling:1 | CNN:2 | Perceptron:3 | Pooling:4 | Probabilistic:5 | LSTM:6 | Recurrent:7 | Unscaling:8 | Bounding:9 |",
136+
"layers_functions": "1,7,7,11",
137+
"_doc_layers_functions_activation": " Threshold:1 | Sign:2 | Logistic:3 | Tanh:4 | Linear:5 | ReLU:6 | eLU:7 | SeLU:8 | Soft-plus:9 | Soft-sign:10 | Hard-sigmoid:11 |",
138+
"_doc_layer_functions_pooling": " none:1 | Max:2 | Avg:3 |",
139+
"_doc_layer_functions_probabilistic": " Binary:1 | Logistic:2 | Competitive:3 | Softmax:4 |",
140+
"_doc_layer_functions_scaler": " none:1 | MinMax:2 | MeanStd:3 | STD:4 | Log:5 |",
141+
"lossMethod": "2",
142+
"_doc_lossMethod": " SSE:1 | MSE:2 | NSE:3 | MinkowskiE:4 | WSE:5 | CEE:6 |",
143+
"lr": "0.01",
144+
"_doc_lr": "Positive float",
145+
"epochs": "1",
146+
"_doc_epochs": "Positive Integer",
147+
"optimizer": "5",
148+
"_doc_optimizer": " GD:0 | CGD:1 | SGD:2 | QuasiNewton:3 | LVM:4 | ADAM:5 |",
149+
"optimizerArgs": "none",
150+
"_doc_optimizerArgs": "String",
151+
"infraType": "0",
152+
"_doc_infraType": " opennn:0 | wolfengine:1 |",
153+
"distributedSystemType": "0",
154+
"_doc_distributedSystemType": " none:0 | fedClientAvg:1 | fedServerAvg:2 |",
155+
"distributedSystemArgs": "none",
156+
"_doc_distributedSystemArgs": "String",
157+
"distributedSystemToken": "none",
158+
"_doc_distributedSystemToken": "Token that associates distributed group of workers and parameter-server"
159+
}
160+
}
161+
}
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
{
2+
"experimentName": "synthetic_3_gausians",
3+
"batchSize": 100,
4+
"csvFilePath": "/tmp/nerlnet/data/NerlnetData-master/nerlnet/synthetic/synthetic_full.csv",
5+
"numOfFeatures": "5",
6+
"numOfLabels": "3",
7+
"headersNames": "Norm(0:1),Norm(4:1),Norm(10:3)",
8+
"Phases":
9+
[
10+
{
11+
"phaseName": "training_phase",
12+
"phaseType": "training",
13+
"sourcePieces":
14+
[
15+
{
16+
"sourceName": "s1",
17+
"startingSample": "0",
18+
"numOfBatches": "200",
19+
"workers": "w1,w2"
20+
},
21+
{
22+
"sourceName": "s2",
23+
"startingSample": "20000",
24+
"numOfBatches": "200",
25+
"workers": "w3,w4"
26+
},
27+
{
28+
"sourceName": "s3",
29+
"startingSample": "40000",
30+
"numOfBatches": "200",
31+
"workers": "w5,w1,w2,w3,w4"
32+
}
33+
]
34+
},
35+
{
36+
"phaseName": "prediction_phase",
37+
"phaseType": "prediction",
38+
"sourcePieces":
39+
[
40+
{
41+
"sourceName": "s1",
42+
"startingSample": "40000",
43+
"numOfBatches": "300",
44+
"workers": "w1,w2,w3,w4"
45+
},
46+
{
47+
"sourceName": "s2",
48+
"startingSample": "40000",
49+
"numOfBatches": "300",
50+
"workers": "w1,w2,w3,w4,w5"
51+
},
52+
{
53+
"sourceName": "s3",
54+
"startingSample": "40000",
55+
"numOfBatches": "300",
56+
"workers": "w5"
57+
}
58+
]
59+
}
60+
]
61+
}
62+

src_erl/NerlnetApp/src/Init/jsonHandler.erl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ init(Req0, [ApplicationPid]) ->
2929
ApplicationPid ! {jsonAddress,{lists:nth(1, Data),lists:nth(2, Data)}};
3030
_Other ->
3131
{ok,Body,_} = cowboy_req:read_body(Req0), %% shouldn't be here, files expected
32-
io:format("got Req: ~p~nData: ~p~n",[Req0, Body])
32+
io:format("Error - Got an unknown request: ~p~nData: ~p~n",[Req0, Body])
3333
end,
3434

3535
Reply = io_lib:format("nerlnet starting", []),

src_erl/NerlnetApp/src/MainServer/ackHandler.erl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ init(Req0, [Who,Main_genserver_Pid]) ->
2121
dataReady -> gen_server:cast(Main_genserver_Pid, {sourceAckDataReady,Body}); %% when source data is ready
2222
sourceDone -> gen_server:cast(Main_genserver_Pid, {sourceDone,Body}); %% when source finished casting
2323
clientAck -> gen_server:cast(Main_genserver_Pid, {clientAck,Body}); %% when client received message (new state)
24-
jsonReceived -> gen_server:cast(Main_genserver_Pid, {jsonReceived,Body}) %% when other devices got the json and ready to start
24+
jsonReceived -> gen_server:cast(Main_genserver_Pid, {jsonReceived,Body}); %% when other devices got the json and ready to start
25+
apiserver_ack_validation -> Main_genserver_Pid ! {apiserver_ack_validation, Body} % This ack validates transmission with flask
2526
end,
2627
Reply = io_lib:format("Body Received: ~p ~n ", [Body]),
2728
Req = cowboy_req:reply(200,

src_erl/NerlnetApp/src/MainServer/initHandler.erl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@ init(Req0, [Main_genServer_Pid]) ->
2424
Decoded_body = binary_to_list(Body),
2525
%Decoded_body = read_all_data(Req0),
2626
% io:format("GOT DATA: ~p~n",[Decoded_body]),
27-
[SourceName, _WorkersStr, _Epochs, _Data] = string:split(Decoded_body, "#", all),
27+
[Index, TotalSources, SourceName, _WorkersStr, _Epochs, _Data] = string:split(Decoded_body, "#", all),
2828
%WorkersList = string:split(WorkersStr, ",", all),
29-
gen_server:cast(Main_genServer_Pid,{initCSV, SourceName, Body}),
29+
gen_server:cast(Main_genServer_Pid,{initCSV, Index, TotalSources, SourceName, Body}),
3030
%[Source|WorkersAndInput] = re:split(binary_to_list(Body), "#", [{return, list}]),
3131
%{Workers,SourceData} = getWorkerInput(WorkersAndInput,[]),
3232

0 commit comments

Comments
 (0)