Skip to content

Commit f20a604

Browse files
committed
[PERF_STATS] Add CPU util query
1 parent e41168e commit f20a604

File tree

3 files changed

+148
-12
lines changed

3 files changed

+148
-12
lines changed

src_erl/NerlnetApp/src/Client/clientStatem.erl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ init({MyName,NerlnetGraph, ClientWorkers , WorkerShaMap , WorkerToClientMap , Sh
7575
EtsRef = ets:new(client_data, [set, public]), %% client_data is responsible for functional attributes
7676
EtsStats = ets:new(ets_stats, [set]), %% ets_stats is responsible for holding all the ets stats (client + workers)
7777
ClientStatsEts = stats:generate_stats_ets(), %% client stats ets inside ets_stats
78+
% TODO add flag to control generate performance stats ets
7879
ClientPerformanceEts = stats:generate_performance_stats_ets(), %% client performance stats ets inside ets_stats
7980
ets:insert(EtsStats, {MyName, ClientStatsEts}),
8081
ets:insert(EtsStats, {performance_stats, ClientStatsEts}),

src_erl/NerlnetApp/src/Stats/stats.erl

Lines changed: 134 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,20 @@
1717
% performance stats
1818
-export([generate_performance_stats_ets/0]).
1919
-export([start_os_mon/0]).
20+
-export([performance_stats_reset/1]).
21+
% performance stats getters/setters
22+
-export([get_time_train_active/1, increment_time_train_active/2]).
23+
-export([get_time_train_total/1, increment_time_train_total/2]).
24+
-export([get_time_predict_active/1, increment_time_predict_active/2]).
25+
-export([get_time_predict_total/1, increment_time_predict_total/2]).
26+
-export([get_memory_peak_usage_train/1, update_memory_peak_usage_train/2]).
27+
-export([get_memory_peak_usage_predict/1, update_memory_peak_usage_predict/2]).
28+
-export([get_memory_train_ema_usage/1, update_memory_train_ema_usage/2]).
29+
-export([get_memory_predict_ema_usage/1, update_memory_predict_ema_usage/2]).
30+
-export([set_cpu_util_per_core/2]).
31+
32+
% performance stats queries
33+
-export([query_memory_usage/0, query_cpu_util_cores/0]).
2034

2135
get_numeric_type(Value) ->
2236
case Value of
@@ -106,25 +120,24 @@ generate_performance_stats_ets() -> %% clients
106120
ets:insert(PerformanceStatsEts, {time_predict_active , 0}), % Client Aggregate prediction times of workers
107121
ets:insert(PerformanceStatsEts, {time_predict_total , 0}), % Client counts the total time spent in prediction state
108122

123+
ets:insert(PerformanceStatsEts, {average_gpu_usage_train , 0}),
124+
ets:insert(PerformanceStatsEts, {average_gpu_memory_usage_predict , 0}),
109125

110-
ets:insert(PerformanceStatsEts, {average_gpu_usage , 0}),
111-
ets:insert(PerformanceStatsEts, {average_gpu_memory_usage , 0}),
112-
113-
ets:insert(PerformanceStatsEts, {memory_train_avg_usage , 0}),
114-
ets:insert(PerformanceStatsEts, {memory_predict_avg_usage , 0}),
126+
ets:insert(PerformanceStatsEts, {memory_train_ema_usage , 0}),
127+
ets:insert(PerformanceStatsEts, {memory_predict_ema_usage , 0}),
115128
ets:insert(PerformanceStatsEts, {memory_train_peak_usage , 0}),
116129
ets:insert(PerformanceStatsEts, {memory_predict_peak_usage , 0}),
117130

118131
% cores usage
119-
NumberOfCores = length(cpu_sup:util([per_cpu])),
132+
NumberOfCores = length(cpu_sup:util([per_cpu])), % Important! this call also resets util since last util's call
120133
ets:insert(PerformanceStatsEts, {num_of_cores , NumberOfCores}),
121134
lists:foreach(fun(CoreIndex) ->
122-
KeyAvgUtilTrainingPerCoreStr = lists:flatten(io_lib:format("cpu_train_avg_util_core_~p" , [CoreIndex])),
123-
KeyAvgUtilTrainingPerCoreAtom = list_to_atom(KeyAvgUtilTrainingPerCoreStr),
124-
ets:insert(PerformanceStatsEts, {KeyAvgUtilTrainingPerCoreAtom, 0}),
125-
KeyAvgUtilPredictPerCoreStr = lists:flatten(io_lib:format("cpu_predict_avg_util_core_~p" , [CoreIndex])),
126-
KeyAvgUtilPredictPerCoreAtom = list_to_atom(KeyAvgUtilPredictPerCoreStr),
127-
ets:insert(PerformanceStatsEts, {KeyAvgUtilPredictPerCoreAtom, 0})
135+
KeyUtilTrainingPerCoreStr = lists:flatten(io_lib:format("cpu_train_util_core_~p" , [CoreIndex])),
136+
KeyUtilTrainingPerCoreAtom = list_to_atom(KeyUtilTrainingPerCoreStr),
137+
ets:insert(PerformanceStatsEts, {KeyUtilTrainingPerCoreAtom, 0}),
138+
KeyUtilPredictPerCoreStr = lists:flatten(io_lib:format("cpu_predict_util_core_~p" , [CoreIndex])),
139+
KeyUtilPredictPerCoreAtom = list_to_atom(KeyUtilPredictPerCoreStr),
140+
ets:insert(PerformanceStatsEts, {KeyUtilPredictPerCoreAtom, 0})
128141
end,
129142
lists:seq(1, NumberOfCores)),
130143
PerformanceStatsEts.
@@ -216,3 +229,112 @@ get_bad_messages(StatsEts) ->
216229
%% Bumps the bad-messages counter stored in StatsEts by one and returns the
%% counter's new value (the result of ets:update_counter/3).
increment_bad_messages(StatsEts) ->
    BadMsgKey = ?STATS_ATOM_BAD_MSG,
    ets:update_counter(StatsEts, BadMsgKey, 1).
218231

232+
233+
%% Resets the performance statistics by discarding OldEts and building a
%% brand new table with generate_performance_stats_ets/0.
%% Returns the NEW table; OldEts is no longer valid after this call, so any
%% place that still holds the old table identifier must be updated by the caller.
performance_stats_reset(OldEts) ->
    true = ets:delete(OldEts), % ets:delete/1 returns true or raises badarg
    generate_performance_stats_ets().
239+
240+
%% Performance Stats Methods
241+
%% One step of an Exponential Moving Average (EMA):
%%   Result = NewValue * Alpha + OldValue * (1 - Alpha)
%% where Alpha is ?EMA_COEFFICIENT_HIST.
%% NOTE(review): despite the HIST suffix, the coefficient weights the NEW
%% sample here and (1 - coefficient) weights the history - confirm intended.
ema_calc(OldValue, NewValue) ->
    Alpha = ?EMA_COEFFICIENT_HIST,
    NewValue * Alpha + OldValue * (1 - Alpha).
245+
246+
%% Performance Stats Query Methods
247+
%% Returns the amount of memory currently in use on the host, computed as
%% (total system memory - free memory) from memsup:get_system_memory_data/0.
%% This is OS-level memory, not the memory of the Erlang VM alone.
%%
%% The memsup result is a tagged list whose tags and order are platform
%% dependent (e.g. available_memory is not reported everywhere), so the values
%% are looked up by tag instead of matching the whole list positionally.
%% When system_total_memory is not reported, total_memory is used instead.
%% Requires the os_mon application (memsup) to be running.
query_memory_usage() ->
    MemoryData = memsup:get_system_memory_data(),
    TotalMemory = proplists:get_value(system_total_memory, MemoryData,
                                      proplists:get_value(total_memory, MemoryData)),
    FreeMemory = proplists:get_value(free_memory, MemoryData),
    %% A missing tag leaves 'undefined' here and fails with badarith, which
    %% surfaces the unsupported platform instead of returning a bogus number.
    TotalMemory - FreeMemory.
259+
260+
%% Queries the per-core CPU utilization of the host through
%% cpu_sup:util([per_cpu]) and returns a list of {CoreIndex, Busy} tuples,
%% dropping the non-busy and misc fields of every entry.
%% Note: cpu_sup reports utilization since the previous util call, so every
%% call to this function also starts a new measurement window.
query_cpu_util_cores() ->
    ToCoreBusy = fun({CoreIndex, Busy, _NonBusy, _Misc}) -> {CoreIndex, Busy} end,
    lists:map(ToCoreBusy, cpu_sup:util([per_cpu])).
265+
266+
%% Performance Stats Getters/Setters
267+
%% Reads the value stored under ?STATS_ATOM_TIME_TRAIN_ACTIVE in StatsEts.
%% Raises badarg if the key is not present in the table.
get_time_train_active(StatsEts) ->
    Key = ?STATS_ATOM_TIME_TRAIN_ACTIVE,
    ets:lookup_element(StatsEts, Key, ?STATS_KEYVAL_VAL_IDX).
269+
270+
%% Adds Value (an integer) to the ?STATS_ATOM_TIME_TRAIN_ACTIVE counter of
%% StatsEts and returns the updated counter value.
increment_time_train_active(StatsEts, Value) ->
    CounterKey = ?STATS_ATOM_TIME_TRAIN_ACTIVE,
    ets:update_counter(StatsEts, CounterKey, Value).
272+
273+
%% Reads the value stored under ?STATS_ATOM_TIME_TRAIN_TOTAL in StatsEts.
%% Raises badarg if the key is not present in the table.
get_time_train_total(StatsEts) ->
    Key = ?STATS_ATOM_TIME_TRAIN_TOTAL,
    ets:lookup_element(StatsEts, Key, ?STATS_KEYVAL_VAL_IDX).
275+
276+
%% Adds Value (an integer) to the ?STATS_ATOM_TIME_TRAIN_TOTAL counter of
%% StatsEts and returns the updated counter value.
increment_time_train_total(StatsEts, Value) ->
    CounterKey = ?STATS_ATOM_TIME_TRAIN_TOTAL,
    ets:update_counter(StatsEts, CounterKey, Value).
278+
279+
%% Reads the value stored under ?STATS_ATOM_TIME_PREDICT_ACTIVE in StatsEts.
%% Raises badarg if the key is not present in the table.
get_time_predict_active(StatsEts) ->
    Key = ?STATS_ATOM_TIME_PREDICT_ACTIVE,
    ets:lookup_element(StatsEts, Key, ?STATS_KEYVAL_VAL_IDX).
281+
282+
%% Adds Value (an integer) to the ?STATS_ATOM_TIME_PREDICT_ACTIVE counter of
%% StatsEts and returns the updated counter value.
increment_time_predict_active(StatsEts, Value) ->
    CounterKey = ?STATS_ATOM_TIME_PREDICT_ACTIVE,
    ets:update_counter(StatsEts, CounterKey, Value).
284+
285+
%% Reads the value stored under ?STATS_ATOM_TIME_PREDICT_TOTAL in StatsEts.
%% Raises badarg if the key is not present in the table.
get_time_predict_total(StatsEts) ->
    Key = ?STATS_ATOM_TIME_PREDICT_TOTAL,
    ets:lookup_element(StatsEts, Key, ?STATS_KEYVAL_VAL_IDX).
287+
288+
%% Adds Value (an integer) to the ?STATS_ATOM_TIME_PREDICT_TOTAL counter of
%% StatsEts and returns the updated counter value.
increment_time_predict_total(StatsEts, Value) ->
    CounterKey = ?STATS_ATOM_TIME_PREDICT_TOTAL,
    ets:update_counter(StatsEts, CounterKey, Value).
290+
291+
%% Reads the value stored under ?STATS_MEMORY_TRAIN_PEAK_USAGE in StatsEts.
%% Raises badarg if the key is not present in the table.
get_memory_peak_usage_train(StatsEts) ->
    Key = ?STATS_MEMORY_TRAIN_PEAK_USAGE,
    ets:lookup_element(StatsEts, Key, ?STATS_KEYVAL_VAL_IDX).
293+
294+
%% Records Value as the training memory peak when it exceeds the stored peak.
%% The entry is rewritten on every call (with the larger of the stored peak
%% and Value); returns the result of ets:update_element/3.
%% The read and the write are two separate ETS operations.
update_memory_peak_usage_train(StatsEts, Value) ->
    StoredPeak = get_memory_peak_usage_train(StatsEts),
    ets:update_element(StatsEts,
                       ?STATS_MEMORY_TRAIN_PEAK_USAGE,
                       {?STATS_KEYVAL_VAL_IDX, max(StoredPeak, Value)}).
300+
301+
%% Reads the value stored under ?STATS_MEMORY_PREDICT_PEAK_USAGE in StatsEts.
%% Raises badarg if the key is not present in the table.
get_memory_peak_usage_predict(StatsEts) ->
    Key = ?STATS_MEMORY_PREDICT_PEAK_USAGE,
    ets:lookup_element(StatsEts, Key, ?STATS_KEYVAL_VAL_IDX).
303+
304+
%% Records Value as the prediction memory peak when it exceeds the stored peak.
%% The entry is rewritten on every call (with the larger of the stored peak
%% and Value); returns the result of ets:update_element/3.
%% The read and the write are two separate ETS operations.
update_memory_peak_usage_predict(StatsEts, Value) ->
    StoredPeak = get_memory_peak_usage_predict(StatsEts),
    ets:update_element(StatsEts,
                       ?STATS_MEMORY_PREDICT_PEAK_USAGE,
                       {?STATS_KEYVAL_VAL_IDX, max(StoredPeak, Value)}).
310+
311+
%% Reads the value stored under ?STATS_MEMORY_TRAIN_EMA_USAGE in StatsEts.
%% Raises badarg if the key is not present in the table.
get_memory_train_ema_usage(StatsEts) ->
    Key = ?STATS_MEMORY_TRAIN_EMA_USAGE,
    ets:lookup_element(StatsEts, Key, ?STATS_KEYVAL_VAL_IDX).
313+
314+
%% Folds a new memory sample (Value) into the training memory EMA kept in
%% StatsEts: the stored EMA is read, combined with Value by ema_calc/2 and
%% written back. Returns the result of ets:update_element/3.
update_memory_train_ema_usage(StatsEts, Value) ->
    PreviousEma = get_memory_train_ema_usage(StatsEts),
    UpdatedEma = ema_calc(PreviousEma, Value),
    ets:update_element(StatsEts,
                       ?STATS_MEMORY_TRAIN_EMA_USAGE,
                       {?STATS_KEYVAL_VAL_IDX, UpdatedEma}).
320+
321+
%% Reads the value stored under ?STATS_MEMORY_PREDICT_EMA_USAGE in StatsEts.
%% Raises badarg if the key is not present in the table.
get_memory_predict_ema_usage(StatsEts) ->
    Key = ?STATS_MEMORY_PREDICT_EMA_USAGE,
    ets:lookup_element(StatsEts, Key, ?STATS_KEYVAL_VAL_IDX).
323+
324+
%% Folds a new memory sample (Value) into the prediction memory EMA kept in
%% StatsEts: the stored EMA is read, combined with Value by ema_calc/2 and
%% written back. Returns the result of ets:update_element/3.
update_memory_predict_ema_usage(StatsEts, Value) ->
    PreviousEma = get_memory_predict_ema_usage(StatsEts),
    UpdatedEma = ema_calc(PreviousEma, Value),
    ets:update_element(StatsEts,
                       ?STATS_MEMORY_PREDICT_EMA_USAGE,
                       {?STATS_KEYVAL_VAL_IDX, UpdatedEma}).
330+
331+
%% Stores per-core CPU utilization values in StatsEts.
%% UtilList is a list of {CoreIndex, Util} tuples. For every tuple the same
%% Util is written to both the cpu_train_util_core_<CoreIndex> and the
%% cpu_predict_util_core_<CoreIndex> entries. Returns ok.
%% ets:update_element/3 only touches existing keys, so an entry whose key was
%% never inserted is silently left out.
%% NOTE(review): generate_performance_stats_ets/0 seeds keys for cores
%% 1..NumberOfCores - verify that the CoreIndex values passed in use the same
%% numbering base, otherwise some cores are never recorded.
set_cpu_util_per_core(StatsEts, UtilList) ->
    KeyFor = fun(Format, CoreIndex) ->
                 list_to_atom(lists:flatten(io_lib:format(Format, [CoreIndex])))
             end,
    StoreCoreUtil =
        fun({CoreIndex, Util}) ->
            TrainKey = KeyFor("cpu_train_util_core_~p", CoreIndex),
            ets:update_element(StatsEts, TrainKey, {?STATS_KEYVAL_VAL_IDX, Util}),
            PredictKey = KeyFor("cpu_predict_util_core_~p", CoreIndex),
            ets:update_element(StatsEts, PredictKey, {?STATS_KEYVAL_VAL_IDX, Util})
        end,
    lists:foreach(StoreCoreUtil, UtilList).

src_erl/NerlnetApp/src/Stats/stats.hrl

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,16 @@
2020
-define(WORKER_SEPERATOR , "^").
2121
-define(WORKER_SEPERATOR_TRIPLETS , "@").
2222
-define(WORKER_SEPERATOR_WITHIN_TRIPLET , "$").
23+
24+
% Atoms used by the performance-stats getters/setters in stats.erl as keys of
% the performance stats ETS table.
-define(STATS_ATOM_TIME_TRAIN_ACTIVE, time_train_active).
25+
-define(STATS_ATOM_TIME_TRAIN_TOTAL, time_train_total).
26+
-define(STATS_ATOM_TIME_PREDICT_ACTIVE, time_predict_active).
27+
-define(STATS_ATOM_TIME_PREDICT_TOTAL, time_predict_total).
28+
-define(STATS_AVG_GPU_USAGE_TRAIN, average_gpu_usage_train).
29+
-define(STATS_AVG_GPU_MEMORY_USAGE_PREDICT, average_gpu_memory_usage_predict).
30+
-define(STATS_MEMORY_TRAIN_EMA_USAGE, memory_train_ema_usage).
31+
-define(STATS_MEMORY_PREDICT_EMA_USAGE, memory_predict_ema_usage).
32+
-define(STATS_MEMORY_TRAIN_PEAK_USAGE, memory_train_peak_usage).
33+
-define(STATS_MEMORY_PREDICT_PEAK_USAGE, memory_predict_peak_usage).
34+
35+
% Coefficient of the exponential moving average computed by stats:ema_calc/2.
% NOTE(review): ema_calc/2 multiplies the NEW sample by this value and the
% previous value by (1 - coefficient), despite the HIST suffix - confirm intended.
-define(EMA_COEFFICIENT_HIST, 0.4).

0 commit comments

Comments
 (0)