Skip to content

Commit f2370cc

Browse files
committed
Fixes for zmq_worker_client benchmark app
Fixed Makefile to link to margo. Updated config script to include monitoring and the multi-executable launch options. Improved run script.
1 parent 137b52a commit f2370cc

File tree

5 files changed

+77
-57
lines changed

5 files changed

+77
-57
lines changed

benchmark_suite/zmq_worker_client/Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@ INSTALL := ${INSTALL}../
55
all: server client
66

77
server: server.cpp
8-
tau_cxx.sh -std=c++17 -O3 -g -D_USE_ZMQNET -D_PERF_METRIC -I${INSTALL}/include -I${INSTALL}/include/chimbuko/3rdparty -L${INSTALL}/lib server.cpp -o server -lchimbuko -lstdc++fs
8+
tau_cxx.sh -std=c++17 -O3 -g -D_USE_ZMQNET -D_PERF_METRIC -I${INSTALL}/include -I${INSTALL}/include/chimbuko/3rdparty -L${INSTALL}/lib server.cpp -o server -lchimbuko -lstdc++fs -lmargo
99
client: client.cpp
10-
tau_cxx.sh -std=c++17 -O3 -g -D_USE_ZMQNET -D_PERF_METRIC -I${INSTALL}/include -I${INSTALL}/include/chimbuko/3rdparty -L${INSTALL}/lib client.cpp -o client -lchimbuko -lstdc++fs
10+
tau_cxx.sh -std=c++17 -O3 -g -D_USE_ZMQNET -D_PERF_METRIC -I${INSTALL}/include -I${INSTALL}/include/chimbuko/3rdparty -L${INSTALL}/lib client.cpp -o client -lchimbuko -lstdc++fs -lmargo
1111

1212
clean:
1313
rm -f client server

benchmark_suite/zmq_worker_client/chimbuko_config.sh

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ service_node_iface=eth0 #network interface upon which communication to the servi
88
####################################
99
#Options for visualization module
1010
####################################
11-
use_viz=1 #enable or disable the visualization
11+
use_viz=0 #enable or disable the visualization
1212
viz_root=/opt/chimbuko/viz #the root directory of the visualization module <------------ ***SET ME (if using viz)***
1313
viz_worker_port=6379 #the port on which to run the redis server for the visualization backend
1414
viz_port=5002 #the port on which to run the webserver
@@ -28,7 +28,7 @@ provdb_extra_args="" #any extra command line arguments to pass
2828
provdb_nshards=4 #number of database shards
2929
provdb_ninstances=1 #number of database server instances. Shards are distributed over instances
3030
provdb_engine="ofi+tcp;ofi_rxm" #the OFI libfabric provider used for the Mochi stack
31-
provdb_port=5000 #the port of the provenance database
31+
provdb_port=5000 #the port of the provenance database. For >1 instance the port of instance i will be provdb_port+i
3232
provdb_writedir=chimbuko/provdb #the directory in which the provenance database is written. Chimbuko creates chimbuko/provdb which can be used as a default
3333
provdb_commit_freq=10000 #frequency (in ms) at which the provenance database is committed to disk. If set to 0 it will commit only at the end
3434

@@ -51,33 +51,35 @@ pserver_nt=2 #number of worker threads
5151
####################################
5252
ad_extra_args="-perf_outputpath chimbuko/logs -perf_step 1" #any extra command line arguments to pass. Note: chimbuko/logs is automatically created by services script
5353
ad_win_size=5 #number of events around an anomaly to store; provDB entry size is proportional to this so keep it small!
54-
ad_alg="sstd" #the anomaly detection algorithm. Valid values are "hbos" and "sstd"
54+
ad_alg="hbos" #the anomaly detection algorithm. Valid values are "hbos" and "sstd"
5555
ad_outlier_hbos_threshold=0.99 #the percentile of events outside of which are considered anomalies by the HBOS algorithm
5656
ad_outlier_sstd_sigma=12 #number of standard deviations that defines an outlier in the SSTD algorithm
5757
####################################
5858
#Options for TAU
59-
#Note: Only the TAU_ADIOS2_PATH, TAU_ADIOS2_FILE_PREFIX, EXE_NAME and TAU_ADIOS2_ENGINE variables are used by the Chimbuko services script and there only to generate the suggested
59+
#Note: Only the TAU_ADIOS2_PATH, TAU_ADIOS2_FILE_PREFIX, EXE_NAME, TAU_ADIOS2_ENGINE and tau_monitoring_conf variables are used by the Chimbuko services script and there only to generate the suggested
6060
# command to launch the AD (output to chimbuko/vars/chimbuko_ad_cmdline.var); they can be overridden by the run script if desired providing the appropriate modifications
6161
# are made to the AD launch command. The remainder of the variables are used only by TAU and can be freely overridden.
6262
####################################
63-
export TAU_ADIOS2_ENGINE=SST #online communication engine (alternative BP4 although this goes through the disk system and may be slower unless the BPfiles are stored on a burst disk)
63+
export TAU_ADIOS2_ENGINE=BP4 #communication engine: SST (online) or BP4 (BP4 goes through the disk system and may be slower unless the BP files are stored on a burst disk)
6464
export TAU_ADIOS2_ONE_FILE=FALSE #a different connection file for each rank
6565
export TAU_ADIOS2_PERIODIC=1 #enable/disable ADIOS2 periodic output
6666
export TAU_ADIOS2_PERIOD=1000000 #period in us between ADIOS2 io steps
6767
export TAU_THREAD_PER_GPU_STREAM=1 #force GPU streams to appear as different TAU virtual threads
6868
export TAU_THROTTLE=0 #enable/disable throttling of short-running functions
6969

7070
export TAU_MAKEFILE=/opt/tau2/x86_64/lib/Makefile.tau-papi-pthread-python-pdt-adios2 #non-MPI TAU required!
71+
tau_monitoring_conf="default" #Provide a configuration file for the TAU monitoring plugin. It will be copied to the work directory as "tau_monitoring.json" (unless it is already there!). If set to default, Chimbuko will generate one automatically
7172

7273
#Note: the following 2 variables are not used by the service script but are included here for use from the user's run script allowing the application to be launched with either "${TAU_EXEC} <app>" or "${TAU_PYTHON} <app>"
7374
#Note: the "binding" -T ... is used by Tau to find the appropriate configuration. It can typically be inferred from the name of the Makefile. If using a non-MPI job the 'mpi' should be changed to 'serial' and a non-MPI build of
7475
# ADIOS2/TAU must exist
7576
#Suggestion: It is useful to test the command without Chimbuko first to ensure TAU picks up the correct binding; this can be done by 'export TAU_ADIOS2_ENGINE=BPFile' and then running the application with Tau but without Chimbuko.
76-
TAU_EXEC="tau_exec -T serial,papi,pthread,python,pdt,adios2 -adios2_trace" #how to execute tau_exec; the -T arguments should mirror the makefile name <------------ ***SET ME***
77+
#TAU_EXEC="tau_exec -T serial,papi,pthread,python,pdt,adios2 -adios2_trace -monitoring" #how to execute tau_exec; the -T arguments should mirror the makefile name <------------ ***SET ME***
78+
TAU_EXEC="tau_exec -T serial,papi,pthread,python,pdt,adios2 -adios2_trace -monitoring" #how to execute tau_exec; the -T arguments should mirror the makefile name <------------ ***SET ME***
7779
TAU_PYTHON="tau_python -T papi,mpi,pthread,pdt,adios2 -tau-python-interpreter=python3 -adios2_trace -tau-python-args=-u" #how to execute tau_python. Note that passing -u to python forces it to not buffer stdout so we can pipe it
7880
#to tee in realtime <--- SET ME (if !python3)
7981

80-
export EXE_NAME=main #the name of the executable (without path) <------------ ***SET ME***
82+
export EXE_NAME=(server client) #the name of the executable (without path). For multi-component workflows this argument also accepts a list, e.g. (main1 main2) <------------ ***SET ME***
8183

8284
TAU_ADIOS2_PATH=chimbuko/adios2 #path where the adios2 files are to be stored. Chimbuko services creates the directory chimbuko/adios2 in the working directory and this should be used by default
8385
TAU_ADIOS2_FILE_PREFIX=tau-metrics #the prefix of tau adios2 files; full filename is ${TAU_ADIOS2_FILE_PREFIX}-${EXE_NAME}-${RANK}.bp

benchmark_suite/zmq_worker_client/client.cpp

Lines changed: 25 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -23,30 +23,32 @@ void rpcWaitAndRespond(ADThreadNetClient &client, int wait_ms){
2323
client.send_and_receive(recv,send); //ignore return
2424
}
2525

26-
int main(int argc, char** argv){
27-
assert(argc >= 6);
26+
int main(int argc, char** argv){
27+
{
28+
assert(argc >= 6);
2829

29-
std::string server_addr = argv[1];
30-
int cycles = std::stoi(argv[2]);
31-
int cycle_time = std::stoi(argv[3]); //ms
32-
int anom_freq = std::stoi(argv[4]);
33-
int anom_mult = std::stoi(argv[5]); //time multiplier for anomalies
34-
Log << "Client executing with parameters: cycles=" << cycles << " cycle_time=" << cycle_time << "ms anom_freq=" << anom_freq << " anom_mult=" << anom_mult << std::endl;
35-
36-
Log << "connecting to server with address " << server_addr << std::endl;
37-
ADThreadNetClient client;
38-
client.connect_ps(0,0,server_addr);
39-
40-
for(int i=0;i<cycles;i++){
41-
int ctime = cycle_time;
42-
if(i>0 && i % anom_freq == 0) ctime *= anom_mult;
43-
44-
//Client waits for same time as server
45-
clientWait(ctime);
46-
47-
Log << "calling RPC" << std::endl;
48-
rpcWaitAndRespond(client, ctime);
30+
std::string server_addr = argv[1];
31+
int cycles = std::stoi(argv[2]);
32+
int cycle_time = std::stoi(argv[3]); //ms
33+
int anom_freq = std::stoi(argv[4]);
34+
int anom_mult = std::stoi(argv[5]); //time multiplier for anomalies
35+
Log << "executing with parameters: cycles=" << cycles << " cycle_time=" << cycle_time << "ms anom_freq=" << anom_freq << " anom_mult=" << anom_mult << std::endl;
36+
37+
Log << "connecting to server with address " << server_addr << std::endl;
38+
ADThreadNetClient client;
39+
client.connect_ps(0,0,server_addr);
40+
41+
for(int i=0;i<cycles;i++){
42+
int ctime = cycle_time;
43+
if(i>0 && i % anom_freq == 0) ctime *= anom_mult;
44+
45+
//Client waits for same time as server
46+
clientWait(ctime);
47+
48+
Log << "calling RPC" << std::endl;
49+
rpcWaitAndRespond(client, ctime);
50+
}
4951
}
50-
52+
Log << "finished" << std::endl;
5153
return 0;
5254
}

benchmark_suite/zmq_worker_client/run.sh

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,25 +6,36 @@ rm -rf chimbuko
66
export CHIMBUKO_CONFIG=chimbuko_config.sh
77
source ${CHIMBUKO_CONFIG}
88

9-
if (( 1 )); then
9+
log_dir=chimbuko/logs
10+
11+
use_chimbuko=1
12+
use_tau=1
13+
14+
#Launch Chimbuko services
15+
if (( ${use_chimbuko} == 1 )); then
1016
echo "Running services"
1117
${chimbuko_services} 2>&1 | tee services.log &
1218
echo "Waiting"
13-
while [ ! -f chimbuko/vars/chimbuko_ad_cmdline.var ]; do sleep 1; done
19+
while [ ! -f chimbuko/vars/chimbuko_ad_opts.var ]; do sleep 1; done
1420
ad_opts=$(cat chimbuko/vars/chimbuko_ad_opts.var)
1521
else
16-
mkdir -p chimbuko/logs chimbuko/adios2
22+
mkdir -p chimbuko/logs chimbuko/adios2
1723
fi
1824

19-
log_dir=chimbuko/logs
25+
if (( ${use_tau} == 0 )); then
26+
TAU_EXEC=
27+
fi
2028

21-
if (( 1 )); then
22-
ad_run_server="driver ${TAU_ADIOS2_ENGINE} ${TAU_ADIOS2_PATH} ${TAU_ADIOS2_FILE_PREFIX}-server -program_idx 0 ${ad_opts} 2>&1 | tee ${log_dir}/ad_server.log"
29+
#Launch AD on server
30+
if (( ${use_chimbuko} == 1 )); then
31+
ad_run_server=$(cat chimbuko/vars/chimbuko_ad_cmdline.server.var)
2332
echo "Running AD for server with command:"
2433
echo ${ad_run_server}
2534
eval "${ad_run_server} &"
2635
fi
2736

37+
#Launch server
38+
export TAU_VERBOSE=0
2839
${TAU_EXEC} ./server 9876 2>&1 | tee chimbuko/logs/server.log &
2940
spid=$!
3041

@@ -38,12 +49,15 @@ cycle_time=50 #ms
3849
anom_freq=30
3950
anom_mult=20
4051

41-
if (( 1 )); then
42-
ad_run_client="driver ${TAU_ADIOS2_ENGINE} ${TAU_ADIOS2_PATH} ${TAU_ADIOS2_FILE_PREFIX}-client -program_idx 1 ${ad_opts} 2>&1 | tee ${log_dir}/ad_client.log"
52+
#Launch AD on client
53+
if (( ${use_chimbuko} == 1 )); then
54+
ad_run_client=$(cat chimbuko/vars/chimbuko_ad_cmdline.client.var)
4355
echo "Running AD for client"
4456
eval "${ad_run_client} &"
4557
fi
4658

47-
export TAU_VERBOSE=1
48-
59+
#Launch client
60+
export TAU_VERBOSE=0
4961
${TAU_EXEC} ./client ${ip_port} ${cycles} ${cycle_time} ${anom_freq} ${anom_mult} 2>&1 | tee chimbuko/logs/client.log
62+
63+
wait

benchmark_suite/zmq_worker_client/server.cpp

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -38,29 +38,31 @@ class NetPayloadBounce: public NetPayloadBase{
3838

3939

4040
int main (int argc, char ** argv){
41-
if(argc != 2){ std::cout << "Usage: <binary> <port>" << std::endl; return 0; }
41+
{
42+
if(argc != 2){ std::cout << "Usage: <binary> <port>" << std::endl; return 0; }
4243

43-
int port = std::stoi(argv[1]);
44-
int threads = 4;
44+
int port = std::stoi(argv[1]);
45+
int threads = 4;
4546

46-
ZMQNet net;
47-
net.setPort(port);
48-
net.setAutoShutdown(true);
47+
ZMQNet net;
48+
net.setPort(port);
49+
net.setAutoShutdown(true);
4950

50-
Log << "run parameter server on port " << port << std::endl;
51+
Log << "run parameter server on port " << port << std::endl;
5152

52-
net.add_payload(new NetPayloadBounce);
53-
net.init(nullptr, nullptr, threads);
53+
for(int t=0;t<threads;t++)
54+
net.add_payload(new NetPayloadBounce,t);
55+
net.init(nullptr, nullptr, threads);
5456

55-
signal(SIGTERM, termSignalHandler);
57+
signal(SIGTERM, termSignalHandler);
5658

57-
net.run();
59+
net.run();
5860

59-
signal(SIGTERM, SIG_DFL); //restore default signal handling
60-
61-
Log << "shutdown parameter server ..." << std::endl;
62-
net.finalize();
61+
signal(SIGTERM, SIG_DFL); //restore default signal handling
6362

63+
Log << "shutdown parameter server ..." << std::endl;
64+
net.finalize();
65+
}
6466
Log << "finished" << std::endl;
6567
return 0;
6668
}

0 commit comments

Comments
 (0)