From 640f7d09f805b48ce1d6138035b0e07f589f072e Mon Sep 17 00:00:00 2001 From: David Boehme Date: Fri, 24 Aug 2018 17:39:40 -0700 Subject: [PATCH 1/8] Initial Caliper support --- src/MC_Fast_Timer.hh | 74 ++++++++++++++++++++++++++++++++------------ src/Makefile | 26 +++++++++++++++- 2 files changed, 79 insertions(+), 21 deletions(-) diff --git a/src/MC_Fast_Timer.hh b/src/MC_Fast_Timer.hh index 9cad33a8..cd131ec1 100644 --- a/src/MC_Fast_Timer.hh +++ b/src/MC_Fast_Timer.hh @@ -6,6 +6,10 @@ #include #endif +#ifdef USE_CALIPER +#include +#endif + #include "portability.hh" // needed for uint64_t in this file #include "utilsMpi.hh" // needed for MPI_Comm type in this file @@ -44,12 +48,20 @@ class MC_Fast_Timer class MC_Fast_Timer_Container { public: - MC_Fast_Timer_Container() {} ; // constructor + MC_Fast_Timer_Container() +#ifdef USE_CALIPER + : cali_annotation("mc.timer", CALI_ATTR_DEFAULT | CALI_ATTR_NESTED) +#endif + {} ; // constructor void Cumulative_Report(int mpi_rank, int num_ranks, MPI_Comm comm_world, uint64_t numSegments); void Last_Cycle_Report(int report_time, int mpi_rank, int num_ranks, MPI_Comm comm_world); void Clear_Last_Cycle_Timers(); MC_Fast_Timer timers[MC_Fast_Timer::Num_Timers]; // timers for various routines - + +#ifdef USE_CALIPER + cali::Annotation cali_annotation; +#endif + private: void Print_Cumulative_Heading(int mpi_rank); void Print_Last_Cycle_Heading(int mpi_rank); @@ -87,27 +99,49 @@ extern const char *mc_fast_timer_names[MC_Fast_Timer::Num_Timers]; #define MC_FASTTIMER_GET_LASTCYCLE(timerIndex) (float)mcco->fast_timer->timers[timerIndex].lastCycleClock / 1000000. 
#else // else CHRONO_MISSING is not defined, so high resolution clock is available - - #define MC_FASTTIMER_START(timerIndex) \ - if (omp_get_thread_num() == 0) { \ - mcco->fast_timer->timers[timerIndex].startClock = std::chrono::high_resolution_clock::now(); \ - } - - #define MC_FASTTIMER_STOP(timerIndex) \ - if ( omp_get_thread_num() == 0 ) { \ - mcco->fast_timer->timers[timerIndex].stopClock = std::chrono::high_resolution_clock::now(); \ - mcco->fast_timer->timers[timerIndex].lastCycleClock += \ - std::chrono::duration_cast \ - (mcco->fast_timer->timers[timerIndex].stopClock - mcco->fast_timer->timers[timerIndex].startClock).count(); \ - mcco->fast_timer->timers[timerIndex].cumulativeClock += \ - std::chrono::duration_cast \ - (mcco->fast_timer->timers[timerIndex].stopClock - mcco->fast_timer->timers[timerIndex].startClock).count(); \ - mcco->fast_timer->timers[timerIndex].numCalls++; \ - } + #ifdef USE_CALIPER + #define MC_FASTTIMER_START(timerIndex) \ + if (omp_get_thread_num() == 0) { \ + mcco->fast_timer->timers[timerIndex].startClock = std::chrono::high_resolution_clock::now(); \ + } \ + mcco->fast_timer->cali_annotation.begin(mc_fast_timer_names[timerIndex]); + + #define MC_FASTTIMER_STOP(timerIndex) \ + if ( omp_get_thread_num() == 0 ) { \ + mcco->fast_timer->timers[timerIndex].stopClock = std::chrono::high_resolution_clock::now(); \ + mcco->fast_timer->timers[timerIndex].lastCycleClock += \ + std::chrono::duration_cast \ + (mcco->fast_timer->timers[timerIndex].stopClock - mcco->fast_timer->timers[timerIndex].startClock).count(); \ + mcco->fast_timer->timers[timerIndex].cumulativeClock += \ + std::chrono::duration_cast \ + (mcco->fast_timer->timers[timerIndex].stopClock - mcco->fast_timer->timers[timerIndex].startClock).count(); \ + mcco->fast_timer->timers[timerIndex].numCalls++; \ + } \ + mcco->fast_timer->cali_annotation.end(); + + #else // not defined USE_CALIPER + + #define MC_FASTTIMER_START(timerIndex) \ + if (omp_get_thread_num() == 0) { \ + 
mcco->fast_timer->timers[timerIndex].startClock = std::chrono::high_resolution_clock::now(); \ + } + + #define MC_FASTTIMER_STOP(timerIndex) \ + if ( omp_get_thread_num() == 0 ) { \ + mcco->fast_timer->timers[timerIndex].stopClock = std::chrono::high_resolution_clock::now(); \ + mcco->fast_timer->timers[timerIndex].lastCycleClock += \ + std::chrono::duration_cast \ + (mcco->fast_timer->timers[timerIndex].stopClock - mcco->fast_timer->timers[timerIndex].startClock).count(); \ + mcco->fast_timer->timers[timerIndex].cumulativeClock += \ + std::chrono::duration_cast \ + (mcco->fast_timer->timers[timerIndex].stopClock - mcco->fast_timer->timers[timerIndex].startClock).count(); \ + mcco->fast_timer->timers[timerIndex].numCalls++; \ + } + + #endif // end ifdef USE_CALIPER else branch #define MC_FASTTIMER_GET_LASTCYCLE(timerIndex) (float)mcco->fast_timer->timers[timerIndex].lastCycleClock / 1000000. - #endif // end ifdef CHRONO_MISSING else section #endif // end if DISABLE_TIMERS diff --git a/src/Makefile b/src/Makefile index 5f42dc12..261cab03 100644 --- a/src/Makefile +++ b/src/Makefile @@ -75,6 +75,10 @@ # with some Clang compilers, some older Gnu compilers on BG/Q # and older Intel compilers. # +# -DUSE_CALIPER Define this to enable Caliper instrumentation. Caliper +# provides performance profiling infrastructure for a wide +# range of tasks. +# # -DUSE_NVTX Define this for some extra NVProf profiling information. # It will create regions that can be visualized in NVVP. # @@ -99,6 +103,8 @@ CXXFLAGS = CPPFLAGS = LDFLAGS = +# CALIPER_DIR = $(spack location --install-dir caliper) + ############################################################################### # Very simple GCC build with OpenMP but without MPI. 
# This works on a Macbook (if gcc is installed) @@ -113,7 +119,6 @@ LDFLAGS = #CPPFLAGS = $(OPENMP_FLAGS) #LDFLAGS = $(OPENMP_LDFLAGS) - ############################################################################### ### GCC -- with MPI and OpenMP ############################################################################### @@ -128,6 +133,25 @@ LDFLAGS = #LDFLAGS = $(OPENMP_LDFLAGS) +############################################################################### +### GCC -- with MPI and OpenMP and Caliper support +############################################################################### +CALIPER_DIR = ${HOME}/local/caliper/toss3-release + +CALIPER_FLAGS = -I${CALIPER_DIR}/include -DUSE_CALIPER +CALIPER_LDFLAGS = -Wl,-rpath ${CALIPER_DIR}/lib64 -L${CALIPER_DIR}/lib64 -lcaliper-mpi -lcaliper + +OPENMP_FLAGS = -DHAVE_OPENMP -fopenmp +OPENMP_LDFLAGS = -fopenmp +MPI_FLAGS = -DHAVE_MPI +OPTFLAGS = -g -O2 + +CXX=mpicxx +CXXFLAGS = -std=c++11 $(OPTFLAGS) -Wpedantic +CPPFLAGS = $(MPI_FLAGS) $(OPENMP_FLAGS) $(CALIPER_FLAGS) +LDFLAGS = $(OPENMP_LDFLAGS) $(CALIPER_LDFLAGS) + + ############################################################################### # LLNL LC BG/Q Comilers # ############################################################################### From 32a638f01913ed07a022e0b5a95f95fd2a197f43 Mon Sep 17 00:00:00 2001 From: David Boehme Date: Fri, 21 Sep 2018 16:23:31 -0700 Subject: [PATCH 2/8] Add documentation on Caliper --- src/MC_Fast_Timer.hh | 2 +- src/Makefile | 7 +++++-- src/READ.ME.HOW.TO.RUN | 10 ++++++++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/MC_Fast_Timer.hh b/src/MC_Fast_Timer.hh index cd131ec1..dd36384b 100644 --- a/src/MC_Fast_Timer.hh +++ b/src/MC_Fast_Timer.hh @@ -50,7 +50,7 @@ class MC_Fast_Timer_Container public: MC_Fast_Timer_Container() #ifdef USE_CALIPER - : cali_annotation("mc.timer", CALI_ATTR_DEFAULT | CALI_ATTR_NESTED) + : cali_annotation("mc.timer", CALI_ATTR_SCOPE_PROCESS | CALI_ATTR_NESTED) #endif {} 
; // constructor void Cumulative_Report(int mpi_rank, int num_ranks, MPI_Comm comm_world, uint64_t numSegments); diff --git a/src/Makefile b/src/Makefile index 261cab03..5cef6961 100644 --- a/src/Makefile +++ b/src/Makefile @@ -76,8 +76,11 @@ # and older Intel compilers. # # -DUSE_CALIPER Define this to enable Caliper instrumentation. Caliper -# provides performance profiling infrastructure for a wide -# range of tasks. +# is a performance profiling / analysis library for +# tracing, sampling, HW counter measurements, and much more. +# When enabled, Quicksilver will link in the Caliper +# library and export its timed regions to Caliper. See +# https://github.com/LLNL/Caliper for more information. # # -DUSE_NVTX Define this for some extra NVProf profiling information. # It will create regions that can be visualized in NVVP. diff --git a/src/READ.ME.HOW.TO.RUN b/src/READ.ME.HOW.TO.RUN index 24e2a8e8..dd8240be 100644 --- a/src/READ.ME.HOW.TO.RUN +++ b/src/READ.ME.HOW.TO.RUN @@ -115,6 +115,16 @@ There is also, at the end of the run, a coarse breakdown of time spent overall in the above mentioned three code phases, as well as a few other sub timings from cycle tracking. +------------------------------------------------------------------------------- +A note on Caliper: + +Caliper is a powerful performance profiling/tracing library. When configured +with Caliper support, Quicksilver adds Caliper annotations for its timed +regions (cycleTracking, cycleTrackingKernel, etc.) as the "mc.timer" +attribute. Performance measurements can be configured through environment +variables or the caliper.config configuration file. For Caliper documentation, +see https://github.com/LLNL/Caliper. 
+ ------------------------------------------------------------------------------- A note on asserts: From caa077134c17274bba1719197fd88644fcab2708 Mon Sep 17 00:00:00 2001 From: David Boehme Date: Thu, 14 Mar 2019 17:32:34 -0700 Subject: [PATCH 3/8] Caliper instrumentation update --- src/main.cc | 70 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 54 insertions(+), 16 deletions(-) diff --git a/src/main.cc b/src/main.cc index bb9517b2..c692059a 100644 --- a/src/main.cc +++ b/src/main.cc @@ -26,6 +26,14 @@ #include "git_hash.hh" #include "git_vers.hh" +#ifdef USE_CALIPER +#include +#ifdef HAVE_MPI +#include +#endif +#endif + +void setupCaliper(); void gameOver(); void cycleInit( bool loadBalance ); void cycleTracking(MonteCarlo* monteCarlo); @@ -43,8 +51,10 @@ int main(int argc, char** argv) Parameters params = getParameters(argc, argv); printParameters(params, cout); - // mcco stores just about everything. - mcco = initMC(params); + setupCaliper(); + + // mcco stores just about everything. 
+ mcco = initMC(params); int loadBalance = params.simulationParams.loadBalance; @@ -52,8 +62,16 @@ int main(int argc, char** argv) const int nSteps = params.simulationParams.nSteps; +#ifdef USE_CALIPER + CALI_CXX_MARK_LOOP_BEGIN(mainloop, "qs.mainloop"); +#endif + for (int ii=0; iiprocessor_info->comm_mc_world ); } +#ifdef USE_CALIPER + CALI_CXX_MARK_LOOP_END(mainloop); +#endif MC_FASTTIMER_STOP(MC_Fast_Timer::main); @@ -80,10 +101,28 @@ int main(int argc, char** argv) #endif mpiFinalize(); - + return 0; } +void setupCaliper() +{ +#ifdef USE_CALIPER +#ifdef HAVE_MPI + cali_mpi_init(); +#endif + + cali_config_preset("CALI_CALIPER_ATTRIBUTE_PROPERTIES", + "loop=process_scope:nested" + ",iteration#qs.mainloop=process_scope:asvalue" + ",cupti.runtimeAPI=process_scope:nested" + ",cupti.driverAPI=process_scope:nested"); + + cali_set_global_string_byname("qs.git_vers", GIT_VERS); + cali_set_global_string_byname("qs.git_hash", GIT_HASH); +#endif +} + void gameOver() { mcco->fast_timer->Cumulative_Report(mcco->processor_info->rank, @@ -112,7 +151,7 @@ void cycleInit( bool loadBalance ) mcco->particle_buffer->Initialize(); MC_SourceNow(mcco); - + PopulationControl(mcco, loadBalance); // controls particle population RouletteLowWeightParticles(mcco); // Delete particles with low statistical weight @@ -125,7 +164,7 @@ void cycleInit( bool loadBalance ) __global__ void CycleTrackingKernel( MonteCarlo* monteCarlo, int num_particles, ParticleVault* processingVault, ParticleVault* processedVault ) { - int global_index = getGlobalThreadID(); + int global_index = getGlobalThreadID(); if( global_index < num_particles ) { @@ -167,9 +206,9 @@ void cycleTracking(MonteCarlo *monteCarlo) ParticleVault *processingVault = my_particle_vault.getTaskProcessingVault(processing_vault); ParticleVault *processedVault = my_particle_vault.getTaskProcessedVault(processed_vault); - + int numParticles = processingVault->size(); - + if ( numParticles != 0 ) { NVTX_Range 
trackingKernel("cycleTracking_TrackingKernel"); // range ends at end of scope @@ -187,28 +226,28 @@ void cycleTracking(MonteCarlo *monteCarlo) dim3 grid(1,1,1); dim3 block(1,1,1); int runKernel = ThreadBlockLayout( grid, block, numParticles); - + //Call Cycle Tracking Kernel if( runKernel ) CycleTrackingKernel<<>>( monteCarlo, numParticles, processingVault, processedVault ); - + //Synchronize the stream so that memory is copied back before we begin MPI section cudaPeekAtLastError(); cudaDeviceSynchronize(); #endif } break; - + case gpuWithOpenMP: { int nthreads=128; - if (numParticles < 64*56 ) + if (numParticles < 64*56 ) nthreads = 64; int nteams = (numParticles + nthreads - 1 ) / nthreads; nteams = nteams > 1 ? nteams : 1; #ifdef HAVE_OPENMP_TARGET - #pragma omp target enter data map(to:monteCarlo[0:1]) - #pragma omp target enter data map(to:processingVault[0:1]) + #pragma omp target enter data map(to:monteCarlo[0:1]) + #pragma omp target enter data map(to:processingVault[0:1]) #pragma omp target enter data map(to:processedVault[0:1]) #pragma omp target teams distribute parallel for num_teams(nteams) thread_limit(128) #endif @@ -245,7 +284,7 @@ void cycleTracking(MonteCarlo *monteCarlo) // Next, communicate particles that have crossed onto // other MPI ranks. NVTX_Range cleanAndComm("cycleTracking_clean_and_comm"); - + SendQueue &sendQueue = *(my_particle_vault.getSendQueue()); monteCarlo->particle_buffer->Allocate_Send_Buffer( sendQueue ); @@ -314,7 +353,7 @@ void cycleFinalize() mcco->_tallies->_balanceTask[0]._end = mcco->_particleVaultContainer->sizeProcessed(); // Update the cumulative tally data. 
- mcco->_tallies->CycleFinalize(mcco); + mcco->_tallies->CycleFinalize(mcco); mcco->time_info->cycle++; @@ -322,4 +361,3 @@ void cycleFinalize() MC_FASTTIMER_STOP(MC_Fast_Timer::cycleFinalize); } - From 326df96a7766c3b8c61afca2a3ff093953750f70 Mon Sep 17 00:00:00 2001 From: David Boehme Date: Wed, 11 Sep 2019 10:41:11 -0700 Subject: [PATCH 4/8] Use Caliper ConfigManager --- src/Parameters.cc | 4 ++++ src/Parameters.hh | 1 + src/main.cc | 21 ++++++++++++++++----- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/Parameters.cc b/src/Parameters.cc index a7205da6..4111313f 100644 --- a/src/Parameters.cc +++ b/src/Parameters.cc @@ -226,6 +226,8 @@ namespace esName[0] = '\0'; char xsec[1024]; xsec[0] = '\0'; + char calicfg[1024]; + calicfg[0] = '\0'; addArg("help", 'h', 0, 'i', &(help), 0, "print this message"); addArg("dt", 'D', 1, 'd', &(sp.dt), 0, "time step (seconds)"); @@ -253,12 +255,14 @@ namespace addArg("bTally", 'B', 1, 'i', &(sp.balanceTallyReplications), 0, "number of balance tally replications"); addArg("fTally", 'F', 1, 'i', &(sp.fluxTallyReplications), 0, "number of scalar flux tally replications"); addArg("cTally", 'C', 1, 'i', &(sp.cellTallyReplications), 0, "number of scalar cell tally replications"); + addArg("caliper-config", 'P', 1, 's', &(calicfg), sizeof(calicfg), "Caliper configuration"); processArgs(argc, argv); sp.inputFile = name; sp.energySpectrum = esName; sp.crossSectionsOut = xsec; + sp.caliperConfig = calicfg; if (help) { diff --git a/src/Parameters.hh b/src/Parameters.hh index 79dfe3cf..d4ce91c4 100644 --- a/src/Parameters.hh +++ b/src/Parameters.hh @@ -164,6 +164,7 @@ struct SimulationParameters int fluxTallyReplications; //!< Number of replications for the scalar flux tally int cellTallyReplications; //!< Number of replications for the scalar cell tally int coralBenchmark; //!< enable correctness check for Coral2 benchmark + std::string caliperConfig; //!< Caliper configuration string }; struct Parameters diff --git 
a/src/main.cc b/src/main.cc index c692059a..56b8c8f8 100644 --- a/src/main.cc +++ b/src/main.cc @@ -28,6 +28,7 @@ #ifdef USE_CALIPER #include +#include #ifdef HAVE_MPI #include #endif @@ -51,8 +52,17 @@ int main(int argc, char** argv) Parameters params = getParameters(argc, argv); printParameters(params, cout); +#ifdef USE_CALIPER setupCaliper(); + cali::ConfigManager calimgr(params.simulationParams.caliperConfig.c_str()); + + if (calimgr.error()) + std::cerr << "caliper config error: " << calimgr.error_msg() << std::endl; + + calimgr.start(); +#endif + // mcco stores just about everything. mcco = initMC(params); @@ -100,6 +110,10 @@ int main(int argc, char** argv) delete mcco; #endif +#ifdef USE_CALIPER + calimgr.flush(); +#endif + mpiFinalize(); return 0; @@ -112,11 +126,8 @@ void setupCaliper() cali_mpi_init(); #endif - cali_config_preset("CALI_CALIPER_ATTRIBUTE_PROPERTIES", - "loop=process_scope:nested" - ",iteration#qs.mainloop=process_scope:asvalue" - ",cupti.runtimeAPI=process_scope:nested" - ",cupti.driverAPI=process_scope:nested"); + cali_config_preset("CALI_LOG_VERBOSITY", "0"); + cali_config_preset("CALI_CALIPER_ATTRIBUTE_DEFAULT_SCOPE", "process"); cali_set_global_string_byname("qs.git_vers", GIT_VERS); cali_set_global_string_byname("qs.git_hash", GIT_HASH); From 5fff3bf3c2932d0d7cce979f7d8ec93eb4c7009a Mon Sep 17 00:00:00 2001 From: David Boehme Date: Thu, 2 Jan 2020 17:04:31 -0800 Subject: [PATCH 5/8] Update Makefile --- src/Makefile | 53 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/src/Makefile b/src/Makefile index 5cef6961..aee008a1 100644 --- a/src/Makefile +++ b/src/Makefile @@ -106,8 +106,6 @@ CXXFLAGS = CPPFLAGS = LDFLAGS = -# CALIPER_DIR = $(spack location --install-dir caliper) - ############################################################################### # Very simple GCC build with OpenMP but without MPI. 
# This works on a Macbook (if gcc is installed) @@ -139,21 +137,50 @@ LDFLAGS = ############################################################################### ### GCC -- with MPI and OpenMP and Caliper support ############################################################################### -CALIPER_DIR = ${HOME}/local/caliper/toss3-release +#CALIPER_DIR = $(spack location --install-dir caliper) +# CALIPER_DIR = ${HOME}/local/caliper/toss3-release -CALIPER_FLAGS = -I${CALIPER_DIR}/include -DUSE_CALIPER -CALIPER_LDFLAGS = -Wl,-rpath ${CALIPER_DIR}/lib64 -L${CALIPER_DIR}/lib64 -lcaliper-mpi -lcaliper +# CALIPER_FLAGS = -I${CALIPER_DIR}/include -DUSE_CALIPER +# CALIPER_LDFLAGS = -Wl,-rpath ${CALIPER_DIR}/lib64 -L${CALIPER_DIR}/lib64 -lcaliper-mpi -lcaliper + +# OPENMP_FLAGS = -DHAVE_OPENMP -fopenmp +# OPENMP_LDFLAGS = -fopenmp +# MPI_FLAGS = -DHAVE_MPI +# OPTFLAGS = -g -O2 + +# CXX=mpicxx +# CXXFLAGS = -std=c++11 $(OPTFLAGS) -Wpedantic +# CPPFLAGS = $(MPI_FLAGS) $(OPENMP_FLAGS) $(CALIPER_FLAGS) +# LDFLAGS = $(OPENMP_LDFLAGS) $(CALIPER_LDFLAGS) + +############################################################################### +# Cuda on LLNL Lassen w/ Caliper +############################################################################### +## Choose one Cuda path +# CUDA_PATH = /usr/local/cuda -OPENMP_FLAGS = -DHAVE_OPENMP -fopenmp -OPENMP_LDFLAGS = -fopenmp -MPI_FLAGS = -DHAVE_MPI -OPTFLAGS = -g -O2 +CALIPER_DIR=${HOME}/local/caliper/lassen-cuda10 +CUDA_PATH = /usr/tce/packages/cuda/cuda-10.1.168 -CXX=mpicxx -CXXFLAGS = -std=c++11 $(OPTFLAGS) -Wpedantic -CPPFLAGS = $(MPI_FLAGS) $(OPENMP_FLAGS) $(CALIPER_FLAGS) -LDFLAGS = $(OPENMP_LDFLAGS) $(CALIPER_LDFLAGS) +HOST_COMPILER = mpicxx +CALIPER_FLAGS = -I${CALIPER_DIR}/include -DUSE_CALIPER +CALIPER_LDFLAGS = -L${CALIPER_DIR}/lib64 -lcaliper + +OPTFLAGS = -O2 -g +## Version below for debugging +##OPTFLAGS = -DUSE_NVTX -g -G -lineinfo -O0 + +CUDA_FLAGS = -I${CUDA_PATH}/include/ +#CUDA_LDFLAGS = -L${CUDA_PATH}/lib64/ -lcuda 
-lcudart +# +CXX=$(CUDA_PATH)/bin/nvcc +CXXFLAGS = -DHAVE_CUDA -std=c++11 $(OPTFLAGS) -Xptxas -v +CXXFLAGS += -gencode=arch=compute_60,code=\"sm_60,compute_60\" +CXXFLAGS += --compiler-bindir=$(HOST_COMPILER) +CPPFLAGS = -x cu -dc -DHAVE_MPI -DHAVE_ASYNC_MPI $(CALIPER_FLAGS) +LDFLAGS = $(CUDA_LDFLAGS) $(CALIPER_LDFLAGS) +##LDFLAGS += ${CUDA_PATH}/lib64/libnvToolsExt.so ############################################################################### # LLNL LC BG/Q Comilers # From cadd87654b30cb6570a67311210aabf6bd63b0c4 Mon Sep 17 00:00:00 2001 From: David Boehme Date: Wed, 26 Feb 2025 17:13:52 -0800 Subject: [PATCH 6/8] Update Caliper annotations and add Adiak support --- src/MC_Fast_Timer.hh | 15 ++----- src/Parameters.cc | 98 ++++++++++++++++++++++++++++++++++++++++++++ src/Parameters.hh | 1 + src/main.cc | 48 +++++++++++++++------- 4 files changed, 136 insertions(+), 26 deletions(-) diff --git a/src/MC_Fast_Timer.hh b/src/MC_Fast_Timer.hh index dd36384b..88ae46ac 100644 --- a/src/MC_Fast_Timer.hh +++ b/src/MC_Fast_Timer.hh @@ -49,19 +49,12 @@ class MC_Fast_Timer_Container { public: MC_Fast_Timer_Container() -#ifdef USE_CALIPER - : cali_annotation("mc.timer", CALI_ATTR_SCOPE_PROCESS | CALI_ATTR_NESTED) -#endif {} ; // constructor void Cumulative_Report(int mpi_rank, int num_ranks, MPI_Comm comm_world, uint64_t numSegments); void Last_Cycle_Report(int report_time, int mpi_rank, int num_ranks, MPI_Comm comm_world); void Clear_Last_Cycle_Timers(); MC_Fast_Timer timers[MC_Fast_Timer::Num_Timers]; // timers for various routines -#ifdef USE_CALIPER - cali::Annotation cali_annotation; -#endif - private: void Print_Cumulative_Heading(int mpi_rank); void Print_Last_Cycle_Heading(int mpi_rank); @@ -102,9 +95,9 @@ extern const char *mc_fast_timer_names[MC_Fast_Timer::Num_Timers]; #ifdef USE_CALIPER #define MC_FASTTIMER_START(timerIndex) \ if (omp_get_thread_num() == 0) { \ + cali_begin_region(mc_fast_timer_names[timerIndex]); \ mcco->fast_timer->timers[timerIndex].startClock 
= std::chrono::high_resolution_clock::now(); \ - } \ - mcco->fast_timer->cali_annotation.begin(mc_fast_timer_names[timerIndex]); + } #define MC_FASTTIMER_STOP(timerIndex) \ if ( omp_get_thread_num() == 0 ) { \ @@ -116,8 +109,8 @@ extern const char *mc_fast_timer_names[MC_Fast_Timer::Num_Timers]; std::chrono::duration_cast \ (mcco->fast_timer->timers[timerIndex].stopClock - mcco->fast_timer->timers[timerIndex].startClock).count(); \ mcco->fast_timer->timers[timerIndex].numCalls++; \ - } \ - mcco->fast_timer->cali_annotation.end(); + cali_end_region(mc_fast_timer_names[timerIndex]); \ + } #else // not defined USE_CALIPER diff --git a/src/Parameters.cc b/src/Parameters.cc index 4111313f..80488e94 100644 --- a/src/Parameters.cc +++ b/src/Parameters.cc @@ -33,6 +33,10 @@ #include "InputBlock.hh" #include "utilsMpi.hh" +#ifdef USE_CALIPER +#include +#endif + using std::string; using std::ifstream; using std::make_pair; @@ -94,6 +98,100 @@ Parameters getParameters(int argc, char** argv) return params; } +void saveParametersInAdiak(const Parameters& pp) +{ +#ifdef USE_CALIPER + adiak::value("dt", pp.simulationParams.dt); + adiak::value("fMax", pp.simulationParams.fMax); + adiak::value("inputFile", adiak::path(pp.simulationParams.inputFile)); + adiak::value("energySpectrum", pp.simulationParams.energySpectrum); + adiak::value("boundaryCondition", pp.simulationParams.boundaryCondition); + adiak::value("loadBalance", pp.simulationParams.loadBalance); + adiak::value("cycleTimers", pp.simulationParams.cycleTimers); + adiak::value("debugThreads", pp.simulationParams.debugThreads); + adiak::value("lx", pp.simulationParams.lx); + adiak::value("ly", pp.simulationParams.ly); + adiak::value("lz", pp.simulationParams.lz); + adiak::value("nParticles", pp.simulationParams.nParticles); + adiak::value("batchSize", pp.simulationParams.batchSize); + adiak::value("nBatches", pp.simulationParams.nBatches); + adiak::value("nSteps", pp.simulationParams.nSteps); + adiak::value("nx", 
pp.simulationParams.nx); + adiak::value("ny", pp.simulationParams.ny); + adiak::value("nz", pp.simulationParams.nz); + adiak::value("seed", pp.simulationParams.seed); + adiak::value("xDom", pp.simulationParams.xDom); + adiak::value("yDom", pp.simulationParams.yDom); + adiak::value("zDom", pp.simulationParams.zDom); + adiak::value("eMax", pp.simulationParams.eMax); + adiak::value("eMin", pp.simulationParams.eMin); + adiak::value("nGroups", pp.simulationParams.nGroups); + adiak::value("lowWeightCutoff", pp.simulationParams.lowWeightCutoff); + adiak::value("bTally", pp.simulationParams.balanceTallyReplications); + adiak::value("fTally", pp.simulationParams.fluxTallyReplications); + adiak::value("cTally", pp.simulationParams.cellTallyReplications); + adiak::value("coralBenchmark", pp.simulationParams.coralBenchmark); + adiak::value("crossSectionsOut", pp.simulationParams.crossSectionsOut); + + for (size_t i = 0; i < pp.geometryParams.size(); ++i) { + std::string prefix("geometry."); + prefix.append(std::to_string(i)); + prefix.append("."); + const GeometryParameters& gp = pp.geometryParams[i]; + adiak::value(prefix+"material", gp.materialName); + switch (gp.shape) + { + case GeometryParameters::BRICK: + adiak::value(prefix+"shape", "brick"); + adiak::value(prefix+"xMax", gp.xMax); + adiak::value(prefix+"xMin", gp.xMin); + adiak::value(prefix+"yMax", gp.yMax); + adiak::value(prefix+"yMin", gp.yMin); + adiak::value(prefix+"zMax", gp.zMax); + adiak::value(prefix+"zMin", gp.zMin); + break; + case GeometryParameters::SPHERE: + adiak::value(prefix+"shape", "sphere"); + adiak::value(prefix+"xCenter", gp.xCenter); + adiak::value(prefix+"yCenter", gp.yCenter); + adiak::value(prefix+"zCenter", gp.zCenter); + break; + default: + qs_assert(false); + } + } + + for (const auto& material : pp.materialParams) { + std::string prefix("material."); + prefix.append(material.first); + prefix.append("."); + adiak::value(prefix+"mass", material.second.mass); + 
adiak::value(prefix+"nIsotopes", material.second.nIsotopes); + adiak::value(prefix+"nReactions", material.second.nReactions); + adiak::value(prefix+"sourceRate", material.second.sourceRate); + adiak::value(prefix+"totalCrossSection", material.second.totalCrossSection); + adiak::value(prefix+"absorptionCrossSection", material.second.absorptionCrossSection); + adiak::value(prefix+"fissionCrossSection", material.second.fissionCrossSection); + adiak::value(prefix+"scatteringCrossSection", material.second.scatteringCrossSection); + adiak::value(prefix+"absorptionCrossSectionRatio", material.second.absorptionCrossSectionRatio); + adiak::value(prefix+"fissionCrossSectionRatio", material.second.fissionCrossSectionRatio); + adiak::value(prefix+"scatteringCrossSectionRatio", material.second.scatteringCrossSectionRatio); + } + + for (const auto& csp : pp.crossSectionParams) { + std::string prefix("crossection."); + prefix.append(csp.first); + prefix.append("."); + adiak::value(prefix+"A", csp.second.aa); + adiak::value(prefix+"B", csp.second.bb); + adiak::value(prefix+"C", csp.second.cc); + adiak::value(prefix+"D", csp.second.dd); + adiak::value(prefix+"E", csp.second.ee); + adiak::value(prefix+"nuBar", csp.second.nuBar); + } +#endif +} + void printParameters(const Parameters& pp, ostream& out) { int rank = -1; diff --git a/src/Parameters.hh b/src/Parameters.hh index d4ce91c4..54f9e371 100644 --- a/src/Parameters.hh +++ b/src/Parameters.hh @@ -177,6 +177,7 @@ struct Parameters Parameters getParameters(int argc, char** argv); void printParameters(const Parameters& params, std::ostream& out); +void saveParametersInAdiak(const Parameters& parms); std::ostream& operator<<(std::ostream& out, const SimulationParameters& pp); std::ostream& operator<<(std::ostream& out, const GeometryParameters& pp); diff --git a/src/main.cc b/src/main.cc index 9eceb528..b4e16f61 100644 --- a/src/main.cc +++ b/src/main.cc @@ -27,14 +27,15 @@ #include "git_vers.hh" #ifdef USE_CALIPER +#include 
#include #include -#ifdef HAVE_MPI -#include +#if _OPENMP +#include #endif #endif -void setupCaliper(); +void setupCaliper(const Parameters&); void gameOver(); void cycleInit( bool loadBalance ); void cycleTracking(MonteCarlo* monteCarlo); @@ -53,7 +54,7 @@ int main(int argc, char** argv) printParameters(params, cout); #ifdef USE_CALIPER - setupCaliper(); + setupCaliper(params); cali::ConfigManager calimgr(params.simulationParams.caliperConfig.c_str()); @@ -73,9 +74,8 @@ int main(int argc, char** argv) const int nSteps = params.simulationParams.nSteps; #ifdef USE_CALIPER - CALI_CXX_MARK_LOOP_BEGIN(mainloop, "qs.mainloop"); + CALI_CXX_MARK_LOOP_BEGIN(mainloop, "mainloop"); #endif - for (int ii=0; iiprocessor_info->num_processors, mcco->processor_info->comm_mc_world ); } - #ifdef USE_CALIPER CALI_CXX_MARK_LOOP_END(mainloop); #endif @@ -119,18 +118,37 @@ int main(int argc, char** argv) return 0; } -void setupCaliper() +void setupCaliper(const Parameters& params) { #ifdef USE_CALIPER -#ifdef HAVE_MPI - cali_mpi_init(); -#endif - - cali_config_preset("CALI_LOG_VERBOSITY", "0"); cali_config_preset("CALI_CALIPER_ATTRIBUTE_DEFAULT_SCOPE", "process"); - cali_set_global_string_byname("qs.git_vers", GIT_VERS); - cali_set_global_string_byname("qs.git_hash", GIT_HASH); + adiak::value("git_version", GIT_VERS); + adiak::value("git_hash", GIT_HASH); + adiak::collect_all(); + +#if _OPENMP + adiak::value("max_threads", omp_get_max_threads()); +#else + adiak::value("max_threads", 1); +#endif +#ifdef GPU_NATIVE + adiak::value("gpu_native", 1); +#else + adiak::value("gpu_native", 0); +#endif +#ifdef HAVE_CUDA + adiak::value("have_cuda", 1); +#else + adiak::value("have_cuda", 0); +#endif +#ifdef HAVE_HIP + adiak::value("have_hip", 1); +#else + adiak::value("have_hip", 0); +#endif + + saveParametersInAdiak(params); #endif } From e22b5cbb8564d5fdb6d8b8f42fbcbc2e4f9837b9 Mon Sep 17 00:00:00 2001 From: David Boehme Date: Thu, 27 Feb 2025 09:29:17 -0800 Subject: [PATCH 7/8] Add figure of 
merit to Adiak metadata --- src/MC_Fast_Timer.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/MC_Fast_Timer.cc b/src/MC_Fast_Timer.cc index 70aa7bf9..415860a6 100644 --- a/src/MC_Fast_Timer.cc +++ b/src/MC_Fast_Timer.cc @@ -5,6 +5,10 @@ #include "Globals.hh" #include "portability.hh" +#ifdef USE_CALIPER +#include +#endif + const char *mc_fast_timer_names[MC_Fast_Timer::Num_Timers] = { "main", @@ -101,6 +105,12 @@ void MC_Fast_Timer_Container::Cumulative_Report(int mpi_rank, int num_ranks, MPI "Figure Of Merit", (numSegments / (max_clock[cycleTracking_Index]*1e-6)), "[Num Segments / Cycle Tracking Time]" ); + +#ifdef USE_CALIPER + adiak::value("numSegments", numSegments); + adiak::value("CycleTrackingTime", max_clock[cycleTracking_Index]*1e-6); + adiak::value("FigureOfMerit", numSegments / (max_clock[cycleTracking_Index]*1e-6)); +#endif } } From 7185821b4686333073d9a5338850a2281c517bd2 Mon Sep 17 00:00:00 2001 From: David Boehme Date: Thu, 27 Feb 2025 14:41:33 -0800 Subject: [PATCH 8/8] Update Caliper example in Makefile --- src/Makefile | 39 ++++++--------------------------------- 1 file changed, 6 insertions(+), 33 deletions(-) diff --git a/src/Makefile b/src/Makefile index a570bf2a..4c362938 100644 --- a/src/Makefile +++ b/src/Makefile @@ -88,7 +88,8 @@ # is a performance profiling / analysis library for # tracing, sampling, HW counter measurements, and much more. # When enabled, Quicksilver will link in the Caliper -# library and export its timed regions to Caliper. See +# and Adiak libraries, export its timed regions to Caliper, +# and store run metadata in Adiak. See # https://github.com/LLNL/Caliper for more information. # # -DUSE_NVTX Define this for some extra NVProf profiling information. 
@@ -163,11 +164,11 @@ LDFLAGS = -fgpu-rdc --hip-link --offload-arch=gfx90a ############################################################################### ### GCC -- with MPI and OpenMP and Caliper support ############################################################################### -#CALIPER_DIR = $(spack location --install-dir caliper) -# CALIPER_DIR = ${HOME}/local/caliper/toss3-release +# ADIAK_DIR = $(shell spack location --install-dir adiak) +# CALIPER_DIR = $(shell spack location --install-dir caliper) -# CALIPER_FLAGS = -I${CALIPER_DIR}/include -DUSE_CALIPER -# CALIPER_LDFLAGS = -Wl,-rpath ${CALIPER_DIR}/lib64 -L${CALIPER_DIR}/lib64 -lcaliper-mpi -lcaliper +# CALIPER_FLAGS = -I${CALIPER_DIR}/include -I${ADIAK_DIR}/include -DUSE_CALIPER +# CALIPER_LDFLAGS = -Wl,-rpath ${CALIPER_DIR}/lib64 -Wl,-rpath ${ADIAK_DIR}/lib -L${CALIPER_DIR}/lib64 -L${ADIAK_DIR}/lib -lcaliper -ladiak # OPENMP_FLAGS = -DHAVE_OPENMP -fopenmp # OPENMP_LDFLAGS = -fopenmp @@ -179,34 +180,6 @@ LDFLAGS = -fgpu-rdc --hip-link --offload-arch=gfx90a # CPPFLAGS = $(MPI_FLAGS) $(OPENMP_FLAGS) $(CALIPER_FLAGS) # LDFLAGS = $(OPENMP_LDFLAGS) $(CALIPER_LDFLAGS) -############################################################################### -# Cuda on LLNL Lassen w/ Caliper -############################################################################### -## Choose one Cuda path -# CUDA_PATH = /usr/local/cuda - -CALIPER_DIR=${HOME}/local/caliper/lassen-cuda10 -CUDA_PATH = /usr/tce/packages/cuda/cuda-10.1.168 - -HOST_COMPILER = mpicxx - -CALIPER_FLAGS = -I${CALIPER_DIR}/include -DUSE_CALIPER -CALIPER_LDFLAGS = -L${CALIPER_DIR}/lib64 -lcaliper - -OPTFLAGS = -O2 -g -## Version below for debugging -##OPTFLAGS = -DUSE_NVTX -g -G -lineinfo -O0 - -CUDA_FLAGS = -I${CUDA_PATH}/include/ -#CUDA_LDFLAGS = -L${CUDA_PATH}/lib64/ -lcuda -lcudart -# -CXX=$(CUDA_PATH)/bin/nvcc -CXXFLAGS = -DHAVE_CUDA -std=c++11 $(OPTFLAGS) -Xptxas -v -CXXFLAGS += -gencode=arch=compute_60,code=\"sm_60,compute_60\" -CXXFLAGS += 
--compiler-bindir=$(HOST_COMPILER) -CPPFLAGS = -x cu -dc -DHAVE_MPI -DHAVE_ASYNC_MPI $(CALIPER_FLAGS) -LDFLAGS = $(CUDA_LDFLAGS) $(CALIPER_LDFLAGS) -##LDFLAGS += ${CUDA_PATH}/lib64/libnvToolsExt.so ############################################################################### # LLNL LC BG/Q Comilers #