diff --git a/examples/8c_benchmark_ptl_parallel.cpp b/examples/8c_benchmark_ptl_parallel.cpp index 91eb0c3382..20aa9c6303 100644 --- a/examples/8c_benchmark_ptl_parallel.cpp +++ b/examples/8c_benchmark_ptl_parallel.cpp @@ -44,16 +44,18 @@ using std::cout; using namespace openPMD; -using MaxResSteadyClock = std::conditional_t; +using MaxResSteadyClock = std::conditional_t< + std::chrono::high_resolution_clock::is_steady, + std::chrono::high_resolution_clock, + std::chrono::steady_clock>; /** The Memory profiler class for profiling purpose * * Simple Memory usage report that works on linux system */ -static std::chrono::time_point< MaxResSteadyClock > m_ProgStart = MaxResSteadyClock::now(); +static std::chrono::time_point m_ProgStart = + MaxResSteadyClock::now(); class MemoryProfiler { @@ -117,7 +119,6 @@ class MemoryProfiler std::string m_Name; }; - /** The Timer class for profiling purpose * * Simple Timer that measures time consumption btw constucture and destructor @@ -126,7 +127,12 @@ class MemoryProfiler class Timer { public: - enum VERBOSE_LEVEL {NONE, MIN, FULL}; + enum VERBOSE_LEVEL + { + NONE, + MIN, + FULL + }; /** * * Simple Timer @@ -136,108 +142,119 @@ class Timer */ Timer(const std::string &tag, int rank, VERBOSE_LEVEL vl = FULL) - //Timer(const std::string &tag, int rank, bool silent = false) + // Timer(const std::string &tag, int rank, bool silent = false) { m_Tag = tag; m_Rank = rank; - m_Start = MaxResSteadyClock::now(); - - /* - m_Silent = silent; - if (!m_Silent) - MemoryProfiler(rank, tag); - */ - m_Silent = vl; - if (m_Silent == FULL) - MemoryProfiler(rank, tag); + m_Start = MaxResSteadyClock::now(); + + /* + m_Silent = silent; + if (!m_Silent) + MemoryProfiler(rank, tag); + */ + m_Silent = vl; + if (m_Silent == FULL) + MemoryProfiler(rank, tag); } double getDuration() { auto curr = MaxResSteadyClock::now(); - double secs = std::chrono::duration_cast > (curr - m_Start).count(); - return secs; + double secs = + std::chrono::duration_cast >( + curr - m_Start) + .count(); + return secs; } ~Timer() { - if (m_Silent == NONE) + if (m_Silent == NONE) return; - if (m_Silent == FULL) - { - std::string tt = "~" + m_Tag; - MemoryProfiler mp(m_Rank, tt.c_str()); - } + if (m_Silent == FULL) + { + std::string tt = "~" + m_Tag; + MemoryProfiler mp(m_Rank, tt.c_str()); + } - double secs = getDuration(); + double secs = getDuration(); if (m_Rank > 0) return; std::cout << " [" << m_Tag << "] took:" << secs << " seconds"; std::cout << " Time Elapsed:" - << secs + std::chrono::duration_cast< std::chrono::duration > (m_Start - m_ProgStart).count() + << secs + + std::chrono::duration_cast >( + m_Start - m_ProgStart) + .count() << std::endl; std::cout << std::endl; } private: - std::chrono::time_point< MaxResSteadyClock > m_Start; + std::chrono::time_point m_Start; std::string m_Tag; int m_Rank = 0; - //bool m_Silent = false; + // bool m_Silent = false; VERBOSE_LEVEL m_Silent = Timer::NONE; }; class LocalProfiler { public: - LocalProfiler() = default; - ~LocalProfiler () = default; + LocalProfiler() = default; + ~LocalProfiler() = default; - void setRank(int r) {m_Rank = r;} - void update(Timer& timer) { m_Counter ++; m_Total += timer.getDuration(); } + void setRank(int r) + { + m_Rank = r; + } + void update(Timer &timer) + { + m_Counter++; + m_Total += timer.getDuration(); + } - int m_Rank = 0; // info only - int m_Counter=0; - double m_Total = 0; + int m_Rank = 0; // info only + int m_Counter = 0; + double m_Total = 0; }; -static std::map m_GlobalProfilers; +static std::map m_GlobalProfilers; class Checkpoint { public: - Checkpoint(std::string const& name, int rank) - :m_name(name) - { - auto fp = m_GlobalProfilers.find(name); - if ( fp == m_GlobalProfilers.end()) { - LocalProfiler p; - p.setRank(rank); - m_GlobalProfilers[name] = p; - } - m_Timer = new Timer(name, rank, Timer::NONE); - } - ~Checkpoint() - { - m_GlobalProfilers[m_name].update(*m_Timer); - - if (m_Timer != nullptr) - delete m_Timer; - } + Checkpoint(std::string const &name, int rank) : m_name(name) + { + auto fp = m_GlobalProfilers.find(name); + if (fp == m_GlobalProfilers.end()) + { + LocalProfiler p; + p.setRank(rank); + m_GlobalProfilers[name] = p; + } + m_Timer = new Timer(name, rank, Timer::NONE); + } + ~Checkpoint() + { + m_GlobalProfilers[m_name].update(*m_Timer); + + if (m_Timer != nullptr) + delete m_Timer; + } private: - Timer* m_Timer = nullptr; - std::string m_name; + Timer *m_Timer = nullptr; + std::string m_name; }; - - /** createDataCPU * generate a shared ptr of given size with given type & default value on * CPU @@ -295,9 +312,9 @@ template std::shared_ptr createData(const unsigned long &size, const T &val, const T &increment) { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - Checkpoint c(" CreateData", rank); + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + Checkpoint c(" CreateData", rank); #if openPMD_HAVE_CUDA_EXAMPLES return createDataGPU(size, val, increment); @@ -317,15 +334,16 @@ std::vector getBackends(bool bpOnly) res.emplace_back(".bp"); #endif - if (bpOnly) { - if (res.size() == 0) - std::cerr<<" BP is not supported "< 0) { - if ( (vec[1][0] == 't') or (vec[1][0] == 'T') ) + if (vec[1].size() > 0) + { + if ((vec[1][0] == 't') or (vec[1][0] == 'T')) input.m_UseJoinedDim = true; } return; @@ -438,24 +457,27 @@ void parse(TestInput &input, std::string line) if (vec[0].compare("usePDW") == 0) { - if (vec[1].size() > 0) { - if ( (vec[1][0] == 't') or (vec[1][0] == 'T') ) - input.m_CallPDW = true; + if (vec[1].size() > 0) + { + if ((vec[1][0] == 't') or (vec[1][0] == 'T')) + input.m_CallPDW = true; } return; } if (vec[0].compare("maxMil") == 0) { - input.m_PtlMax = ( (unsigned long) atoi(vec[1].c_str()) ) * (unsigned long) 1000000; + input.m_PtlMax = + ((unsigned long)atoi(vec[1].c_str())) * (unsigned long)1000000; return; } if (vec[0].compare("minMil") == 0) { - input.m_PtlMin = ( (unsigned long) atoi(vec[1].c_str()) ) * (unsigned long) 1000000; - if (input.m_PtlMin > input.m_PtlMax) - input.m_PtlMin = input.m_PtlMax; + input.m_PtlMin = + ((unsigned long)atoi(vec[1].c_str())) * (unsigned long)1000000; + if (input.m_PtlMin > input.m_PtlMax) + input.m_PtlMin = input.m_PtlMax; return; } @@ -494,46 +516,45 @@ int parseArgs(int argc, char *argv[], TestInput &input) infile.close(); return 1; } - std::cout<<" No input file. Using defaults. Otherwise, try: "< "< " << std::endl; return 1; } - /** TEST doWork * * run the actual test scenarios using the input */ -void doWork(TestInput & input) +void doWork(TestInput &input) { Checkpoint g("Total: ", input.m_MPIRank); auto const backends = getBackends(input.m_UseJoinedDim); try { - if ( 0 < input.m_Backend.size() ) - { - BasicParticlePattern p(input); - p.printMe(); - p.run(); - } - else - { - for (auto const &which : backends) - { - input.m_Backend = which; + if (0 < input.m_Backend.size()) + { BasicParticlePattern p(input); p.printMe(); p.run(); - } - } + } + else + { + for (auto const &which : backends) + { + input.m_Backend = which; + BasicParticlePattern p(input); + p.printMe(); + p.run(); + } + } } catch (std::exception const &ex) { if (0 == input.m_MPIRank) std::cout << "Error: " << ex.what() << std::endl; } - } /** TEST MAIN * @@ -553,32 +574,41 @@ int main(int argc, char *argv[]) return -1; } - doWork(input); { - MPI_Barrier(MPI_COMM_WORLD); - if ( 0 == input.m_MPIRank ) { - std::cout<<" ============= GLOBAL PROFILER SUMMARY =========="< result(input.m_MPISize, 0); - //unsigned long buffer[m_Input.m_MPISize]; - MPI_Allgather (&p.m_Total, 1, MPI_DOUBLE, result.data(), 1, MPI_DOUBLE, MPI_COMM_WORLD); - - auto [min, max] = std::minmax_element(result.begin(),result.end()); + MPI_Barrier(MPI_COMM_WORLD); + if (0 == input.m_MPIRank) + { + std::cout << " ============= GLOBAL PROFILER SUMMARY ==========" + << std::endl; + std::cout << "NAME: \t\t NumCalls: \t Min(sec): \t Max (secs): \n"; + } - if ( 0 == input.m_MPIRank ) - std::cout << name << "\t\t "< result(input.m_MPISize, 0); + // unsigned long buffer[m_Input.m_MPISize]; + MPI_Allgather( + &p.m_Total, + 1, + MPI_DOUBLE, + result.data(), + 1, + MPI_DOUBLE, + MPI_COMM_WORLD); + + auto [min, max] = std::minmax_element(result.begin(), result.end()); + + if (0 == input.m_MPIRank) + std::cout << name << "\t\t " << p.m_Counter << "\t" << *min + << " \t " << *max << " \t :peek " << result[0] << " " + << result[input.m_MPISize - 1] << std::endl; + } } MPI_Finalize(); - return 0; } @@ -586,7 +616,8 @@ int main(int argc, char *argv[]) * Class BasicParticlePattern * @param input: (user input class) */ -BasicParticlePattern::BasicParticlePattern(const TestInput &input) : m_Input(input) +BasicParticlePattern::BasicParticlePattern(const TestInput &input) + : m_Input(input) {} /* @@ -599,8 +630,8 @@ void BasicParticlePattern::run() if (m_Input.m_Encoding == openPMD::IterationEncoding::fileBased) { // file based std::ostringstream s; - s << m_Input.m_Prefix << "/" <(), ProperExtent(np, true)); - auto const realDataSet = - openPMD::Dataset(openPMD::determineDatatype(), ProperExtent(np, true)); + auto const intDataSet = openPMD::Dataset( + openPMD::determineDatatype(), ProperExtent(np, true)); + auto const realDataSet = openPMD::Dataset( + openPMD::determineDatatype(), ProperExtent(np, true)); currSpecies["id"].resetDataset(intDataSet); currSpecies["charge"].resetDataset(realDataSet); currSpecies["position"]["x"].resetDataset(realDataSet); - //currSpecies["positionOffset"]["x"].resetDataset(realDataSet); - //currSpecies["positionOffset"]["x"].makeConstant(0.); - + // currSpecies["positionOffset"]["x"].resetDataset(realDataSet); + // currSpecies["positionOffset"]["x"].makeConstant(0.); { - Checkpoint remove2(" SP_Barrier_2", m_Input.m_MPIRank); - MPI_Barrier(MPI_COMM_WORLD); + Checkpoint remove2(" SP_Barrier_2", m_Input.m_MPIRank); + MPI_Barrier(MPI_COMM_WORLD); } - Checkpoint remove3(" SP_cs", m_Input.m_MPIRank); if (count > 0) - { - auto ids = createData(count, offset, 1); - currSpecies["id"].storeChunk(ids, ProperExtent(offset, false), {count}); + { + auto ids = createData(count, offset, 1); + currSpecies["id"].storeChunk(ids, ProperExtent(offset, false), {count}); - auto charges = createData(count, 0.1 * step, 0.0001); - currSpecies["charge"].storeChunk(charges, ProperExtent(offset, false), {count}); + auto charges = createData(count, 0.1 * step, 0.0001); + currSpecies["charge"].storeChunk( + charges, ProperExtent(offset, false), {count}); - auto mx = createData(count, 1.0 * step, 0.0002); - currSpecies["position"]["x"].storeChunk(mx, ProperExtent(offset, false), {count}); - } + auto mx = createData(count, 1.0 * step, 0.0002); + currSpecies["position"]["x"].storeChunk( + mx, ProperExtent(offset, false), {count}); + } } // storeParticles /* @@ -738,85 +769,98 @@ void BasicParticlePattern::storeParticles(ParticleSpecies &currSpecies, int &ste * set to be a multiple of mesh size * */ -void BasicParticlePattern::getParticleLayout(unsigned long& offset, unsigned long &count, unsigned long &total) +void BasicParticlePattern::getParticleLayout( + unsigned long &offset, unsigned long &count, unsigned long &total) { - { - Checkpoint x1(" ComputeLayout", m_Input.m_MPIRank); - if (m_Input.m_PtlMin >= m_Input.m_PtlMax) - { - count = m_Input.m_PtlMax; - } - else { - std::random_device rd; // a seed source for the random number engine - std::mt19937 gen(rd()); // mersenne_twister_engine seeded with rd() - std::uniform_int_distribution<> distrib(m_Input.m_PtlMin, m_Input.m_PtlMax); - - //for (int n = 0; n != 10; ++n) - // std::cout << distrib(gen) << ' '; - count = distrib(gen); - } - - // gather from all ranks to get offset/total - - if (m_Input.m_UseJoinedDim) - return; - } - //Timer g("Gather Particle logistics ", m_Input.m_MPIRank); - Checkpoint x(" GetPTLOffset", m_Input.m_MPIRank); + Checkpoint x1(" ComputeLayout", m_Input.m_MPIRank); + if (m_Input.m_PtlMin >= m_Input.m_PtlMax) + { + count = m_Input.m_PtlMax; + } + else + { + std::random_device rd; // a seed source for the random number engine + std::mt19937 gen(rd()); // mersenne_twister_engine seeded with rd() + std::uniform_int_distribution<> distrib( + m_Input.m_PtlMin, m_Input.m_PtlMax); + + // for (int n = 0; n != 10; ++n) + // std::cout << distrib(gen) << ' '; + count = distrib(gen); + } - std::vector result(m_Input.m_MPISize, 0); - //unsigned long buffer[m_Input.m_MPISize]; - MPI_Allgather (&count, 1, MPI_UNSIGNED_LONG, result.data(), 1, MPI_UNSIGNED_LONG, MPI_COMM_WORLD); + // gather from all ranks to get offset/total - total = 0; - auto const num_results = static_cast(result.size()); - for (int i=0; i result(m_Input.m_MPISize, 0); + // unsigned long buffer[m_Input.m_MPISize]; + MPI_Allgather( + &count, + 1, + MPI_UNSIGNED_LONG, + result.data(), + 1, + MPI_UNSIGNED_LONG, + MPI_COMM_WORLD); + + total = 0; + auto const num_results = static_cast(result.size()); + for (int i = 0; i < num_results; i++) { - total += result[i]; - if (i < m_Input.m_MPIRank) { - offset += result[i]; - } + total += result[i]; + if (i < m_Input.m_MPIRank) + { + offset += result[i]; + } } - } const std::string BasicParticlePattern::getBaseFileName() const { - if (m_Input.m_UseJoinedDim) - return "8a_parallel_ptl_joined"; - return "8a_parallel_ptl"; + if (m_Input.m_UseJoinedDim) + return "8a_parallel_ptl_joined"; + return "8a_parallel_ptl"; } - -openPMD::Extent BasicParticlePattern::ProperExtent (unsigned long long n, bool init) const +openPMD::Extent +BasicParticlePattern::ProperExtent(unsigned long long n, bool init) const { - if (!m_Input.m_UseJoinedDim) - return {n}; + if (!m_Input.m_UseJoinedDim) + return {n}; - if (init) - return {openPMD::Dataset::JOINED_DIMENSION}; - else - return {}; + if (init) + return {openPMD::Dataset::JOINED_DIMENSION}; + else + return {}; } /* * Print pattern layout */ void BasicParticlePattern::printMe() { - if ( 0 < m_Input.m_MPIRank ) - return; - - std::string pdw_status=" just EndStep"; - if (m_Input.m_CallPDW) - pdw_status=" PDW + EndStep"; + if (0 < m_Input.m_MPIRank) + return; - if (m_Input.m_UseJoinedDim) - std::cout << " ====> This is a Particle Only test, With Joined Dimension applied to ADIOS."< This is a Particle Only test. " < This is a Particle Only test, With Joined " + "Dimension applied to ADIOS." + << pdw_status << std::endl; + else + std::cout << " ====> This is a Particle Only test. " << pdw_status + << std::endl; - std::cout << "\t Num steps: "<