dump walkers periodically for post-processing #4940

Open · wants to merge 6 commits into develop
Changes from 5 commits
51 changes: 37 additions & 14 deletions src/Particle/HDFWalkerOutput.cpp
@@ -79,7 +79,7 @@ HDFWalkerOutput::~HDFWalkerOutput() = default;
  * - walker_partition (int array)
  * - walkers (nw,np,3)
  */
-bool HDFWalkerOutput::dump(const WalkerConfigurations& W, int nblock)
+bool HDFWalkerOutput::dump(const WalkerConfigurations& W, int nblock, const bool identify_block)
 {
   std::filesystem::path FileName = myComm->getName();
   FileName.concat(hdf::config_ext);
@@ -93,22 +93,41 @@ bool HDFWalkerOutput::dump(const WalkerConfigurations& W, int nblock)
 
   //try to use collective
   hdf_archive dump_file(myComm, true);
-  dump_file.create(FileName);
-  HDFVersion cur_version;
-  dump_file.write(cur_version.version, hdf::version);
+  bool exists = dump_file.open(FileName);
+  if (!exists) // create new config.h5
+  {
+    dump_file.create(FileName);
+    HDFVersion cur_version;
+    dump_file.write(cur_version.version, hdf::version);
+  }
   dump_file.push(hdf::main_state);
   dump_file.write(nblock, "block");
 
-  write_configuration(W, dump_file, nblock);
+  write_configuration(W, dump_file, nblock, identify_block);
   dump_file.close();
 
   currentConfigNumber++;
   prevFile = FileName;
   return true;
 }
 
-void HDFWalkerOutput::write_configuration(const WalkerConfigurations& W, hdf_archive& hout, int nblock)
+void HDFWalkerOutput::write_configuration(const WalkerConfigurations& W, hdf_archive& hout, int nblock, const bool identify_block)
 {
+  std::string partition_name = "walker_partition";
+  std::string dataset_name = hdf::walkers;
+  std::string weights_name = hdf::walker_weights;
+  if (identify_block)
+  { // change h5 slab name to record more than one block
+    std::stringstream block_str;
+    block_str << nblock;
+    partition_name += block_str.str();
+    dataset_name += block_str.str();
+    weights_name += block_str.str();
+  } else { // remove previous checkpoint
+    std::vector<std::string> names = {"block", hdf::num_walkers, partition_name, dataset_name, weights_name};
+    for (auto aname : names)
+      if (hout.is_dataset(aname)) hout.unlink(aname);
+  }
Contributor: Could you mention the dataset name used for recording, as opposed to the one used for checkpointing?

Contributor (author): The dataset name for recording is walkers{block_idx}, whereas the name for a checkpoint is simply walkers.
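
A post-processing reader only has to rebuild those block-tagged names. Below is a minimal sketch of such a reader; it is not part of this PR, the helper name read_block_walkers is illustrative, and it assumes the usual QMCPACK layout (a state_0 group holding a walkers<block> dataset of shape (nw, np, 3)) and serial HDF5.

#include <hdf5.h>
#include <stdexcept>
#include <string>
#include <vector>

// Read the walker coordinates recorded at a given block from <project>.config.h5.
// Returns a flat (nw * np * 3) array; throws if that block was never dumped.
std::vector<double> read_block_walkers(const std::string& fname, int block)
{
  const std::string dset_path = "state_0/walkers" + std::to_string(block);
  hid_t file = H5Fopen(fname.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
  if (file < 0)
    throw std::runtime_error("cannot open " + fname);
  hid_t dset = H5Dopen2(file, dset_path.c_str(), H5P_DEFAULT);
  if (dset < 0)
  {
    H5Fclose(file);
    throw std::runtime_error("missing dataset " + dset_path);
  }
  hid_t space = H5Dget_space(dset);
  hsize_t dims[3]; // (nw, np, 3)
  H5Sget_simple_extent_dims(space, dims, nullptr);
  std::vector<double> coords(dims[0] * dims[1] * dims[2]);
  H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, coords.data());
  H5Sclose(space);
  H5Dclose(dset);
  H5Fclose(file);
  return coords;
}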


   const int wb = OHMMS_DIM * number_of_particles_;
   if (nblock > block)
   {
@@ -120,7 +139,11 @@ void HDFWalkerOutput::write_configuration(const WalkerConfigurations& W, hdf_archive& hout, int nblock)
 
   auto& walker_offsets = W.getWalkerOffsets();
   number_of_walkers_ = walker_offsets[myComm->size()];
-  hout.write(number_of_walkers_, hdf::num_walkers);
+  if (!identify_block)
+  {
+    hout.write(nblock, "block");
+    hout.write(number_of_walkers_, hdf::num_walkers);
+  }
 
   if (hout.is_parallel())
   {
@@ -142,26 +165,26 @@ void HDFWalkerOutput::write_configuration(const WalkerConfigurations& W, hdf_archive& hout, int nblock)
         myWalkerOffset.push_back(walker_offsets[myComm->rank()]);
       }
       hyperslab_proxy<std::vector<int>, 1> slab(myWalkerOffset, gcounts, counts, offsets);
-      hout.write(slab, "walker_partition");
+      hout.write(slab, partition_name);
     }
     { // write walker configuration
       std::array<size_t, 3> gcounts{number_of_walkers_, number_of_particles_, OHMMS_DIM};
       std::array<size_t, 3> counts{W.getActiveWalkers(), number_of_particles_, OHMMS_DIM};
       std::array<size_t, 3> offsets{static_cast<size_t>(walker_offsets[myComm->rank()]), 0, 0};
       hyperslab_proxy<BufferType, 3> slab(RemoteData[0], gcounts, counts, offsets);
-      hout.write(slab, hdf::walkers);
+      hout.write(slab, dataset_name);
     }
     {
       std::array<size_t, 1> gcounts{number_of_walkers_};
       std::array<size_t, 1> counts{W.getActiveWalkers()};
       std::array<size_t, 1> offsets{static_cast<size_t>(walker_offsets[myComm->rank()])};
       hyperslab_proxy<std::vector<QMCTraits::FullPrecRealType>, 1> slab(RemoteDataW[0], gcounts, counts, offsets);
-      hout.write(slab, hdf::walker_weights);
+      hout.write(slab, weights_name);
     }
   }
   else
   { //gaterv to the master and master writes it, could use isend/irecv
-    hout.write(walker_offsets, "walker_partition");
+    hout.write(walker_offsets, partition_name);
     if (myComm->size() > 1)
     {
       std::vector<int> displ(myComm->size()), counts(myComm->size());
@@ -186,11 +209,11 @@ void HDFWalkerOutput::write_configuration(const WalkerConfigurations& W, hdf_archive& hout, int nblock)
     int buffer_id = (myComm->size() > 1) ? 1 : 0;
     {
       std::array<size_t, 3> gcounts{number_of_walkers_, number_of_particles_, OHMMS_DIM};
-      hout.writeSlabReshaped(RemoteData[buffer_id], gcounts, hdf::walkers);
+      hout.writeSlabReshaped(RemoteData[buffer_id], gcounts, dataset_name);
     }
     {
       std::array<size_t, 1> gcounts{number_of_walkers_};
-      hout.writeSlabReshaped(RemoteDataW[buffer_id], gcounts, hdf::walker_weights);
+      hout.writeSlabReshaped(RemoteDataW[buffer_id], gcounts, weights_name);
     }
   }
 }
4 changes: 2 additions & 2 deletions src/Particle/HDFWalkerOutput.h
@@ -52,7 +52,7 @@ class HDFWalkerOutput
   /** dump configurations
    * @param w walkers
    */
-  bool dump(const WalkerConfigurations& w, int block);
+  bool dump(const WalkerConfigurations& w, int block, const bool identify_block=false);
Contributor: Please document what identify_block=true does.
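
A possible wording for that doc comment, offered only as a suggestion (not text from the PR), which would also cover the write_configuration declaration below:

  /** dump configurations
   * @param w walkers
   * @param block current block index
   * @param identify_block if true, write block-tagged datasets (e.g. walkers<block>)
   *        that accumulate over the run for post-processing; if false (default),
   *        overwrite the plain checkpoint datasets
   */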

   // bool dump(ForwardWalkingHistoryObject& FWO);
 
 private:
@@ -62,7 +62,7 @@ class HDFWalkerOutput
   std::array<BufferType, 2> RemoteData;
   std::array<std::vector<QMCTraits::FullPrecRealType>, 2> RemoteDataW;
   int block;
-  void write_configuration(const WalkerConfigurations& W, hdf_archive& hout, int block);
+  void write_configuration(const WalkerConfigurations& W, hdf_archive& hout, int block, const bool identify_block);
Contributor: Please document what identify_block=true does.

 };
 
 } // namespace qmcplusplus
5 changes: 5 additions & 0 deletions src/QMCDrivers/QMCDriver.cpp
@@ -306,6 +306,11 @@
     branchEngine->write(RootName, true); //save energy_history
     RandomNumberControl::write(RootName, myComm);
   }
+  if (Period4ConfigDump!=0 && block%Period4ConfigDump == 0)
+  { // append current walkers to config.h5
+    const bool identify_block = true;
+    wOut->dump(W, block, identify_block);
+  }
 }
 
 bool QMCDriver::finalize(int block, bool dumpwalkers)
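
Taken together, a run now leaves two kinds of walker records in <project>.config.h5. A minimal sketch of the resulting call pattern follows; only the second branch is added by this PR, the checkpoint guard is paraphrased from the surrounding driver code rather than quoted from it, and the dataset names in the comments assume the conventions used above.

// checkpoint (pre-existing): overwrites state_0/walkers, walker_partition, walker_weights
if (DumpConfig && block % Period4CheckPoint == 0)
  wOut->dump(W, block); // identify_block defaults to false

// periodic record (this PR): appends state_0/walkers<block>, walker_partition<block>, walker_weights<block>
if (Period4ConfigDump != 0 && block % Period4ConfigDump == 0)
  wOut->dump(W, block, /*identify_block=*/true);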