Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dump walkers periodically for post-processing #4940

Open
wants to merge 6 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 37 additions & 14 deletions src/Particle/HDFWalkerOutput.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ HDFWalkerOutput::~HDFWalkerOutput() = default;
* - walker_partition (int array)
* - walkers (nw,np,3)
*/
bool HDFWalkerOutput::dump(const WalkerConfigurations& W, int nblock)
bool HDFWalkerOutput::dump(const WalkerConfigurations& W, int nblock, const bool identify_block)
{
std::filesystem::path FileName = myComm->getName();
FileName.concat(hdf::config_ext);
Expand All @@ -93,22 +93,41 @@ bool HDFWalkerOutput::dump(const WalkerConfigurations& W, int nblock)

//try to use collective
hdf_archive dump_file(myComm, true);
dump_file.create(FileName);
HDFVersion cur_version;
dump_file.write(cur_version.version, hdf::version);
bool exists = dump_file.open(FileName);
if (!exists) // create new config.h5
{
dump_file.create(FileName);
HDFVersion cur_version;
dump_file.write(cur_version.version, hdf::version);
}
dump_file.push(hdf::main_state);
dump_file.write(nblock, "block");

write_configuration(W, dump_file, nblock);
write_configuration(W, dump_file, nblock, identify_block);
dump_file.close();

currentConfigNumber++;
prevFile = FileName;
return true;
}

void HDFWalkerOutput::write_configuration(const WalkerConfigurations& W, hdf_archive& hout, int nblock)
void HDFWalkerOutput::write_configuration(const WalkerConfigurations& W, hdf_archive& hout, int nblock, const bool identify_block)
{
std::string partition_name = "walker_partition";
std::string dataset_name = hdf::walkers;
std::string weights_name = hdf::walker_weights;
if (identify_block)
{ // change h5 slab name to record more than one block
std::stringstream block_str;
block_str << nblock;
partition_name += block_str.str();
dataset_name += block_str.str();
weights_name += block_str.str();
} else { // remove previous checkpoint
std::vector<std::string> names = {"block", hdf::num_walkers, partition_name, dataset_name, weights_name};
for (auto aname : names)
if (hout.is_dataset(aname)) hout.unlink(aname);
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you mention the dataset name used for recording, as opposed to the one used for checkpointing?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dataset name for recording is walkers{block_idx}, whereas the name for checkpoint is simply walkers.


const int wb = OHMMS_DIM * number_of_particles_;
if (nblock > block)
{
Expand All @@ -120,7 +139,11 @@ void HDFWalkerOutput::write_configuration(const WalkerConfigurations& W, hdf_arc

auto& walker_offsets = W.getWalkerOffsets();
number_of_walkers_ = walker_offsets[myComm->size()];
hout.write(number_of_walkers_, hdf::num_walkers);
if (!identify_block)
{
hout.write(nblock, "block");
hout.write(number_of_walkers_, hdf::num_walkers);
}

if (hout.is_parallel())
{
Expand All @@ -142,26 +165,26 @@ void HDFWalkerOutput::write_configuration(const WalkerConfigurations& W, hdf_arc
myWalkerOffset.push_back(walker_offsets[myComm->rank()]);
}
hyperslab_proxy<std::vector<int>, 1> slab(myWalkerOffset, gcounts, counts, offsets);
hout.write(slab, "walker_partition");
hout.write(slab, partition_name);
}
{ // write walker configuration
std::array<size_t, 3> gcounts{number_of_walkers_, number_of_particles_, OHMMS_DIM};
std::array<size_t, 3> counts{W.getActiveWalkers(), number_of_particles_, OHMMS_DIM};
std::array<size_t, 3> offsets{static_cast<size_t>(walker_offsets[myComm->rank()]), 0, 0};
hyperslab_proxy<BufferType, 3> slab(RemoteData[0], gcounts, counts, offsets);
hout.write(slab, hdf::walkers);
hout.write(slab, dataset_name);
}
{
std::array<size_t, 1> gcounts{number_of_walkers_};
std::array<size_t, 1> counts{W.getActiveWalkers()};
std::array<size_t, 1> offsets{static_cast<size_t>(walker_offsets[myComm->rank()])};
hyperslab_proxy<std::vector<QMCTraits::FullPrecRealType>, 1> slab(RemoteDataW[0], gcounts, counts, offsets);
hout.write(slab, hdf::walker_weights);
hout.write(slab, weights_name);
}
}
else
{ //gaterv to the master and master writes it, could use isend/irecv
hout.write(walker_offsets, "walker_partition");
hout.write(walker_offsets, partition_name);
if (myComm->size() > 1)
{
std::vector<int> displ(myComm->size()), counts(myComm->size());
Expand All @@ -186,11 +209,11 @@ void HDFWalkerOutput::write_configuration(const WalkerConfigurations& W, hdf_arc
int buffer_id = (myComm->size() > 1) ? 1 : 0;
{
std::array<size_t, 3> gcounts{number_of_walkers_, number_of_particles_, OHMMS_DIM};
hout.writeSlabReshaped(RemoteData[buffer_id], gcounts, hdf::walkers);
hout.writeSlabReshaped(RemoteData[buffer_id], gcounts, dataset_name);
}
{
std::array<size_t, 1> gcounts{number_of_walkers_};
hout.writeSlabReshaped(RemoteDataW[buffer_id], gcounts, hdf::walker_weights);
hout.writeSlabReshaped(RemoteDataW[buffer_id], gcounts, weights_name);
}
}
}
Expand Down
11 changes: 8 additions & 3 deletions src/Particle/HDFWalkerOutput.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,14 @@ class HDFWalkerOutput
~HDFWalkerOutput();

/** dump configurations
* @param w walkers
* Write walkers into hdf file.
* The "walkers" dataset typically resides at "state_0/walkers", which
* contains no information about when it was written (at which block).
* The identify_block flag appends the block index to uniquely identify
* each walker dump, e.g., "state_0/walkers10" is from block 10.
*
*/
bool dump(const WalkerConfigurations& w, int block);
bool dump(const WalkerConfigurations& w, int block, const bool identify_block=false);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please document what identify_block=true does.

// bool dump(ForwardWalkingHistoryObject& FWO);

private:
Expand All @@ -62,7 +67,7 @@ class HDFWalkerOutput
std::array<BufferType, 2> RemoteData;
std::array<std::vector<QMCTraits::FullPrecRealType>, 2> RemoteDataW;
int block;
void write_configuration(const WalkerConfigurations& W, hdf_archive& hout, int block);
void write_configuration(const WalkerConfigurations& W, hdf_archive& hout, int block, const bool identify_block);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please document what identify_block=true does.

};

} // namespace qmcplusplus
Expand Down
5 changes: 5 additions & 0 deletions src/QMCDrivers/QMCDriver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,11 @@ void QMCDriver::recordBlock(int block)
branchEngine->write(RootName, true); //save energy_history
RandomNumberControl::write(RootName, myComm);
}
if (Period4ConfigDump!=0 && block%Period4ConfigDump == 0)
{ // append current walkers to config.h5
const bool identify_block = true;
wOut->dump(W, block, identify_block);
}
}

bool QMCDriver::finalize(int block, bool dumpwalkers)
Expand Down
Loading