Skip to content

Commit

Permalink
Maximum reduction for combining tape statistics.
Browse files Browse the repository at this point in the history
Alternative to the sum reduction, the previous default.
Used to correctly display static resources shared among tapes in tape
statistics.

Merge pull request #53 from 'feature/localReductionsForTapeValues'
Reviewed-by: Max Sagebaum <[email protected]>
  • Loading branch information
jblueh committed Mar 5, 2024
2 parents 7534499 + d1d16c7 commit 8408470
Show file tree
Hide file tree
Showing 15 changed files with 189 additions and 50 deletions.
3 changes: 2 additions & 1 deletion documentation/developer/simpleTape.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,8 @@ struct SimpleTape : public codi::ReverseTapeInterface<double, double, int> {

values.addSection("Adjoint vector");
values.addLongEntry("Number of adjoints", (1 + maxIdentifier));
values.addDoubleEntry("Memory allocated", sizeof(double) * (1 + maxIdentifier), true, true);
values.addDoubleEntry("Memory allocated", sizeof(double) * (1 + maxIdentifier),
codi::TapeValues::LocalReductionOperation::Sum, true, true);

values.addSection("Index manager");
values.addLongEntry("Max. live indices", (1 + maxIdentifier));
Expand Down
9 changes: 6 additions & 3 deletions include/codi/expressions/real/binaryOperators.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,8 @@ namespace codi {

/// \copydoc codi::BinaryOperation::gradientB()
template<typename ArgA, typename ArgB>
static CODI_INLINE RealTraits::PassiveReal<Real> gradientB(ArgA const& argA, ArgB const& argB, Real const& result) {
static CODI_INLINE RealTraits::PassiveReal<Real> gradientB(ArgA const& argA, ArgB const& argB,
Real const& result) {
CODI_UNUSED(argA, argB, result);

return 0.0;
Expand Down Expand Up @@ -525,7 +526,8 @@ namespace codi {

/// \copydoc codi::BinaryOperation::gradientB()
template<typename ArgA, typename ArgB>
static CODI_INLINE RealTraits::PassiveReal<Real> gradientB(ArgA const& argA, ArgB const& argB, Real const& result) {
static CODI_INLINE RealTraits::PassiveReal<Real> gradientB(ArgA const& argA, ArgB const& argB,
Real const& result) {
CODI_UNUSED(argA, argB, result);

return 0.0;
Expand Down Expand Up @@ -743,7 +745,8 @@ namespace codi {

/// \copydoc codi::BinaryOperation::gradientA()
template<typename ArgA, typename ArgB>
static CODI_INLINE RealTraits::PassiveReal<Real> gradientA(ArgA const& argA, ArgB const& argB, Real const& result) {
static CODI_INLINE RealTraits::PassiveReal<Real> gradientA(ArgA const& argA, ArgB const& argB,
Real const& result) {
CODI_UNUSED(argA, argB, result);

return 1.0;
Expand Down
4 changes: 2 additions & 2 deletions include/codi/tapes/data/blockData.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,8 @@ namespace codi {
double memoryAlloc = (double)allocedSize * (double)entrySize;

values.addUnsignedLongEntry("Total number", dataEntries);
values.addDoubleEntry("Memory used", memoryUsed, true, false);
values.addDoubleEntry("Memory allocated", memoryAlloc, false, true);
values.addDoubleEntry("Memory used", memoryUsed, TapeValues::LocalReductionOperation::Sum, true, false);
values.addDoubleEntry("Memory allocated", memoryAlloc, TapeValues::LocalReductionOperation::Sum, false, true);
}

/// \copydoc DataInterface::extractPosition
Expand Down
4 changes: 2 additions & 2 deletions include/codi/tapes/data/chunkedData.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,8 +265,8 @@ namespace codi {

values.addUnsignedLongEntry("Total number", dataEntries);
values.addUnsignedLongEntry("Number of chunks", numberOfChunks);
values.addDoubleEntry("Memory used", memoryUsed, true, false);
values.addDoubleEntry("Memory allocated", memoryAlloc, false, true);
values.addDoubleEntry("Memory used", memoryUsed, TapeValues::LocalReductionOperation::Sum, true, false);
values.addDoubleEntry("Memory allocated", memoryAlloc, TapeValues::LocalReductionOperation::Sum, false, true);
}

/// \copydoc DataInterface::extractPosition
Expand Down
6 changes: 5 additions & 1 deletion include/codi/tapes/indices/linearIndexManager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,11 @@ namespace codi {
/// \copydoc IndexManagerInterface::addToTapeValues <br><br>
/// Implementation: Adds maximum live indices.
void addToTapeValues(TapeValues& values) const {
values.addLongEntry("Max. live indices", getLargestCreatedIndex());
TapeValues::LocalReductionOperation constexpr operation = NeedsStaticStorage
? TapeValues::LocalReductionOperation::Max
: TapeValues::LocalReductionOperation::Sum;

values.addLongEntry("Max. live indices", getLargestCreatedIndex(), operation);
}

/// \copydoc IndexManagerInterface::freeIndex <br><br>
Expand Down
6 changes: 5 additions & 1 deletion include/codi/tapes/indices/multiUseIndexManager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,11 @@ namespace codi {

double memoryindexUseVector = (double)indexUse.size() * (double)(sizeof(Index));

values.addDoubleEntry("Memory: index use vector", memoryindexUseVector, true, true);
TapeValues::LocalReductionOperation constexpr operation = NeedsStaticStorage
? TapeValues::LocalReductionOperation::Max
: TapeValues::LocalReductionOperation::Sum;

values.addDoubleEntry("Memory: index use vector", memoryindexUseVector, operation, true, true);
}

/// \copydoc ReuseIndexManager::assignIndex
Expand Down
5 changes: 4 additions & 1 deletion include/codi/tapes/indices/parallelReuseIndexManager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,10 @@ namespace codi {
void addToTapeValues(TapeValues& values) const {
unsigned long maximumGlobalIndex = globalMaximumIndex();

values.addUnsignedLongEntry("Max. live indices", maximumGlobalIndex);
// As maximumGlobalIndex is static, it uses a local maximum reduction.
TapeValues::LocalReductionOperation constexpr operation = TapeValues::LocalReductionOperation::Max;

values.addUnsignedLongEntry("Max. live indices", maximumGlobalIndex, operation);
// The number of current live indices cannot be computed from one instance alone.
// It equals the number of maximum live indices minus the number of indices stored across all instances.

Expand Down
8 changes: 6 additions & 2 deletions include/codi/tapes/indices/reuseIndexManager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,12 @@ namespace codi {
unsigned long storedIndices = this->usedIndicesPos + this->unusedIndicesPos;
long currentLiveIndices = maximumGlobalIndex - storedIndices;

values.addUnsignedLongEntry("Max. live indices", maximumGlobalIndex);
values.addLongEntry("Cur. live indices", currentLiveIndices);
TapeValues::LocalReductionOperation constexpr operation = NeedsStaticStorage
? TapeValues::LocalReductionOperation::Max
: TapeValues::LocalReductionOperation::Sum;

values.addUnsignedLongEntry("Max. live indices", maximumGlobalIndex, operation);
values.addLongEntry("Cur. live indices", currentLiveIndices, operation);

Base::addToTapeValues(values);
}
Expand Down
15 changes: 12 additions & 3 deletions include/codi/tapes/indices/reuseIndexManagerBase.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,11 @@ namespace codi {
return static_cast<Impl&>(*this);
}

/// Cast this object to the implementation type Impl; const variant.
CODI_INLINE Impl const& cast() const {
  // Static downcast to the implementation type; constness of this object is preserved.
  Impl const& implementation = static_cast<Impl const&>(*this);
  return implementation;
}

/// Method to generate new indices. Only called when unusedIndices is empty.
CODI_NO_INLINE void generateNewIndices() {
cast().generateNewIndices();
Expand Down Expand Up @@ -237,9 +242,13 @@ namespace codi {
double memoryStoredIndices = (double)storedIndices * (double)(sizeof(Index));
double memoryAllocatedIndices = (double)allocatedIndices * (double)(sizeof(Index));

values.addUnsignedLongEntry("Indices stored", storedIndices);
values.addDoubleEntry("Memory used", memoryStoredIndices, true, false);
values.addDoubleEntry("Memory allocated", memoryAllocatedIndices, false, true);
TapeValues::LocalReductionOperation constexpr operation = cast().NeedsStaticStorage
? TapeValues::LocalReductionOperation::Max
: TapeValues::LocalReductionOperation::Sum;

values.addUnsignedLongEntry("Indices stored", storedIndices, operation);
values.addDoubleEntry("Memory used", memoryStoredIndices, operation, true, false);
values.addDoubleEntry("Memory allocated", memoryAllocatedIndices, operation, false, true);
}

/// @}
Expand Down
9 changes: 7 additions & 2 deletions include/codi/tapes/jacobianBaseTape.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
#include "../misc/macros.hpp"
#include "../misc/mathUtility.hpp"
#include "../misc/memberStore.hpp"
#include "../traits/adjointVectorTraits.hpp"
#include "../traits/computationTraits.hpp"
#include "../traits/expressionTraits.hpp"
#include "commonTapeImplementation.hpp"
Expand Down Expand Up @@ -494,9 +495,13 @@ namespace codi {
size_t nAdjoints = indexManager.get().getLargestCreatedIndex();
double memoryAdjoints = static_cast<double>(nAdjoints) * static_cast<double>(sizeof(Gradient));

bool constexpr globalAdjoints = AdjointVectorTraits::IsGlobal<Adjoints>::value;
TapeValues::LocalReductionOperation constexpr operation =
globalAdjoints ? TapeValues::LocalReductionOperation::Max : TapeValues::LocalReductionOperation::Sum;

values.addSection("Adjoint vector");
values.addUnsignedLongEntry("Number of adjoints", nAdjoints);
values.addDoubleEntry("Memory allocated", memoryAdjoints, true, true);
values.addUnsignedLongEntry("Number of adjoints", nAdjoints, operation);
values.addDoubleEntry("Memory allocated", memoryAdjoints, operation, true, true);

values.addSection("Index manager");
indexManager.get().addToTapeValues(values);
Expand Down
105 changes: 77 additions & 28 deletions include/codi/tapes/misc/tapeValues.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,12 @@ namespace codi {
* - getUsedMemorySize(): Get the used memory size.
*/
struct TapeValues {
public:
/// Operation that is applied per entry when the data of two TapeValues objects is combined.
enum class LocalReductionOperation {
Sum,  ///< The values of the entry are added.
Max   ///< The larger of the two values of the entry is kept.
};

private:
enum class EntryType {
Double,
Expand All @@ -84,11 +90,14 @@ namespace codi {
public:
std::string name;
EntryType type;
LocalReductionOperation operation;
size_t pos;

Entry() : name(), type(), pos() {}
Entry() : name(), type(), operation(), pos() {}

Entry(std::string const& name, EntryType const& type, size_t const& pos) : name(name), type(type), pos(pos) {}
Entry(std::string const& name, EntryType const& type, LocalReductionOperation const& operation,
size_t const& pos)
: name(name), type(type), operation(operation), pos(pos) {}
};

struct Section {
Expand Down Expand Up @@ -116,18 +125,19 @@ namespace codi {
TapeValues(std::string const& tapeName)
: sections(), doubleData(), longData(), unsignedLongData(), usedMemoryIndex(0), allocatedMemoryIndex(1) {
addSection(tapeName);
addEntryInternal("Total memory used", EntryType::Double, doubleData, 0.0);
addEntryInternal("Total memory allocated", EntryType::Double, doubleData, 0.0);
addEntryInternal("Total memory used", EntryType::Double, LocalReductionOperation::Sum, doubleData, 0.0);
addEntryInternal("Total memory allocated", EntryType::Double, LocalReductionOperation::Sum, doubleData, 0.0);
}

/*******************************************************************************/
/// @name Add data
/// @{

/// Add double entry. If it is a memory entry, it should be in bytes.
void addDoubleEntry(std::string const& name, double const& value, bool usedMem = false,
void addDoubleEntry(std::string const& name, double const& value,
LocalReductionOperation operation = LocalReductionOperation::Sum, bool usedMem = false,
bool allocatedMem = false) {
addEntryInternal(name, EntryType::Double, doubleData, value);
addEntryInternal(name, EntryType::Double, operation, doubleData, value);

if (usedMem) {
doubleData[usedMemoryIndex] += value;
Expand All @@ -139,8 +149,9 @@ namespace codi {
}

/// Add long entry.
void addLongEntry(std::string const& name, long const& value) {
addEntryInternal(name, EntryType::Long, longData, value);
void addLongEntry(std::string const& name, long const& value,
LocalReductionOperation operation = LocalReductionOperation::Sum) {
addEntryInternal(name, EntryType::Long, operation, longData, value);
}

/// Add section. All further entries are added under this section.
Expand All @@ -149,8 +160,9 @@ namespace codi {
}

/// Add unsigned long entry.
void addUnsignedLongEntry(std::string const& name, unsigned long const& value) {
addEntryInternal(name, EntryType::UnsignedLong, unsignedLongData, value);
void addUnsignedLongEntry(std::string const& name, unsigned long const& value,
LocalReductionOperation operation = LocalReductionOperation::Sum) {
addEntryInternal(name, EntryType::UnsignedLong, operation, unsignedLongData, value);
}

/// @}
Expand Down Expand Up @@ -227,24 +239,46 @@ namespace codi {

/// Perform entry-wise reductions. Each entry is combined according to its local reduction operation (sum or maximum).
void combineData(TapeValues const& other) {

// Basic checks to ensure that we add tape values of the same tape type.
// Size check for the number of sections.
codiAssert(this->sections.size() == other.sections.size());
codiAssert(this->sections.size() == 0 || this->sections[0].name == other.sections[0].name);

// Size checks for the subsequent loops.
codiAssert(this->doubleData.size() == other.doubleData.size());
codiAssert(this->longData.size() == other.longData.size());
codiAssert(this->unsignedLongData.size() == other.unsignedLongData.size());

for (size_t i = 0; i < this->doubleData.size(); ++i) {
this->doubleData[i] += other.doubleData[i];
}
for (size_t i = 0; i < this->longData.size(); ++i) {
this->longData[i] += other.longData[i];
}
for (size_t i = 0; i < this->unsignedLongData.size(); ++i) {
this->unsignedLongData[i] += other.unsignedLongData[i];
for (size_t section = 0; section < this->sections.size(); ++section) {
auto& thisSection = this->sections[section];
auto const& otherSection = other.sections[section];

// Basic check to ensure that we combine identically structured tape values.
codiAssert(thisSection.name == otherSection.name);

// Size check for the number of entries.
codiAssert(thisSection.data.size() == otherSection.data.size());

for (size_t entry = 0; entry < thisSection.data.size(); ++entry) {
auto& thisEntry = thisSection.data[entry];
auto const& otherEntry = otherSection.data[entry];

// Basic checks to ensure that we combine identically structured tape values.
codiAssert(thisEntry.name == otherEntry.name);
codiAssert(thisEntry.type == otherEntry.type);
codiAssert(thisEntry.operation == otherEntry.operation);

switch (thisEntry.type) {
case EntryType::Double: {
performLocalReduction(this->doubleData[thisEntry.pos], other.doubleData[otherEntry.pos],
thisEntry.operation);
break;
}
case EntryType::Long: {
performLocalReduction(this->longData[thisEntry.pos], other.longData[otherEntry.pos],
thisEntry.operation);
break;
}
case EntryType::UnsignedLong: {
performLocalReduction(this->unsignedLongData[thisEntry.pos], other.unsignedLongData[otherEntry.pos],
thisEntry.operation);
break;
}
}
}
}
}

Expand Down Expand Up @@ -288,15 +322,30 @@ namespace codi {
private:

template<typename T>
void addEntryInternal(std::string const& name, EntryType const& type, std::vector<T>& vector, T const& value) {
void performLocalReduction(T& lhs, T const& rhs, LocalReductionOperation operation) {
  // Fold rhs into lhs in place; the operation selects between accumulating and keeping the larger value.
  if (LocalReductionOperation::Sum == operation) {
    lhs += rhs;
  } else if (LocalReductionOperation::Max == operation) {
    lhs = std::max(lhs, rhs);
  }
}

template<typename T>
void addEntryInternal(std::string const& name, EntryType const& type, LocalReductionOperation const& operation,
std::vector<T>& vector, T const& value) {
size_t entryPos = vector.size();
vector.push_back(value);

if (sections.empty()) {
addSection("General");
}

sections.back().data.push_back(Entry(name, type, entryPos));
sections.back().data.push_back(Entry(name, type, operation, entryPos));
}

std::string formatEntry(Entry const& entry, int maximumFieldSize) const {
Expand Down
4 changes: 2 additions & 2 deletions include/codi/tapes/primalValueBaseTape.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -562,11 +562,11 @@ namespace codi {

values.addSection("Adjoint vector");
values.addUnsignedLongEntry("Number of adjoints", nAdjoints);
values.addDoubleEntry("Memory allocated", memoryAdjoints, true, true);
values.addDoubleEntry("Memory allocated", memoryAdjoints, TapeValues::LocalReductionOperation::Sum, true, true);

values.addSection("Primal vector");
values.addUnsignedLongEntry("Number of primals", nPrimals);
values.addDoubleEntry("Memory allocated", memoryPrimals, true, true);
values.addDoubleEntry("Memory allocated", memoryPrimals, TapeValues::LocalReductionOperation::Sum, true, true);

values.addSection("Index manager");
indexManager.get().addToTapeValues(values);
Expand Down
2 changes: 1 addition & 1 deletion include/codi/tools/parallel/parallelToolbox.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ namespace codi {
using StaticThreadLocalPointer = CODI_DD(CODI_T(T_StaticThreadLocalPointer<Type, Owner>),
CODI_T(StaticThreadLocalPointerInterface<Type, Owner, CODI_ANY>));

using Synchronization = CODI_DD(T_Synchronization, DefaultSynchronization); ///< See codi::ParallelToolbox.
using Synchronization = CODI_DD(T_Synchronization, DefaultSynchronization); ///< See codi::ParallelToolbox.

using Lock = codi::Lock<Mutex>; ///< See codi::Lock.
using ReadWriteMutex = codi::ReadWriteMutex<ThreadInformation, Atomic<int>>; ///< See codi::ReadWriteMutex.
Expand Down
Loading

0 comments on commit 8408470

Please sign in to comment.