Skip to content

Commit

Permalink
Add ability to read portions of the data
Browse files Browse the repository at this point in the history
  • Loading branch information
peterfpeterson committed Dec 27, 2024
1 parent bd40668 commit 0332345
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 8 deletions.
3 changes: 3 additions & 0 deletions Framework/Nexus/inc/MantidNexus/H5Util.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@ template <typename NumT> std::vector<NumT> readArray1DCoerce(const H5::Group &gr

template <typename NumT> void readArray1DCoerce(const H5::DataSet &dataset, std::vector<NumT> &output);

/**
 * Read a contiguous portion of a dataset into a vector, converting the stored
 * numeric type to NumT when the two differ.
 *
 * @param dataset Dataset to read from.
 * @param output Receives the values that were read; it is resized to the number
 * of elements actually read.
 * @param length Maximum number of elements to read. The read is shortened so
 * that it does not run past the end of the dataset.
 * @param offset Index of the first element to read.
 */
template <typename NumT>
void readArray1DCoerce(const H5::DataSet &dataset, std::vector<NumT> &output, const size_t length, const size_t offset);

/// Test if a group already exists within an HDF5 file or parent group.
MANTID_NEXUS_DLL bool groupExists(H5::H5Object &h5, const std::string &groupPath);

Expand Down
73 changes: 66 additions & 7 deletions Framework/Nexus/src/H5Util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
// SPDX - License - Identifier: GPL - 3.0 +
#include "MantidNexus/H5Util.h"
#include "MantidAPI/LogManager.h"
#include "MantidKernel/System.h"

#include <H5Cpp.h>
#include <algorithm>
Expand Down Expand Up @@ -282,20 +281,25 @@ template <typename NumT> std::vector<NumT> readArray1DCoerce(const H5::Group &gr

namespace {
template <typename InputNumT, typename OutputNumT>
void convertingRead(const DataSet &dataset, const DataType &dataType, std::vector<OutputNumT> &output) {
DataSpace dataSpace = dataset.getSpace();

std::vector<InputNumT> temp(dataSpace.getSelectNpoints());
dataset.read(temp.data(), dataType, dataSpace);
void convertingRead(const DataSet &dataset, const DataType &dataType, std::vector<OutputNumT> &output,
const DataSpace &memspace, const DataSpace &filespace) {
std::vector<InputNumT> temp(filespace.getSelectNpoints());
dataset.read(temp.data(), dataType, memspace, filespace);

output.resize(temp.size());

std::transform(temp.begin(), temp.end(), output.begin(),
[](const InputNumT a) { // lambda
[](const auto &a) { // lambda
return boost::numeric_cast<OutputNumT>(a);
});
}

template <typename InputNumT, typename OutputNumT>
void convertingRead(const DataSet &dataset, const DataType &dataType, std::vector<OutputNumT> &output) {
const DataSpace dataSpace = dataset.getSpace();
convertingRead<InputNumT, OutputNumT>(dataset, dataType, output, dataSpace, dataSpace);
}

template <typename InputNumT, typename OutputNumT>
std::vector<OutputNumT> convertingNumArrayAttributeRead(Attribute &attribute, const DataType &dataType) {
DataSpace dataSpace = attribute.getSpace();
Expand Down Expand Up @@ -404,6 +408,48 @@ template <typename NumT> void readArray1DCoerce(const DataSet &dataset, std::vec
}
}

/**
 * Read a contiguous portion of a dataset into a vector, converting the stored
 * numeric type to NumT when the two differ.
 *
 * When the request starts at the beginning and covers the whole dataset the
 * work is delegated to the whole-dataset overload. Otherwise a hyperslab of at
 * most `length` elements starting at `offset` is selected; the selection is
 * shortened so that it never extends past the end of the dataset.
 *
 * NOTE(review): the hyperslab is described with a single dimension, so this
 * presumably expects a rank-1 dataset - confirm against callers.
 *
 * @param dataset Dataset to read from.
 * @param output Receives the values that were read; resized to the number of
 * elements actually read. Left empty when there is nothing to read (`length`
 * is zero or `offset` is at/after the end of the dataset).
 * @param length Maximum number of elements to read.
 * @param offset Index of the first element to read.
 * @throws DataTypeIException if the stored type is not one of the supported
 * numeric types.
 */
template <typename NumT>
void readArray1DCoerce(const H5::DataSet &dataset, std::vector<NumT> &output, const size_t length,
                       const size_t offset) {
  DataSpace filespace = dataset.getSpace();

  const auto length_actual = static_cast<size_t>(filespace.getSelectNpoints());
  if ((offset == 0) && (length_actual <= length)) {
    // the request covers everything: use standard approach
    readArray1DCoerce(dataset, output);
    return;
  }

  // Nothing to read. This must be checked before computing the extent because
  // `length_actual - offset` is unsigned and would wrap around to a huge value
  // when offset is past the end, defeating the clamp below. It also avoids
  // building a zero-sized selection.
  if ((length == 0) || (offset >= length_actual)) {
    output.clear();
    return;
  }

  // set extent and offset in DataSpace
  hsize_t rankedoffset[1] = {static_cast<hsize_t>(offset)};
  hsize_t rankedextent[1] = {
      static_cast<hsize_t>(std::min(length, length_actual - offset))}; // don't read past the end
  filespace.selectHyperslab(H5S_SELECT_SET, rankedextent, rankedoffset);

  // size of thing being read out
  DataSpace memspace(1, rankedextent);

  const DataType dataType = dataset.getDataType();
  if (getType<NumT>() == dataType) { // no conversion necessary
    output.resize(static_cast<size_t>(filespace.getSelectNpoints()));
    dataset.read(output.data(), dataType, memspace, filespace);
  } else if (PredType::NATIVE_INT32 == dataType) {
    convertingRead<int32_t>(dataset, dataType, output, memspace, filespace);
  } else if (PredType::NATIVE_UINT32 == dataType) {
    convertingRead<uint32_t>(dataset, dataType, output, memspace, filespace);
  } else if (PredType::NATIVE_INT64 == dataType) {
    convertingRead<int64_t>(dataset, dataType, output, memspace, filespace);
  } else if (PredType::NATIVE_UINT64 == dataType) {
    convertingRead<uint64_t>(dataset, dataType, output, memspace, filespace);
  } else if (PredType::NATIVE_FLOAT == dataType) {
    convertingRead<float>(dataset, dataType, output, memspace, filespace);
  } else if (PredType::NATIVE_DOUBLE == dataType) {
    convertingRead<double>(dataset, dataType, output, memspace, filespace);
  } else {
    // not a supported type
    throw DataTypeIException();
  }
}

/// Test if a group exists in an HDF5 file or parent group.
bool groupExists(H5::H5Object &h5, const std::string &groupPath) {
bool status = true;
Expand Down Expand Up @@ -600,4 +646,17 @@ template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::ve
template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::vector<uint32_t> &output);
template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::vector<int64_t> &output);
template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::vector<uint64_t> &output);

// Explicit instantiations of the overload that reads a portion of a dataset
// (length/offset), one per supported output element type. The template is
// defined in this source file, so these are what other translation units link
// against.
template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::vector<float> &output,
const size_t length, const size_t offset);
template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::vector<double> &output,
const size_t length, const size_t offset);
template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::vector<int32_t> &output,
const size_t length, const size_t offset);
template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::vector<uint32_t> &output,
const size_t length, const size_t offset);
template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::vector<int64_t> &output,
const size_t length, const size_t offset);
template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::vector<uint64_t> &output,
const size_t length, const size_t offset);
} // namespace Mantid::NeXus::H5Util
59 changes: 58 additions & 1 deletion Framework/Nexus/test/H5UtilTest.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ class H5UtilTest : public CxxTest::TestSuite {

TS_ASSERT(std::filesystem::exists(FILENAME));

{ // read tests
{ // ---------- simple read tests
H5File file(FILENAME, H5F_ACC_RDONLY);
auto group = file.openGroup(GRP_NAME);

Expand All @@ -151,6 +151,63 @@ class H5UtilTest : public CxxTest::TestSuite {
const boost::numeric::positive_overflow &);
TS_ASSERT_THROWS_NOTHING(H5Util::readArray1DCoerce<uint32_t>(group, "array1d_int32"));

// ---------- slicing read tests
auto dataSetFloat = group.openDataSet("array1d_float");
auto dataSetDouble = group.openDataSet("array1d_double");

std::vector<double> output;
// full dataset
output.clear();
H5Util::readArray1DCoerce(dataSetFloat, output, array1d_double.size(), 0);
TS_ASSERT_EQUALS(output, array1d_double); // whole thing w/ coercion
output.clear();
H5Util::readArray1DCoerce(dataSetDouble, output, array1d_double.size(), 0);
TS_ASSERT_EQUALS(output, array1d_double); // whole thing w/o coercion
output.clear();
H5Util::readArray1DCoerce(dataSetFloat, output, array1d_double.size() + 1, 0);
TS_ASSERT_EQUALS(output, array1d_double); // more than the whole thing w/ coercion
output.clear();
H5Util::readArray1DCoerce(dataSetDouble, output, array1d_double.size() + 1, 0);
TS_ASSERT_EQUALS(output, array1d_double); // more than the whole thing w/o coercion

{ // partial dataset from front 1->end
const std::vector<double> expected({1, 2, 3, 4});
output.clear();
H5Util::readArray1DCoerce(dataSetFloat, output, array1d_double.size() - 1, 1);
TS_ASSERT_EQUALS(output, expected); // w/ coercion
output.clear();
H5Util::readArray1DCoerce(dataSetDouble, output, array1d_double.size() - 1, 1);
TS_ASSERT_EQUALS(output, expected); // w/o coercion
}

{ // partial dataset from front 0->end-1
const std::vector<double> expected({0, 1, 2, 3});
output.clear();
H5Util::readArray1DCoerce(dataSetFloat, output, array1d_double.size() - 1, 0);
TS_ASSERT_EQUALS(output, expected); // w/ coercion
output.clear();
H5Util::readArray1DCoerce(dataSetDouble, output, array1d_double.size() - 1, 0);
TS_ASSERT_EQUALS(output, expected); // w/o coercion
}
{ // partial dataset from front 1->end-1
const std::vector<double> expected({1, 2, 3});
output.clear();
H5Util::readArray1DCoerce(dataSetFloat, output, array1d_double.size() - 2, 1);
TS_ASSERT_EQUALS(output, expected); // w/ coercion
output.clear();
H5Util::readArray1DCoerce(dataSetDouble, output, array1d_double.size() - 2, 1);
TS_ASSERT_EQUALS(output, expected); // w/o coercion
}
{ // from 1->end+1
const std::vector<double> expected({1, 2, 3, 4});
output.clear();
H5Util::readArray1DCoerce(dataSetFloat, output, array1d_double.size() + 1, 1);
TS_ASSERT_EQUALS(output, expected); // w/ coercion
output.clear();
H5Util::readArray1DCoerce(dataSetDouble, output, array1d_double.size() + 1, 1);
TS_ASSERT_EQUALS(output, expected); // w/o coercion
}

file.close();
}

Expand Down

0 comments on commit 0332345

Please sign in to comment.