From 5dc6ce10029d563663ac45c8d45dcf591f97f65b Mon Sep 17 00:00:00 2001 From: Pete Peterson Date: Thu, 26 Dec 2024 11:59:37 -0500 Subject: [PATCH 1/3] Do not double read when actual matches requested --- Framework/Nexus/src/H5Util.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/Framework/Nexus/src/H5Util.cpp b/Framework/Nexus/src/H5Util.cpp index db02fe4cd9e7..6792c2d69b71 100644 --- a/Framework/Nexus/src/H5Util.cpp +++ b/Framework/Nexus/src/H5Util.cpp @@ -380,15 +380,13 @@ std::vector readNumArrayAttributeCoerce(const H5::H5Object &object, const } template void readArray1DCoerce(const DataSet &dataset, std::vector &output) { - DataType dataType = dataset.getDataType(); + const DataType dataType = dataset.getDataType(); if (getType() == dataType) { // no conversion necessary - DataSpace dataSpace = dataset.getSpace(); - output.resize(dataSpace.getSelectNpoints()); + const DataSpace dataSpace = dataset.getSpace(); + output.resize(static_cast(dataSpace.getSelectNpoints())); dataset.read(output.data(), dataType, dataSpace); - } - - if (PredType::NATIVE_INT32 == dataType) { + } else if (PredType::NATIVE_INT32 == dataType) { convertingRead(dataset, dataType, output); } else if (PredType::NATIVE_UINT32 == dataType) { convertingRead(dataset, dataType, output); From 3c6a54a60f0b43553d2dace241fe4a68a958096d Mon Sep 17 00:00:00 2001 From: Pete Peterson Date: Fri, 27 Dec 2024 13:23:21 -0500 Subject: [PATCH 2/3] Add ability to read portions of the data --- Framework/Nexus/inc/MantidNexus/H5Util.h | 3 + Framework/Nexus/src/H5Util.cpp | 72 ++++++++++++++++++++++-- Framework/Nexus/test/H5UtilTest.h | 59 ++++++++++++++++++- 3 files changed, 127 insertions(+), 7 deletions(-) diff --git a/Framework/Nexus/inc/MantidNexus/H5Util.h b/Framework/Nexus/inc/MantidNexus/H5Util.h index b3eca15e624d..8f2d52a36812 100644 --- a/Framework/Nexus/inc/MantidNexus/H5Util.h +++ b/Framework/Nexus/inc/MantidNexus/H5Util.h @@ -95,6 +95,9 @@ template std::vector 
readArray1DCoerce(const H5::Group &gr template void readArray1DCoerce(const H5::DataSet &dataset, std::vector &output); +template +void readArray1DCoerce(const H5::DataSet &dataset, std::vector &output, const size_t length, const size_t offset); + /// Test if a group already exists within an HDF5 file or parent group. MANTID_NEXUS_DLL bool groupExists(H5::H5Object &h5, const std::string &groupPath); diff --git a/Framework/Nexus/src/H5Util.cpp b/Framework/Nexus/src/H5Util.cpp index 6792c2d69b71..f64e54ded2ea 100644 --- a/Framework/Nexus/src/H5Util.cpp +++ b/Framework/Nexus/src/H5Util.cpp @@ -282,20 +282,25 @@ template std::vector readArray1DCoerce(const H5::Group &gr namespace { template -void convertingRead(const DataSet &dataset, const DataType &dataType, std::vector &output) { - DataSpace dataSpace = dataset.getSpace(); - - std::vector temp(dataSpace.getSelectNpoints()); - dataset.read(temp.data(), dataType, dataSpace); +void convertingRead(const DataSet &dataset, const DataType &dataType, std::vector &output, + const DataSpace &memspace, const DataSpace &filespace) { + std::vector temp(filespace.getSelectNpoints()); + dataset.read(temp.data(), dataType, memspace, filespace); output.resize(temp.size()); std::transform(temp.begin(), temp.end(), output.begin(), - [](const InputNumT a) { // lambda + [](const auto &a) { // lambda return boost::numeric_cast(a); }); } +template +void convertingRead(const DataSet &dataset, const DataType &dataType, std::vector &output) { + const DataSpace dataSpace = dataset.getSpace(); + convertingRead(dataset, dataType, output, dataSpace, dataSpace); +} + template std::vector convertingNumArrayAttributeRead(Attribute &attribute, const DataType &dataType) { DataSpace dataSpace = attribute.getSpace(); @@ -404,6 +409,48 @@ template void readArray1DCoerce(const DataSet &dataset, std::vec } } +template +void readArray1DCoerce(const H5::DataSet &dataset, std::vector &output, const size_t length, + const size_t offset) { + DataSpace 
filespace = dataset.getSpace(); + + const auto length_actual = static_cast(filespace.getSelectNpoints()); + if ((offset == 0) && (length_actual <= length)) { + // use standard approach + readArray1DCoerce(dataset, output); + } else { + // set extent and offset in DataSpace + hsize_t rankedoffset[1] = {static_cast(offset)}; + hsize_t rankedextent[1] = { + static_cast(std::min(length, length_actual - offset))}; // don't read past the end + filespace.selectHyperslab(H5S_SELECT_SET, rankedextent, rankedoffset); + + // size of thing being read out + DataSpace memspace(1, rankedextent); + + const DataType dataType = dataset.getDataType(); + if (getType() == dataType) { // no conversion necessary + output.resize(static_cast(filespace.getSelectNpoints())); + dataset.read(output.data(), dataType, memspace, filespace); + } else if (PredType::NATIVE_INT32 == dataType) { + convertingRead(dataset, dataType, output, memspace, filespace); + } else if (PredType::NATIVE_UINT32 == dataType) { + convertingRead(dataset, dataType, output, memspace, filespace); + } else if (PredType::NATIVE_INT64 == dataType) { + convertingRead(dataset, dataType, output, memspace, filespace); + } else if (PredType::NATIVE_UINT64 == dataType) { + convertingRead(dataset, dataType, output, memspace, filespace); + } else if (PredType::NATIVE_FLOAT == dataType) { + convertingRead(dataset, dataType, output, memspace, filespace); + } else if (PredType::NATIVE_DOUBLE == dataType) { + convertingRead(dataset, dataType, output, memspace, filespace); + } else { + // not a supported type + throw DataTypeIException(); + } + } +} + /// Test if a group exists in an HDF5 file or parent group. 
bool groupExists(H5::H5Object &h5, const std::string &groupPath) { bool status = true; @@ -600,4 +647,17 @@ template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::ve template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::vector &output); template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::vector &output); template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::vector &output); + +template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::vector &output, + const size_t length, const size_t offset); +template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::vector &output, + const size_t length, const size_t offset); +template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::vector &output, + const size_t length, const size_t offset); +template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::vector &output, + const size_t length, const size_t offset); +template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::vector &output, + const size_t length, const size_t offset); +template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::vector &output, + const size_t length, const size_t offset); } // namespace Mantid::NeXus::H5Util diff --git a/Framework/Nexus/test/H5UtilTest.h b/Framework/Nexus/test/H5UtilTest.h index 622858284278..fe06916690e3 100644 --- a/Framework/Nexus/test/H5UtilTest.h +++ b/Framework/Nexus/test/H5UtilTest.h @@ -135,7 +135,7 @@ class H5UtilTest : public CxxTest::TestSuite { TS_ASSERT(std::filesystem::exists(FILENAME)); - { // read tests + { // ---------- simple read tests H5File file(FILENAME, H5F_ACC_RDONLY); auto group = file.openGroup(GRP_NAME); @@ -151,6 +151,63 @@ class H5UtilTest : public CxxTest::TestSuite { const boost::numeric::positive_overflow &); TS_ASSERT_THROWS_NOTHING(H5Util::readArray1DCoerce(group, "array1d_int32")); + // 
---------- slicing read tests + auto dataSetFloat = group.openDataSet("array1d_float"); + auto dataSetDouble = group.openDataSet("array1d_double"); + + std::vector output; + // full dataset + output.clear(); + H5Util::readArray1DCoerce(dataSetFloat, output, array1d_double.size(), 0); + TS_ASSERT_EQUALS(output, array1d_double); // whole thing w/ coercion + output.clear(); + H5Util::readArray1DCoerce(dataSetDouble, output, array1d_double.size(), 0); + TS_ASSERT_EQUALS(output, array1d_double); // whole thing w/o coercion + output.clear(); + H5Util::readArray1DCoerce(dataSetFloat, output, array1d_double.size() + 1, 0); + TS_ASSERT_EQUALS(output, array1d_double); // more than the whole thing w/ coercion + output.clear(); + H5Util::readArray1DCoerce(dataSetDouble, output, array1d_double.size() + 1, 0); + TS_ASSERT_EQUALS(output, array1d_double); // more than the whole thing w/o coercion + + { // partial dataset from front 1->end + const std::vector expected({1, 2, 3, 4}); + output.clear(); + H5Util::readArray1DCoerce(dataSetFloat, output, array1d_double.size() - 1, 1); + TS_ASSERT_EQUALS(output, expected); // w/ coercion + output.clear(); + H5Util::readArray1DCoerce(dataSetDouble, output, array1d_double.size() - 1, 1); + TS_ASSERT_EQUALS(output, expected); // w/o coercion + } + + { // partial dataset from front 0->end-1 + const std::vector expected({0, 1, 2, 3}); + output.clear(); + H5Util::readArray1DCoerce(dataSetFloat, output, array1d_double.size() - 1, 0); + TS_ASSERT_EQUALS(output, expected); // w/ coercion + output.clear(); + H5Util::readArray1DCoerce(dataSetDouble, output, array1d_double.size() - 1, 0); + TS_ASSERT_EQUALS(output, expected); // w/o coercion + } + { // partial dataset from front 1->end-1 + const std::vector expected({1, 2, 3}); + output.clear(); + H5Util::readArray1DCoerce(dataSetFloat, output, array1d_double.size() - 2, 1); + TS_ASSERT_EQUALS(output, expected); // w/ coercion + output.clear(); + H5Util::readArray1DCoerce(dataSetDouble, output, 
array1d_double.size() - 2, 1); + TS_ASSERT_EQUALS(output, expected); // w/o coercion + } + { // from 1->end+1 + const std::vector expected({1, 2, 3, 4}); + output.clear(); + H5Util::readArray1DCoerce(dataSetFloat, output, array1d_double.size() + 1, 1); + TS_ASSERT_EQUALS(output, expected); // w/ coercion + output.clear(); + H5Util::readArray1DCoerce(dataSetDouble, output, array1d_double.size() + 1, 1); + TS_ASSERT_EQUALS(output, expected); // w/o coercion + } + file.close(); } From 325fb4ac23a7b756d8b2d73c0e215e88433001ea Mon Sep 17 00:00:00 2001 From: Pete Peterson Date: Mon, 6 Jan 2025 15:13:07 -0500 Subject: [PATCH 3/3] Combine functions by using default values --- Framework/Nexus/inc/MantidNexus/H5Util.h | 8 +- Framework/Nexus/src/H5Util.cpp | 98 ++++++++---------------- 2 files changed, 37 insertions(+), 69 deletions(-) diff --git a/Framework/Nexus/inc/MantidNexus/H5Util.h b/Framework/Nexus/inc/MantidNexus/H5Util.h index 8f2d52a36812..bd35820e4b60 100644 --- a/Framework/Nexus/inc/MantidNexus/H5Util.h +++ b/Framework/Nexus/inc/MantidNexus/H5Util.h @@ -8,6 +8,7 @@ #include "MantidNexus/DllConfig.h" +#include #include #include #include @@ -91,12 +92,13 @@ std::vector readNumArrayAttributeCoerce(const H5::H5Object &object, const template void readArray1DCoerce(const H5::Group &group, const std::string &name, std::vector &output); -template std::vector readArray1DCoerce(const H5::Group &group, const std::string &name); -template void readArray1DCoerce(const H5::DataSet &dataset, std::vector &output); +template std::vector readArray1DCoerce(const H5::Group &group, const std::string &name); template -void readArray1DCoerce(const H5::DataSet &dataset, std::vector &output, const size_t length, const size_t offset); +void readArray1DCoerce(const H5::DataSet &dataset, std::vector &output, + const size_t length = std::numeric_limits::max(), + const size_t offset = static_cast(0)); /// Test if a group already exists within an HDF5 file or parent group. 
MANTID_NEXUS_DLL bool groupExists(H5::H5Object &h5, const std::string &groupPath); diff --git a/Framework/Nexus/src/H5Util.cpp b/Framework/Nexus/src/H5Util.cpp index f64e54ded2ea..ed56c3a53884 100644 --- a/Framework/Nexus/src/H5Util.cpp +++ b/Framework/Nexus/src/H5Util.cpp @@ -295,12 +295,6 @@ void convertingRead(const DataSet &dataset, const DataType &dataType, std::vecto }); } -template -void convertingRead(const DataSet &dataset, const DataType &dataType, std::vector &output) { - const DataSpace dataSpace = dataset.getSpace(); - convertingRead(dataset, dataType, output, dataSpace, dataSpace); -} - template std::vector convertingNumArrayAttributeRead(Attribute &attribute, const DataType &dataType) { DataSpace dataSpace = attribute.getSpace(); @@ -384,73 +378,52 @@ std::vector readNumArrayAttributeCoerce(const H5::H5Object &object, const return value; } -template void readArray1DCoerce(const DataSet &dataset, std::vector &output) { - const DataType dataType = dataset.getDataType(); +template +void readArray1DCoerce(const H5::DataSet &dataset, std::vector &output, const size_t length, + const size_t offset) { + DataSpace filespace = dataset.getSpace(); + const auto length_actual = static_cast(filespace.getSelectNpoints()); + + if (offset >= length_actual) { + std::stringstream msg; + msg << "Tried to read offset=" << offset << " into array that is only length=" << length_actual << " long"; + throw std::runtime_error(msg.str()); + } + // set extent and offset in DataSpace + const hsize_t rankedoffset[1] = {static_cast(offset)}; + const hsize_t rankedextent[1] = { + static_cast(std::min(length, length_actual - offset))}; // don't read past the end + // select a part of filespace if appropriate + if (rankedextent[0] < length_actual) + filespace.selectHyperslab(H5S_SELECT_SET, rankedextent, rankedoffset); + + // size of thing being read out + DataSpace memspace(1, rankedextent); + + // do the actual read + const DataType dataType = dataset.getDataType(); if (getType() 
== dataType) { // no conversion necessary - const DataSpace dataSpace = dataset.getSpace(); - output.resize(static_cast(dataSpace.getSelectNpoints())); - dataset.read(output.data(), dataType, dataSpace); + output.resize(static_cast(filespace.getSelectNpoints())); + dataset.read(output.data(), dataType, memspace, filespace); } else if (PredType::NATIVE_INT32 == dataType) { - convertingRead(dataset, dataType, output); + convertingRead(dataset, dataType, output, memspace, filespace); } else if (PredType::NATIVE_UINT32 == dataType) { - convertingRead(dataset, dataType, output); + convertingRead(dataset, dataType, output, memspace, filespace); } else if (PredType::NATIVE_INT64 == dataType) { - convertingRead(dataset, dataType, output); + convertingRead(dataset, dataType, output, memspace, filespace); } else if (PredType::NATIVE_UINT64 == dataType) { - convertingRead(dataset, dataType, output); + convertingRead(dataset, dataType, output, memspace, filespace); } else if (PredType::NATIVE_FLOAT == dataType) { - convertingRead(dataset, dataType, output); + convertingRead(dataset, dataType, output, memspace, filespace); } else if (PredType::NATIVE_DOUBLE == dataType) { - convertingRead(dataset, dataType, output); + convertingRead(dataset, dataType, output, memspace, filespace); } else { // not a supported type throw DataTypeIException(); } } -template -void readArray1DCoerce(const H5::DataSet &dataset, std::vector &output, const size_t length, - const size_t offset) { - DataSpace filespace = dataset.getSpace(); - - const auto length_actual = static_cast(filespace.getSelectNpoints()); - if ((offset == 0) && (length_actual <= length)) { - // use standard approach - readArray1DCoerce(dataset, output); - } else { - // set extent and offset in DataSpace - hsize_t rankedoffset[1] = {static_cast(offset)}; - hsize_t rankedextent[1] = { - static_cast(std::min(length, length_actual - offset))}; // don't read past the end - filespace.selectHyperslab(H5S_SELECT_SET, rankedextent, 
rankedoffset); - - // size of thing being read out - DataSpace memspace(1, rankedextent); - - const DataType dataType = dataset.getDataType(); - if (getType() == dataType) { // no conversion necessary - output.resize(static_cast(filespace.getSelectNpoints())); - dataset.read(output.data(), dataType, memspace, filespace); - } else if (PredType::NATIVE_INT32 == dataType) { - convertingRead(dataset, dataType, output, memspace, filespace); - } else if (PredType::NATIVE_UINT32 == dataType) { - convertingRead(dataset, dataType, output, memspace, filespace); - } else if (PredType::NATIVE_INT64 == dataType) { - convertingRead(dataset, dataType, output, memspace, filespace); - } else if (PredType::NATIVE_UINT64 == dataType) { - convertingRead(dataset, dataType, output, memspace, filespace); - } else if (PredType::NATIVE_FLOAT == dataType) { - convertingRead(dataset, dataType, output, memspace, filespace); - } else if (PredType::NATIVE_DOUBLE == dataType) { - convertingRead(dataset, dataType, output, memspace, filespace); - } else { - // not a supported type - throw DataTypeIException(); - } - } -} - /// Test if a group exists in an HDF5 file or parent group. 
bool groupExists(H5::H5Object &h5, const std::string &groupPath) { bool status = true; @@ -641,13 +614,6 @@ template MANTID_NEXUS_DLL std::vector readArray1DCoerce(const H5::Grou template MANTID_NEXUS_DLL std::vector readArray1DCoerce(const H5::Group &group, const std::string &name); template MANTID_NEXUS_DLL std::vector readArray1DCoerce(const H5::Group &group, const std::string &name); -template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::vector &output); -template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::vector &output); -template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::vector &output); -template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::vector &output); -template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::vector &output); -template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::vector &output); - template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::vector &output, const size_t length, const size_t offset); template MANTID_NEXUS_DLL void readArray1DCoerce(const DataSet &dataset, std::vector &output,