diff --git a/CMakeLists.txt b/CMakeLists.txt index c17631c6cb..378cf10078 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -771,6 +771,7 @@ set(openPMD_EXAMPLE_NAMES 10_streaming_read 12_span_write 13_write_dynamic_configuration + 14_toml_template ) set(openPMD_PYTHON_EXAMPLE_NAMES 2_read_serial @@ -1383,6 +1384,9 @@ if(openPMD_BUILD_TESTING) ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe \ --infile ../samples/git-sample/thetaMode/data_%T.bp \ --outfile ../samples/git-sample/thetaMode/data%T.json \ + --outconfig ' \ + json.attribute.mode = \"short\" \n\ + json.dataset.mode = \"template_no_warn\"' \ " WORKING_DIRECTORY ${openPMD_RUNTIME_OUTPUT_DIRECTORY} ) diff --git a/docs/source/backends/json.rst b/docs/source/backends/json.rst index bbae92aaf6..bba6ca5df0 100644 --- a/docs/source/backends/json.rst +++ b/docs/source/backends/json.rst @@ -38,20 +38,46 @@ when working with the JSON backend. Datasets and groups have the same namespace, meaning that there may not be a subgroup and a dataset with the same name contained in one group. -Any **openPMD dataset** is a JSON object with three keys: +Datasets +........ - * ``attributes``: Attributes associated with the dataset. May be ``null`` or not present if no attributes are associated with the dataset. - * ``datatype``: A string describing the type of the stored data. - * ``data`` A nested array storing the actual data in row-major manner. +Datasets can be stored in two modes, either as actual datasets or as dataset templates. +The mode is selected by the :ref:`JSON/TOML parameter` ``json.dataset.mode`` (resp. ``toml.dataset.mode``) with possible values ``["dataset", "template"]`` (default: ``"dataset"``). + +Stored as an actual dataset, an **openPMD dataset** is a JSON object with three JSON keys: + + * ``datatype`` (required): A string describing the type of the stored data. + * ``data`` (required): A nested array storing the actual data in row-major manner. 
The data needs to be consistent with the fields ``datatype`` and ``extent``. Checking whether this key points to an array can be (and is internally) used to distinguish groups from datasets. + * ``attributes``: Attributes associated with the dataset. May be ``null`` or not present if no attributes are associated with the dataset. + +Stored as a **dataset template**, an openPMD dataset is represented by three JSON keys: + +* ``datatype`` (required): As above. +* ``extent`` (required): A list of integers, describing the extent of the dataset. +* ``attributes``: As above. -**Attributes** are stored as a JSON object with a key for each attribute. +This mode stores only the dataset metadata. +Chunk load/store operations are ignored. + +Attributes +.......... + +In order to avoid name clashes, attributes are generally stored within a separate subgroup ``attributes``. + +Attributes can be stored in two formats. +The format is selected by the :ref:`JSON/TOML parameter` ``json.attribute.mode`` (resp. ``toml.attribute.mode``) with possible values ``["long", "short"]`` (default: ``"long"`` for JSON in openPMD 1.*, ``"short"`` otherwise, i.e. generally in openPMD 2.*, but always in TOML). + +Attributes in **long format** store the datatype explicitly, by representing attributes as JSON objects. Every such attribute is itself a JSON object with two keys: * ``datatype``: A string describing the type of the value. * ``value``: The actual value of type ``datatype``. +Attributes in **short format** are stored as just the simple value corresponding to the attribute. +Since JSON/TOML values are pretty-printed into a human-readable format, byte-level type details can be lost when reading those values again later on (e.g. the distinction between different integer types). 
+ TOML File Format ---------------- diff --git a/docs/source/details/backendconfig.rst b/docs/source/details/backendconfig.rst index f6d15a7ac8..cf78d9cdea 100644 --- a/docs/source/details/backendconfig.rst +++ b/docs/source/details/backendconfig.rst @@ -104,6 +104,8 @@ The key ``rank_table`` allows specifying the creation of a **rank table**, used Configuration Structure per Backend ----------------------------------- +Please refer to the respective backends' documentations for further information on their configuration. + .. _backendconfig-adios2: ADIOS2 @@ -231,8 +233,21 @@ The parameters eligible for being passed to flush calls may be configured global .. _backendconfig-other: -Other backends -^^^^^^^^^^^^^^ +JSON/TOML +^^^^^^^^^ -Do currently not read the configuration string. -Please refer to the respective backends' documentations for further information on their configuration. +A full configuration of the JSON backend: + +.. literalinclude:: json.json + :language: json + +The TOML backend is configured analogously, replacing the ``"json"`` key with ``"toml"``. + +All keys found under ``json.dataset`` are applicable globally as well as per dataset. +Explanation of the single keys: + +* ``json.dataset.mode`` / ``toml.dataset.mode``: One of ``"dataset"`` (default) or ``"template"``. + In "dataset" mode, the dataset will be written as an n-dimensional (recursive) array, padded with nulls (JSON) or zeroes (TOML) for missing values. + In "template" mode, only the dataset metadata (type, extent and attributes) are stored and no chunks can be written or read. +* ``json.attribute.mode`` / ``toml.attribute.mode``: One of ``"long"`` (default in openPMD 1.*) or ``"short"`` (default in openPMD 2.*). + The long format explicitly encodes the attribute type in the dataset on disk, the short format only writes the actual attribute as a JSON/TOML value, requiring readers to recover the type. 
diff --git a/docs/source/details/json.json b/docs/source/details/json.json new file mode 100644 index 0000000000..c1491f7245 --- /dev/null +++ b/docs/source/details/json.json @@ -0,0 +1,10 @@ +{ + "json": { + "dataset": { + "mode": "template" + }, + "attribute": { + "mode": "short" + } + } +} diff --git a/examples/14_toml_template.cpp b/examples/14_toml_template.cpp new file mode 100644 index 0000000000..f595e9b10f --- /dev/null +++ b/examples/14_toml_template.cpp @@ -0,0 +1,111 @@ +#include + +std::string backendEnding() +{ + auto extensions = openPMD::getFileExtensions(); + if (auto it = std::find(extensions.begin(), extensions.end(), "toml"); + it != extensions.end()) + { + return *it; + } + else + { + // Fallback for buggy old NVidia compiler + return "json"; + } +} + +void write() +{ + std::string config = R"( +{ + "iteration_encoding": "variable_based", + "json": { + "dataset": {"mode": "template"}, + "attribute": {"mode": "short"} + }, + "toml": { + "dataset": {"mode": "template"}, + "attribute": {"mode": "short"} + } +} +)"; + + openPMD::Series writeTemplate( + "../samples/tomlTemplate." + backendEnding(), + openPMD::Access::CREATE, + config); + auto iteration = writeTemplate.writeIterations()[0]; + + openPMD::Dataset ds{openPMD::Datatype::FLOAT, {5, 5}}; + + auto temperature = + iteration.meshes["temperature"][openPMD::RecordComponent::SCALAR]; + temperature.resetDataset(ds); + + auto E = iteration.meshes["E"]; + E["x"].resetDataset(ds); + E["y"].resetDataset(ds); + /* + * Don't specify datatype and extent for this one to indicate that this + * information is not yet known. 
+ */ + E["z"].resetDataset({openPMD::Datatype::UNDEFINED}); + + ds.extent = {10}; + + auto electrons = iteration.particles["e"]; + electrons["position"]["x"].resetDataset(ds); + electrons["position"]["y"].resetDataset(ds); + electrons["position"]["z"].resetDataset(ds); + + electrons["positionOffset"]["x"].resetDataset(ds); + electrons["positionOffset"]["y"].resetDataset(ds); + electrons["positionOffset"]["z"].resetDataset(ds); + electrons["positionOffset"]["x"].makeConstant(3.14); + electrons["positionOffset"]["y"].makeConstant(3.14); + electrons["positionOffset"]["z"].makeConstant(3.14); + + ds.dtype = openPMD::determineDatatype(); + electrons.particlePatches["numParticles"][openPMD::RecordComponent::SCALAR] + .resetDataset(ds); + electrons + .particlePatches["numParticlesOffset"][openPMD::RecordComponent::SCALAR] + .resetDataset(ds); + electrons.particlePatches["offset"]["x"].resetDataset(ds); + electrons.particlePatches["offset"]["y"].resetDataset(ds); + electrons.particlePatches["offset"]["z"].resetDataset(ds); + electrons.particlePatches["extent"]["x"].resetDataset(ds); + electrons.particlePatches["extent"]["y"].resetDataset(ds); + electrons.particlePatches["extent"]["z"].resetDataset(ds); +} + +void read() +{ + /* + * The config is entirely optional, these things are also detected + * automatically when reading + */ + + // std::string config = R"( + // { + // "iteration_encoding": "variable_based", + // "toml": { + // "dataset": {"mode": "template"}, + // "attribute": {"mode": "short"} + // } + // } + // )"; + + openPMD::Series read( + "../samples/tomlTemplate." 
+ backendEnding(), + openPMD::Access::READ_LINEAR); + read.readIterations(); // @todo change to read.parseBase() + openPMD::helper::listSeries(read); +} + +int main() +{ + write(); + read(); +} diff --git a/include/openPMD/Dataset.hpp b/include/openPMD/Dataset.hpp index 0032888541..a610ce6a67 100644 --- a/include/openPMD/Dataset.hpp +++ b/include/openPMD/Dataset.hpp @@ -44,7 +44,7 @@ class Dataset JOINED_DIMENSION = std::numeric_limits::max() }; - Dataset(Datatype, Extent, std::string options = "{}"); + Dataset(Datatype, Extent = {1}, std::string options = "{}"); /** * @brief Constructor that sets the datatype to undefined. diff --git a/include/openPMD/Error.hpp b/include/openPMD/Error.hpp index 3e516e16ec..d1762e7e6d 100644 --- a/include/openPMD/Error.hpp +++ b/include/openPMD/Error.hpp @@ -109,6 +109,12 @@ namespace error public: NoSuchAttribute(std::string attributeName); }; + + class IllegalInOpenPMDStandard : public Error + { + public: + IllegalInOpenPMDStandard(std::string what); + }; } // namespace error /** diff --git a/include/openPMD/IO/AbstractIOHandler.hpp b/include/openPMD/IO/AbstractIOHandler.hpp index 1288a87b21..a9f05ce871 100644 --- a/include/openPMD/IO/AbstractIOHandler.hpp +++ b/include/openPMD/IO/AbstractIOHandler.hpp @@ -186,9 +186,11 @@ class AbstractIOHandler { friend class Series; friend class ADIOS2IOHandlerImpl; + friend class JSONIOHandlerImpl; friend class detail::ADIOS2File; private: + std::string m_openPMDVersion; IterationEncoding m_encoding = IterationEncoding::groupBased; void setIterationEncoding(IterationEncoding encoding) diff --git a/include/openPMD/IO/JSON/JSONIOHandler.hpp b/include/openPMD/IO/JSON/JSONIOHandler.hpp index 7cb6870f5b..e22fdb93d1 100644 --- a/include/openPMD/IO/JSON/JSONIOHandler.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandler.hpp @@ -23,6 +23,7 @@ #include "openPMD/IO/AbstractIOHandler.hpp" #include "openPMD/IO/JSON/JSONIOHandlerImpl.hpp" +#include "openPMD/auxiliary/JSON_internal.hpp" #if openPMD_HAVE_MPI 
#include diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp index b67ac9138a..fc369047ec 100644 --- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp @@ -180,6 +180,8 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl std::string originalExtension); #endif + void init(openPMD::json::TracingJSON config); + ~JSONIOHandlerImpl() override; void @@ -265,8 +267,69 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl */ FileFormat m_fileFormat{}; + std::string backendConfigKey() const; + + /* + * First return value: The location of the JSON value (either "json" or + * "toml") Second return value: The value that was maybe found at this place + */ + std::pair> + getBackendConfig(openPMD::json::TracingJSON &) const; + std::string m_originalExtension; + enum class SpecificationVia + { + DefaultValue, + Manually + }; + + ///////////////////// + // Dataset IO mode // + ///////////////////// + + enum class IOMode + { + Dataset, + Template + }; + + IOMode m_mode = IOMode::Dataset; + SpecificationVia m_IOModeSpecificationVia = SpecificationVia::DefaultValue; + bool m_printedSkippedWriteWarningAlready = false; + + struct DatasetMode + { + IOMode m_IOMode; + SpecificationVia m_specificationVia; + bool m_skipWarnings; + + template + operator std::tuple() + { + return std::tuple{ + m_IOMode, m_specificationVia, m_skipWarnings}; + } + }; + DatasetMode retrieveDatasetMode(openPMD::json::TracingJSON &config) const; + + /////////////////////// + // Attribute IO mode // + /////////////////////// + + enum class AttributeMode + { + Short, + Long + }; + + AttributeMode m_attributeMode = AttributeMode::Long; + SpecificationVia m_attributeModeSpecificationVia = + SpecificationVia::DefaultValue; + + std::pair + retrieveAttributeMode(openPMD::json::TracingJSON &config) const; + // HELPER FUNCTIONS // will use the IOHandler to retrieve the correct directory. 
@@ -313,7 +376,7 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl // essentially: m_i = \prod_{j=0}^{i-1} extent_j static Extent getMultiplicators(Extent const &extent); - static Extent getExtent(nlohmann::json &j); + static std::pair getExtent(nlohmann::json &j); // remove single '/' in the beginning and end of a string static std::string removeSlashes(std::string); @@ -371,7 +434,7 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl // check whether the json reference contains a valid dataset template - void verifyDataset(Param const ¶meters, nlohmann::json &); + IOMode verifyDataset(Param const ¶meters, nlohmann::json &); static nlohmann::json platformSpecifics(); diff --git a/include/openPMD/RecordComponent.hpp b/include/openPMD/RecordComponent.hpp index ebb5a80ca8..1563226d5e 100644 --- a/include/openPMD/RecordComponent.hpp +++ b/include/openPMD/RecordComponent.hpp @@ -173,7 +173,7 @@ class RecordComponent : public BaseRecordComponent * * @return RecordComponent& */ - virtual RecordComponent &resetDataset(Dataset); + RecordComponent &resetDataset(Dataset); uint8_t getDimensionality() const; Extent getExtent() const; diff --git a/include/openPMD/RecordComponent.tpp b/include/openPMD/RecordComponent.tpp index 0a4086e3d8..d5cead6d3b 100644 --- a/include/openPMD/RecordComponent.tpp +++ b/include/openPMD/RecordComponent.tpp @@ -21,6 +21,8 @@ #pragma once +#include "openPMD/Datatype.hpp" +#include "openPMD/Error.hpp" #include "openPMD/RecordComponent.hpp" #include "openPMD/Span.hpp" #include "openPMD/auxiliary/Memory.hpp" @@ -93,12 +95,38 @@ inline std::shared_ptr RecordComponent::loadChunk(Offset o, Extent e) #endif } +namespace detail +{ + template + struct do_convert + { + template + static std::optional call(Attribute &attr) + { + if constexpr (std::is_convertible_v) + { + return std::make_optional(attr.get()); + } + else + { + return std::nullopt; + } + } + + static constexpr char const *errorMsg = "is_conversible"; + }; +} // namespace detail + 
template inline void RecordComponent::loadChunk(std::shared_ptr data, Offset o, Extent e) { Datatype dtype = determineDatatype(data); - if (dtype != getDatatype()) + /* + * For constant components, we implement type conversion, so there is + * a separate check further below. + */ + if (dtype != getDatatype() && !constant()) if (!isSameInteger(getDatatype()) && !isSameFloatingPoint(getDatatype()) && !isSameComplexFloatingPoint(getDatatype()) && @@ -160,10 +188,25 @@ RecordComponent::loadChunk(std::shared_ptr data, Offset o, Extent e) for (auto const &dimensionSize : extent) numPoints *= dimensionSize; - T value = rc.m_constantValue.get(); + std::optional val = + switchNonVectorType>( + /* from = */ getDatatype(), rc.m_constantValue); - T *raw_ptr = data.get(); - std::fill(raw_ptr, raw_ptr + numPoints, value); + if (val.has_value()) + { + T *raw_ptr = data.get(); + std::fill(raw_ptr, raw_ptr + numPoints, *val); + } + else + { + std::string const data_type_str = datatypeToString(getDatatype()); + std::string const requ_type_str = + datatypeToString(determineDatatype()); + std::string err_msg = + "Type conversion during chunk loading not possible! 
"; + err_msg += "Data: " + data_type_str + "; Load as: " + requ_type_str; + throw error::WrongAPIUsage(err_msg); + } } else { diff --git a/include/openPMD/backend/PatchRecordComponent.hpp b/include/openPMD/backend/PatchRecordComponent.hpp index 63875b11e2..5c0cf6bfe7 100644 --- a/include/openPMD/backend/PatchRecordComponent.hpp +++ b/include/openPMD/backend/PatchRecordComponent.hpp @@ -66,8 +66,6 @@ class PatchRecordComponent : public RecordComponent PatchRecordComponent &setUnitSI(double); - PatchRecordComponent &resetDataset(Dataset) override; - uint8_t getDimensionality() const; Extent getExtent() const; diff --git a/include/openPMD/version.hpp b/include/openPMD/version.hpp index c57e3ecf17..b976da1c91 100644 --- a/include/openPMD/version.hpp +++ b/include/openPMD/version.hpp @@ -37,11 +37,20 @@ * compile-time) * @{ */ -#define OPENPMD_STANDARD_MAJOR 1 -#define OPENPMD_STANDARD_MINOR 1 +#define OPENPMD_STANDARD_MAJOR 2 +#define OPENPMD_STANDARD_MINOR 0 #define OPENPMD_STANDARD_PATCH 0 /** @} */ +/** default version of the openPMD standard (read & write, + * compile-time) + * @{ + */ +#define OPENPMD_STANDARD_DEFAULT_MAJOR 1 +#define OPENPMD_STANDARD_DEFAULT_MINOR 1 +#define OPENPMD_STANDARD_DEFAULT_PATCH 0 +/** @} */ + /** minimum supported version of the openPMD standard (read, compile-time) * @{ */ @@ -77,7 +86,17 @@ std::string getVersion(); * * @return std::string openPMD standard version (dot separated) */ -std::string getStandard(); +[[deprecated( + "Deprecated due to unclear semantics. 
Use one of getStandardMinimum, " + "getStandardMaximum() or getStandardDefault instead.")]] std::string +getStandard(); + +/** Return the default used version of the openPMD standard (read & write, + * run-time) + * + * @return std::string openPMD standard version (dot separated) + */ +std::string getStandardDefault(); /** Return the minimum supported version of the openPMD standard (read, * run-time) @@ -86,6 +105,13 @@ std::string getStandard(); */ std::string getStandardMinimum(); +/** Return the maximum supported version of the openPMD standard (read, + * run-time) + * + * @return std::string maximum openPMD standard version (dot separated) + */ +std::string getStandardMaximum(); /** Return the feature variants of the openPMD-api library (run-time) * * @return std::map< std::string, bool > with variants such as backends diff --git a/src/Error.cpp b/src/Error.cpp index f2e27a0213..dbc13f40b0 100644 --- a/src/Error.cpp +++ b/src/Error.cpp @@ -122,6 +122,12 @@ namespace error , description(std::move(description_in)) {} + IllegalInOpenPMDStandard::IllegalInOpenPMDStandard(std::string what_in) + : Error( + "Operation leads to illegal use of the openPMD standard:\n" + + std::move(what_in)) + {} + void throwReadError( AffectedObject affectedObject, Reason reason, diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index e06aa36ed8..f4b17569cb 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -30,6 +30,7 @@ #include "openPMD/auxiliary/Memory.hpp" #include "openPMD/auxiliary/StringManip.hpp" #include "openPMD/auxiliary/TypeTraits.hpp" +#include "openPMD/backend/Attribute.hpp" #include "openPMD/backend/Writable.hpp" #include @@ -63,6 +64,14 @@ namespace openPMD throw std::runtime_error((TEXT)); \ } +namespace JSONDefaults +{ + using const_str = char const *const; + constexpr const_str openpmd_internal = "__openPMD_internal"; + constexpr const_str IOMode = "dataset_mode"; + constexpr const_str 
AttributeMode = "attribute_mode"; +} // namespace JSONDefaults + namespace { struct DefaultValue @@ -125,34 +134,307 @@ namespace } return *accum_ptr; } + + void warnUnusedJson(openPMD::json::TracingJSON const &jsonConfig) + { + auto shadow = jsonConfig.invertShadow(); + if (shadow.size() > 0) + { + switch (jsonConfig.originallySpecifiedAs) + { + case openPMD::json::SupportedLanguages::JSON: + std::cerr << "Warning: parts of the backend configuration for " + "JSON/TOML backend remain unused:\n" + << shadow << std::endl; + break; + case openPMD::json::SupportedLanguages::TOML: { + auto asToml = openPMD::json::jsonToToml(shadow); + std::cerr << "Warning: parts of the backend configuration for " + "JSON/TOML backend remain unused:\n" + << json::format_toml(asToml) << std::endl; + break; + } + } + } + } + + // Does the same as datatypeToString(), but this makes sure that we don't + // accidentally change the JSON schema by modifying datatypeToString() + std::string jsonDatatypeToString(Datatype dt) + { + switch (dt) + { + using DT = Datatype; + case DT::CHAR: + return "CHAR"; + case DT::UCHAR: + return "UCHAR"; + case DT::SCHAR: + return "SCHAR"; + case DT::SHORT: + return "SHORT"; + case DT::INT: + return "INT"; + case DT::LONG: + return "LONG"; + case DT::LONGLONG: + return "LONGLONG"; + case DT::USHORT: + return "USHORT"; + case DT::UINT: + return "UINT"; + case DT::ULONG: + return "ULONG"; + case DT::ULONGLONG: + return "ULONGLONG"; + case DT::FLOAT: + return "FLOAT"; + case DT::DOUBLE: + return "DOUBLE"; + case DT::LONG_DOUBLE: + return "LONG_DOUBLE"; + case DT::CFLOAT: + return "CFLOAT"; + case DT::CDOUBLE: + return "CDOUBLE"; + case DT::CLONG_DOUBLE: + return "CLONG_DOUBLE"; + case DT::STRING: + return "STRING"; + case DT::VEC_CHAR: + return "VEC_CHAR"; + case DT::VEC_SHORT: + return "VEC_SHORT"; + case DT::VEC_INT: + return "VEC_INT"; + case DT::VEC_LONG: + return "VEC_LONG"; + case DT::VEC_LONGLONG: + return "VEC_LONGLONG"; + case DT::VEC_UCHAR: + return 
"VEC_UCHAR"; + case DT::VEC_USHORT: + return "VEC_USHORT"; + case DT::VEC_UINT: + return "VEC_UINT"; + case DT::VEC_ULONG: + return "VEC_ULONG"; + case DT::VEC_ULONGLONG: + return "VEC_ULONGLONG"; + case DT::VEC_FLOAT: + return "VEC_FLOAT"; + case DT::VEC_DOUBLE: + return "VEC_DOUBLE"; + case DT::VEC_LONG_DOUBLE: + return "VEC_LONG_DOUBLE"; + case DT::VEC_CFLOAT: + return "VEC_CFLOAT"; + case DT::VEC_CDOUBLE: + return "VEC_CDOUBLE"; + case DT::VEC_CLONG_DOUBLE: + return "VEC_CLONG_DOUBLE"; + case DT::VEC_SCHAR: + return "VEC_SCHAR"; + case DT::VEC_STRING: + return "VEC_STRING"; + case DT::ARR_DBL_7: + return "ARR_DBL_7"; + case DT::BOOL: + return "BOOL"; + case DT::UNDEFINED: + return "UNDEFINED"; + } + return "Unreachable!"; + } } // namespace +auto JSONIOHandlerImpl::retrieveDatasetMode( + openPMD::json::TracingJSON &config) const -> DatasetMode +{ + IOMode ioMode = m_mode; + SpecificationVia specificationVia = SpecificationVia::DefaultValue; + bool skipWarnings = false; + if (auto [configLocation, maybeConfig] = getBackendConfig(config); + maybeConfig.has_value()) + { + auto jsonConfig = maybeConfig.value(); + if (jsonConfig.json().contains("dataset")) + { + auto datasetConfig = jsonConfig["dataset"]; + if (datasetConfig.json().contains("mode")) + { + auto modeOption = openPMD::json::asLowerCaseStringDynamic( + datasetConfig["mode"].json()); + if (!modeOption.has_value()) + { + throw error::BackendConfigSchema( + {configLocation, "mode"}, + "Invalid value of non-string type (accepted values are " + "'dataset' and 'template'."); + } + auto mode = modeOption.value(); + if (mode == "dataset") + { + ioMode = IOMode::Dataset; + specificationVia = SpecificationVia::Manually; + } + else if (mode == "template") + { + ioMode = IOMode::Template; + specificationVia = SpecificationVia::Manually; + } + else if (mode == "template_no_warn") + { + ioMode = IOMode::Template; + specificationVia = SpecificationVia::Manually; + skipWarnings = true; + } + else + { + throw 
error::BackendConfigSchema( + {configLocation, "dataset", "mode"}, + "Invalid value: '" + mode + + "' (accepted values are 'dataset' and 'template'."); + } + } + } + } + return DatasetMode{ioMode, specificationVia, skipWarnings}; +} + +auto JSONIOHandlerImpl::retrieveAttributeMode( + openPMD::json::TracingJSON &config) const + -> std::pair +{ + AttributeMode res = m_attributeMode; + SpecificationVia res_2 = SpecificationVia::DefaultValue; + if (auto [configLocation, maybeConfig] = getBackendConfig(config); + maybeConfig.has_value()) + { + auto jsonConfig = maybeConfig.value(); + if (jsonConfig.json().contains("attribute")) + { + auto attributeConfig = jsonConfig["attribute"]; + if (attributeConfig.json().contains("mode")) + { + auto modeOption = openPMD::json::asLowerCaseStringDynamic( + attributeConfig["mode"].json()); + if (!modeOption.has_value()) + { + throw error::BackendConfigSchema( + {configLocation, "mode"}, + "Invalid value of non-string type (accepted values are " + "'dataset' and 'template'."); + } + auto mode = modeOption.value(); + if (mode == "short") + { + res = AttributeMode::Short; + res_2 = SpecificationVia::Manually; + } + else if (mode == "long") + { + res = AttributeMode::Long; + res_2 = SpecificationVia::Manually; + } + else + { + throw error::BackendConfigSchema( + {configLocation, "attribute", "mode"}, + "Invalid value: '" + mode + + "' (accepted values are 'short' and 'long'."); + } + } + } + } + return std::make_pair(res, res_2); +} + +std::string JSONIOHandlerImpl::backendConfigKey() const +{ + switch (m_fileFormat) + { + case FileFormat::Json: + return "json"; + case FileFormat::Toml: + return "toml"; + } + throw std::runtime_error("Unreachable!"); +} + +std::pair> +JSONIOHandlerImpl::getBackendConfig(openPMD::json::TracingJSON &config) const +{ + std::string configLocation = backendConfigKey(); + if (config.json().contains(configLocation)) + { + return std::make_pair( + std::move(configLocation), config[configLocation]); + } + else + { 
+ return std::make_pair(std::move(configLocation), std::nullopt); + } +} + JSONIOHandlerImpl::JSONIOHandlerImpl( AbstractIOHandler *handler, - // NOLINTNEXTLINE(performance-unnecessary-value-param) - [[maybe_unused]] openPMD::json::TracingJSON config, + openPMD::json::TracingJSON config, FileFormat format, std::string originalExtension) : AbstractIOHandlerImpl(handler) , m_fileFormat{format} , m_originalExtension{std::move(originalExtension)} -{} +{ + init(std::move(config)); +} #if openPMD_HAVE_MPI JSONIOHandlerImpl::JSONIOHandlerImpl( AbstractIOHandler *handler, MPI_Comm comm, - // NOLINTNEXTLINE(performance-unnecessary-value-param) - [[maybe_unused]] openPMD::json::TracingJSON config, + openPMD::json::TracingJSON config, FileFormat format, std::string originalExtension) : AbstractIOHandlerImpl(handler) , m_communicator{comm} , m_fileFormat{format} , m_originalExtension{std::move(originalExtension)} -{} +{ + init(std::move(config)); +} #endif +void JSONIOHandlerImpl::init(openPMD::json::TracingJSON config) +{ + // set the defaults + switch (m_fileFormat) + { + case FileFormat::Json: + // @todo take the switch to openPMD 2.0 as a chance to switch to + // short attribute mode as a default here + m_attributeMode = AttributeMode::Long; + m_mode = IOMode::Dataset; + break; + case FileFormat::Toml: + m_attributeMode = AttributeMode::Short; + m_mode = IOMode::Template; + break; + } + std::tie( + m_mode, m_IOModeSpecificationVia, m_printedSkippedWriteWarningAlready) = + retrieveDatasetMode(config); + std::tie(m_attributeMode, m_attributeModeSpecificationVia) = + retrieveAttributeMode(config); + + if (auto [_, backendConfig] = getBackendConfig(config); + backendConfig.has_value()) + { + (void)_; + warnUnusedJson(backendConfig.value()); + } +} + JSONIOHandlerImpl::~JSONIOHandlerImpl() = default; std::future JSONIOHandlerImpl::flush() @@ -173,6 +455,22 @@ void JSONIOHandlerImpl::createFile( access::write(m_handler->m_backendAccess), "[JSON] Creating a file in read-only mode 
is not possible."); + if (m_attributeModeSpecificationVia == SpecificationVia::DefaultValue) + { + switch (m_fileFormat) + { + + case FileFormat::Json: + m_attributeMode = m_handler->m_openPMDVersion >= "2." + ? AttributeMode::Short + : AttributeMode::Long; + break; + case FileFormat::Toml: + m_attributeMode = AttributeMode::Short; + break; + } + } + if (!writable->written) { std::string name = parameters.name + m_originalExtension; @@ -286,6 +584,24 @@ void JSONIOHandlerImpl::createDataset( "ADIOS1", "Joined Arrays currently only supported in ADIOS2"); } + openPMD::json::TracingJSON config = openPMD::json::parseOptions( + parameter.options, /* considerFiles = */ false); + // Retrieves mode from dataset-specific configuration, falls back to global + // value if not defined + auto [localMode, _, skipWarnings] = retrieveDatasetMode(config); + (void)_; + // No use in introducing logic to skip warnings only for one particular + // dataset. If warnings are skipped, then they are skipped consistently. + // Use |= since `false` is the default value and we don't wish to reset + // the flag. 
+ m_printedSkippedWriteWarningAlready |= skipWarnings; + + parameter.warnUnusedParameters( + config, + backendConfigKey(), + "Warning: parts of the dataset-specific backend configuration for " + "JSON/TOML backend remain unused"); + if (!writable->written) { /* Sanitize name */ @@ -302,24 +618,44 @@ void JSONIOHandlerImpl::createDataset( } setAndGetFilePosition(writable, name); auto &dset = jsonVal[name]; - dset["datatype"] = datatypeToString(parameter.dtype); - auto extent = parameter.extent; - switch (parameter.dtype) + dset["datatype"] = jsonDatatypeToString(parameter.dtype); + + switch (localMode) { - case Datatype::CFLOAT: - case Datatype::CDOUBLE: - case Datatype::CLONG_DOUBLE: { - extent.push_back(2); + case IOMode::Dataset: { + auto extent = parameter.extent; + switch (parameter.dtype) + { + case Datatype::CFLOAT: + case Datatype::CDOUBLE: + case Datatype::CLONG_DOUBLE: { + extent.push_back(2); + break; + } + default: + break; + } + // TOML does not support nulls, so initialize with zero + dset["data"] = initializeNDArray( + extent, + m_fileFormat == FileFormat::Json ? std::optional{} + : parameter.dtype); break; } - default: + case IOMode::Template: + if (parameter.extent != Extent{0} && + parameter.dtype != Datatype::UNDEFINED) + { + dset["extent"] = parameter.extent; + } + else + { + // no-op + // If extent is empty or no datatype is defined, don't bother + // writing it + } break; } - // TOML does not support nulls, so initialize with zero - dset["data"] = initializeNDArray( - extent, - m_fileFormat == FileFormat::Json ? 
std::optional() - : parameter.dtype); writable->written = true; m_dirty.emplace(file); } @@ -358,9 +694,11 @@ void JSONIOHandlerImpl::extendDataset( refreshFileFromParent(writable); auto &j = obtainJsonContents(writable); + IOMode localIOMode; try { - auto datasetExtent = getExtent(j); + Extent datasetExtent; + std::tie(datasetExtent, localIOMode) = getExtent(j); VERIFY_ALWAYS( datasetExtent.size() == parameters.extent.size(), "[JSON] Cannot change dimensionality of a dataset") @@ -377,28 +715,40 @@ void JSONIOHandlerImpl::extendDataset( throw std::runtime_error( "[JSON] The specified location contains no valid dataset"); } - auto extent = parameters.extent; - auto datatype = stringToDatatype(j["datatype"].get()); - switch (datatype) + + switch (localIOMode) { - case Datatype::CFLOAT: - case Datatype::CDOUBLE: - case Datatype::CLONG_DOUBLE: { - extent.push_back(2); - break; + case IOMode::Dataset: { + auto extent = parameters.extent; + auto datatype = stringToDatatype(j["datatype"].get()); + switch (datatype) + { + case Datatype::CFLOAT: + case Datatype::CDOUBLE: + case Datatype::CLONG_DOUBLE: { + extent.push_back(2); + break; + } + default: + // nothing to do + break; + } + // TOML does not support nulls, so initialize with zero + nlohmann::json newData = initializeNDArray( + extent, + m_fileFormat == FileFormat::Json ? std::optional{} + : datatype); + nlohmann::json &oldData = j["data"]; + mergeInto(newData, oldData); + j["data"] = newData; } - default: - // nothing to do - break; + break; + case IOMode::Template: { + j["extent"] = parameters.extent; + } + break; } - // TOML does not support nulls, so initialize with zero - nlohmann::json newData = initializeNDArray( - extent, - m_fileFormat == FileFormat::Json ? 
std::optional() - : datatype); - nlohmann::json &oldData = j["data"]; - mergeInto(newData, oldData); - j["data"] = newData; + writable->written = true; } @@ -694,7 +1044,7 @@ void JSONIOHandlerImpl::openDataset( *parameters.dtype = Datatype(stringToDatatype(datasetJson["datatype"].get())); - *parameters.extent = getExtent(datasetJson); + *parameters.extent = getExtent(datasetJson).first; writable->written = true; } @@ -877,7 +1227,21 @@ void JSONIOHandlerImpl::writeDataset( auto file = refreshFileFromParent(writable); auto &j = obtainJsonContents(writable); - verifyDataset(parameters, j); + switch (verifyDataset(parameters, j)) + { + case IOMode::Dataset: + break; + case IOMode::Template: + if (!m_printedSkippedWriteWarningAlready) + { + std::cerr + << "[JSON/TOML backend: Warning] Trying to write data to a " + "template dataset. Will skip." + << std::endl; + m_printedSkippedWriteWarningAlready = true; + } + return; + } switchType(parameters.dtype, j, parameters); @@ -913,30 +1277,262 @@ void JSONIOHandlerImpl::writeAttribute( } nlohmann::json value; switchType(parameter.dtype, value, parameter.resource); - (*jsonVal)[filePosition->id]["attributes"][parameter.name] = { - {"datatype", datatypeToString(parameter.dtype)}, {"value", value}}; + switch (m_attributeMode) + { + case AttributeMode::Long: + (*jsonVal)[filePosition->id]["attributes"][parameter.name] = { + {"datatype", jsonDatatypeToString(parameter.dtype)}, + {"value", value}}; + break; + case AttributeMode::Short: + // short form + (*jsonVal)[filePosition->id]["attributes"][parameter.name] = value; + break; + } writable->written = true; m_dirty.emplace(file); } +namespace +{ + struct FillWithZeroes + { + template + static void call(void *ptr, Extent const &extent) + { + T *casted = static_cast(ptr); + size_t flattenedExtent = std::accumulate( + extent.begin(), + extent.end(), + size_t(1), + [](size_t left, size_t right) { return left * right; }); + std::fill_n(casted, flattenedExtent, T{}); + } + + static 
constexpr char const *errorMsg = + "[JSON Backend] Fill with zeroes."; + }; +} // namespace + void JSONIOHandlerImpl::readDataset( Writable *writable, Parameter ¶meters) { refreshFileFromParent(writable); setAndGetFilePosition(writable); auto &j = obtainJsonContents(writable); - verifyDataset(parameters, j); + IOMode localMode = verifyDataset(parameters, j); - try + switch (localMode) { - switchType(parameters.dtype, j["data"], parameters); + case IOMode::Template: + std::cerr << "[Warning] Cannot read chunks in Template mode of JSON " + "backend. Will fill with zeroes instead." + << std::endl; + switchNonVectorType( + parameters.dtype, parameters.data.get(), parameters.extent); + return; + case IOMode::Dataset: + try + { + switchType(parameters.dtype, j["data"], parameters); + } + catch (json::basic_json::type_error &) + { + throw std::runtime_error( + "[JSON] The given path does not contain a valid dataset."); + } + break; } - catch (json::basic_json::type_error &) +} + +namespace +{ + template + Attribute recoverVectorAttributeFromJson(nlohmann::json const &j) { - throw std::runtime_error( - "[JSON] The given path does not contain a valid dataset."); + if (!j.is_array()) + { + throw std::runtime_error( + "[JSON backend: recoverVectorAttributeFromJson] Internal " + "control flow error."); + } + + if (j.size() == 7 && + (std::is_same_v || + std::is_same_v || + std::is_same_v)) + { + /* + * The frontend must deal with wrong type reports here. 
+ */ + std::array res; + for (size_t i = 0; i < 7; ++i) + { + res[i] = j[i].get(); + } + return res; + } + else + { + std::vector res; + res.reserve(j.size()); + for (auto const &i : j) + { + res.push_back(i.get()); + } + return res; + } } -} + + nlohmann::json::value_t unifyNumericType(nlohmann::json const &j) + { + if (!j.is_array() || j.empty()) + { + throw std::runtime_error( + "[JSON backend: recoverVectorAttributeFromJson] Internal " + "control flow error."); + } + auto dtypeRanking = [](nlohmann::json::value_t dtype) -> unsigned { + switch (dtype) + { + case nlohmann::json::value_t::number_unsigned: + return 0; + case nlohmann::json::value_t::number_integer: + return 1; + case nlohmann::json::value_t::number_float: + return 2; + default: + throw std::runtime_error( + "[JSON backend] Encountered vector with mixed number and " + "non-number datatypes."); + } + }; + auto higherDtype = + [&dtypeRanking]( + nlohmann::json::value_t dt1, + nlohmann::json::value_t dt2) -> nlohmann::json::value_t { + if (dtypeRanking(dt1) > dtypeRanking(dt2)) + { + return dt1; + } + else + { + return dt2; + } + }; + + nlohmann::json::value_t res = j[0].type(); + for (size_t i = 1; i < j.size(); ++i) + { + res = higherDtype(res, j[i].type()); + } + return res; + } + + Attribute recoverAttributeFromJson( + nlohmann::json const &j, std::string const &nameForErrorMessages) + { + // @todo use ReadError once it's mainlined + switch (j.type()) + { + case nlohmann::json::value_t::null: + throw std::runtime_error( + "[JSON backend] Attribute must not be null: '" + + nameForErrorMessages + "'."); + case nlohmann::json::value_t::object: + throw std::runtime_error( + "[JSON backend] Shorthand-style attribute must not be an " + "object: '" + + nameForErrorMessages + "'."); + case nlohmann::json::value_t::array: + if (j.empty()) + { + std::cerr << "Cannot recover datatype of empty vector without " + "explicit type annotation for attribute '" + << nameForErrorMessages + << "'. 
Will continue with VEC_INT datatype." + << std::endl; + return std::vector{}; + } + else + { + auto valueType = j[0].type(); + /* + * If the vector is of numeric type, it might happen that the + * first entry is an integer, but a later entry is a float. + * We need to pick the most generic datatype in that case. + */ + if (valueType == nlohmann::json::value_t::number_float || + valueType == nlohmann::json::value_t::number_unsigned || + valueType == nlohmann::json::value_t::number_integer) + { + valueType = unifyNumericType(j); + } + switch (valueType) + { + case nlohmann::json::value_t::null: + throw std::runtime_error( + "[JSON backend] Attribute must not be null: '" + + nameForErrorMessages + "'."); + case nlohmann::json::value_t::object: + throw std::runtime_error( + "[JSON backend] Invalid contained datatype (object) " + "inside vector-type attribute: '" + + nameForErrorMessages + "'."); + case nlohmann::json::value_t::array: + throw std::runtime_error( + "[JSON backend] Invalid contained datatype (array) " + "inside vector-type attribute: '" + + nameForErrorMessages + "'."); + case nlohmann::json::value_t::string: + return recoverVectorAttributeFromJson(j); + case nlohmann::json::value_t::boolean: + throw std::runtime_error( + "[JSON backend] Attribute must not be vector of bool: " + "'" + + nameForErrorMessages + "'."); + case nlohmann::json::value_t::number_integer: + return recoverVectorAttributeFromJson< + nlohmann::json::number_integer_t>(j); + case nlohmann::json::value_t::number_unsigned: + return recoverVectorAttributeFromJson< + nlohmann::json::number_unsigned_t>(j); + case nlohmann::json::value_t::number_float: + return recoverVectorAttributeFromJson< + nlohmann::json::number_float_t>(j); + case nlohmann::json::value_t::binary: + throw std::runtime_error( + "[JSON backend] Attribute must not have binary type: " + "'" + + nameForErrorMessages + "'."); + case nlohmann::json::value_t::discarded: + throw std::runtime_error( + "Internal JSON parser 
datatype leaked into JSON " + "value."); + } + throw std::runtime_error("Unreachable!"); + } + case nlohmann::json::value_t::string: + return j.get(); + case nlohmann::json::value_t::boolean: + return j.get(); + case nlohmann::json::value_t::number_integer: + return j.get(); + case nlohmann::json::value_t::number_unsigned: + return j.get(); + case nlohmann::json::value_t::number_float: + return j.get(); + case nlohmann::json::value_t::binary: + throw std::runtime_error( + "[JSON backend] Attribute must not have binary type: '" + + nameForErrorMessages + "'."); + case nlohmann::json::value_t::discarded: + throw std::runtime_error( + "Internal JSON parser datatype leaked into JSON value."); + } + throw std::runtime_error("Unreachable!"); + } +} // namespace void JSONIOHandlerImpl::readAttribute( Writable *writable, Parameter ¶meters) @@ -962,9 +1558,19 @@ void JSONIOHandlerImpl::readAttribute( auto &j = jsonLoc[name]; try { - *parameters.dtype = - Datatype(stringToDatatype(j["datatype"].get())); - switchType(*parameters.dtype, j["value"], parameters); + if (j.is_object()) + { + *parameters.dtype = + Datatype(stringToDatatype(j["datatype"].get())); + switchType( + *parameters.dtype, j["value"], parameters); + } + else + { + Attribute attr = recoverAttributeFromJson(j, name); + *parameters.dtype = attr.dtype; + *parameters.resource = attr.getResource(); + } } catch (json::type_error &) { @@ -1182,28 +1788,44 @@ Extent JSONIOHandlerImpl::getMultiplicators(Extent const &extent) return res; } -Extent JSONIOHandlerImpl::getExtent(nlohmann::json &j) +auto JSONIOHandlerImpl::getExtent(nlohmann::json &j) + -> std::pair { Extent res; - nlohmann::json *ptr = &j["data"]; - while (ptr->is_array()) + IOMode ioMode; + if (j.contains("data")) { - res.push_back(ptr->size()); - ptr = &(*ptr)[0]; + ioMode = IOMode::Dataset; + nlohmann::json *ptr = &j["data"]; + while (ptr->is_array()) + { + res.push_back(ptr->size()); + ptr = &(*ptr)[0]; + } + switch 
(stringToDatatype(j["datatype"].get())) + { + case Datatype::CFLOAT: + case Datatype::CDOUBLE: + case Datatype::CLONG_DOUBLE: + // the last "dimension" is only the two entries for the complex + // number, so remove that again + res.erase(res.end() - 1); + break; + default: + break; + } } - switch (stringToDatatype(j["datatype"].get())) + else if (j.contains("extent")) { - case Datatype::CFLOAT: - case Datatype::CDOUBLE: - case Datatype::CLONG_DOUBLE: - // the last "dimension" is only the two entries for the complex - // number, so remove that again - res.erase(res.end() - 1); - break; - default: - break; + ioMode = IOMode::Template; + res = j["extent"].get(); } - return res; + else + { + ioMode = IOMode::Template; + res = {0}; + } + return std::make_pair(std::move(res), ioMode); } std::string JSONIOHandlerImpl::removeSlashes(std::string s) @@ -1340,6 +1962,73 @@ JSONIOHandlerImpl::obtainJsonContents(File const &file) auto res = serialImplementation(); #endif + if (res->contains(JSONDefaults::openpmd_internal)) + { + auto const &openpmd_internal = res->at(JSONDefaults::openpmd_internal); + + // Init dataset mode according to file's default + if (m_IOModeSpecificationVia == SpecificationVia::DefaultValue && + openpmd_internal.contains(JSONDefaults::IOMode)) + { + auto modeOption = openPMD::json::asLowerCaseStringDynamic( + openpmd_internal.at(JSONDefaults::IOMode)); + if (!modeOption.has_value()) + { + std::cerr + << "[JSON/TOML backend] Warning: Invalid value of " + "non-string type at internal meta table for entry '" + << JSONDefaults::IOMode << "'. Will ignore and continue." + << std::endl; + } + else if (modeOption.value() == "dataset") + { + m_mode = IOMode::Dataset; + } + else if (modeOption.value() == "template") + { + m_mode = IOMode::Template; + } + else + { + std::cerr << "[JSON/TOML backend] Warning: Invalid value '" + << modeOption.value() + << "' at internal meta table for entry '" + << JSONDefaults::IOMode + << "'. Will ignore and continue." 
<< std::endl; + } + } + + if (m_IOModeSpecificationVia == SpecificationVia::DefaultValue && + openpmd_internal.contains(JSONDefaults::AttributeMode)) + { + auto modeOption = openPMD::json::asLowerCaseStringDynamic( + openpmd_internal.at(JSONDefaults::AttributeMode)); + if (!modeOption.has_value()) + { + std::cerr + << "[JSON/TOML backend] Warning: Invalid value of " + "non-string type at internal meta table for entry '" + << JSONDefaults::AttributeMode + << "'. Will ignore and continue." << std::endl; + } + else if (modeOption.value() == "long") + { + m_attributeMode = AttributeMode::Long; + } + else if (modeOption.value() == "short") + { + m_attributeMode = AttributeMode::Short; + } + else + { + std::cerr << "[JSON/TOML backend] Warning: Invalid value '" + << modeOption.value() + << "' at internal meta table for entry '" + << JSONDefaults::IOMode + << "'. Will ignore and continue." << std::endl; + } + } + } m_jsonVals.emplace(file, res); return res; } @@ -1365,7 +2054,30 @@ auto JSONIOHandlerImpl::putJsonContents( return it; } - (*it->second)["platform_byte_widths"] = platformSpecifics(); + switch (m_mode) + { + case IOMode::Dataset: + (*it->second)["platform_byte_widths"] = platformSpecifics(); + (*it->second)[JSONDefaults::openpmd_internal][JSONDefaults::IOMode] = + "dataset"; + break; + case IOMode::Template: + (*it->second)[JSONDefaults::openpmd_internal][JSONDefaults::IOMode] = + "template"; + break; + } + + switch (m_attributeMode) + { + case AttributeMode::Short: + (*it->second)[JSONDefaults::openpmd_internal] + [JSONDefaults::AttributeMode] = "short"; + break; + case AttributeMode::Long: + (*it->second)[JSONDefaults::openpmd_internal] + [JSONDefaults::AttributeMode] = "long"; + break; + } auto writeSingleFile = [this, &it](std::string const &writeThisFile) { auto [fh, _, fh_with_precision] = @@ -1482,6 +2194,7 @@ merge the .json files somehow (no tooling provided for this (yet)). 
#else serialImplementation(); #endif + if (unsetDirty) { m_dirty.erase(filename); @@ -1568,8 +2281,8 @@ bool JSONIOHandlerImpl::isDataset(nlohmann::json const &j) { return false; } - auto i = j.find("data"); - return i != j.end() && i.value().is_array(); + auto i = j.find("datatype"); + return i != j.end() && i.value().is_string(); } bool JSONIOHandlerImpl::isGroup(nlohmann::json::const_iterator const &it) @@ -1580,21 +2293,24 @@ bool JSONIOHandlerImpl::isGroup(nlohmann::json::const_iterator const &it) { return false; } - auto i = j.find("data"); - return i == j.end() || !i.value().is_array(); + + auto i = j.find("datatype"); + return i == j.end() || !i.value().is_string(); } template -void JSONIOHandlerImpl::verifyDataset( - Param const ¶meters, nlohmann::json &j) +auto JSONIOHandlerImpl::verifyDataset( + Param const ¶meters, nlohmann::json &j) -> IOMode { VERIFY_ALWAYS( isDataset(j), "[JSON] Specified dataset does not exist or is not a dataset."); + IOMode res; try { - auto datasetExtent = getExtent(j); + Extent datasetExtent; + std::tie(datasetExtent, res) = getExtent(j); VERIFY_ALWAYS( datasetExtent.size() == parameters.extent.size(), "[JSON] Read/Write request does not fit the dataset's dimension"); @@ -1616,6 +2332,7 @@ void JSONIOHandlerImpl::verifyDataset( throw std::runtime_error( "[JSON] The given path does not contain a valid dataset."); } + return res; } nlohmann::json JSONIOHandlerImpl::platformSpecifics() @@ -1641,7 +2358,7 @@ nlohmann::json JSONIOHandlerImpl::platformSpecifics() Datatype::BOOL}; for (auto it = std::begin(datatypes); it != std::end(datatypes); it++) { - res[datatypeToString(*it)] = toBytes(*it); + res[jsonDatatypeToString(*it)] = toBytes(*it); } return res; } @@ -1697,7 +2414,7 @@ nlohmann::json JSONIOHandlerImpl::CppToJSON::operator()(const T &val) } template -nlohmann::json JSONIOHandlerImpl::CppToJSON >::operator()( +nlohmann::json JSONIOHandlerImpl::CppToJSON>::operator()( const std::vector &v) { nlohmann::json j; @@ -1710,7 
+2427,7 @@ nlohmann::json JSONIOHandlerImpl::CppToJSON >::operator()( } template -nlohmann::json JSONIOHandlerImpl::CppToJSON >::operator()( +nlohmann::json JSONIOHandlerImpl::CppToJSON>::operator()( const std::array &v) { nlohmann::json j; @@ -1729,7 +2446,7 @@ T JSONIOHandlerImpl::JsonToCpp::operator()(nlohmann::json const &json) } template -std::vector JSONIOHandlerImpl::JsonToCpp >::operator()( +std::vector JSONIOHandlerImpl::JsonToCpp>::operator()( nlohmann::json const &json) { std::vector v; @@ -1742,7 +2459,7 @@ std::vector JSONIOHandlerImpl::JsonToCpp >::operator()( } template -std::array JSONIOHandlerImpl::JsonToCpp >::operator()( +std::array JSONIOHandlerImpl::JsonToCpp>::operator()( nlohmann::json const &json) { std::array a; diff --git a/src/RecordComponent.cpp b/src/RecordComponent.cpp index 0387268514..fc17909fc6 100644 --- a/src/RecordComponent.cpp +++ b/src/RecordComponent.cpp @@ -104,15 +104,20 @@ RecordComponent &RecordComponent::resetDataset(Dataset d) rc.m_hasBeenExtended = true; } - if (d.dtype == Datatype::UNDEFINED) + if (d.extent.empty()) + throw std::runtime_error("Dataset extent must be at least 1D."); + if (d.empty()) { - throw error::WrongAPIUsage( - "[RecordComponent] Must set specific datatype."); + if (d.dtype != Datatype::UNDEFINED) + { + return makeEmpty(std::move(d)); + } + else + { + rc.m_dataset = std::move(d); + return *this; + } } - // if( d.extent.empty() ) - // throw std::runtime_error("Dataset extent must be at least 1D."); - if (d.empty()) - return makeEmpty(std::move(d)); rc.m_isEmpty = false; if (written()) diff --git a/src/Series.cpp b/src/Series.cpp index d587575b44..9aa610f5ce 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -139,7 +139,13 @@ std::string Series::openPMD() const Series &Series::setOpenPMD(std::string const &o) { + if (o >= "2.0") + { + std::cerr << "[Warning] openPMD 2.0 is still under development." 
+ << std::endl; + } setAttribute("openPMD", o); + IOHandler()->m_openPMDVersion = o; return *this; } @@ -162,9 +168,10 @@ std::string Series::basePath() const Series &Series::setBasePath(std::string const &bp) { std::string version = openPMD(); - if (version == "1.0.0" || version == "1.0.1" || version == "1.1.0") + if (version == "1.0.0" || version == "1.0.1" || version == "1.1.0" || + version == "2.0.0") throw std::runtime_error( - "Custom basePath not allowed in openPMD <=1.1.0"); + "Custom basePath not allowed in openPMD <=2.0"); setAttribute("basePath", bp); return *this; @@ -1222,7 +1229,7 @@ void Series::initDefaults(IterationEncoding ie, bool initAll) } } if (!containsAttribute("openPMD")) - setOpenPMD(getStandard()); + setOpenPMD(getStandardDefault()); /* * In Append mode, only init the rest of the defaults after checking that * the file does not yet exist to avoid overriding more than needed. @@ -1828,7 +1835,8 @@ void Series::readOneIterationFileBased(std::string const &filePath) Parameter pOpen; std::string version = openPMD(); - if (version == "1.0.0" || version == "1.0.1" || version == "1.1.0") + if (version == "1.0.0" || version == "1.0.1" || version == "1.1.0" || + version == "2.0.0") pOpen.path = auxiliary::replace_first(basePath(), "/%T/", ""); else throw error::ReadError( @@ -1980,7 +1988,8 @@ creating new iterations. 
Parameter pOpen; std::string version = openPMD(); - if (version == "1.0.0" || version == "1.0.1" || version == "1.1.0") + if (version == "1.0.0" || version == "1.0.1" || version == "1.1.0" || + version == "2.0.0") pOpen.path = auxiliary::replace_first(basePath(), "/%T/", ""); else throw error::ReadError( @@ -2968,19 +2977,13 @@ auto Series::currentSnapshot() const if (series.iterations.containsAttribute("snapshot")) { auto const &attribute = series.iterations.getAttribute("snapshot"); - switch (attribute.dtype) + auto res = attribute.getOptional(); + if (res.has_value()) { - case Datatype::ULONGLONG: - case Datatype::VEC_ULONGLONG: { - auto const &vec = attribute.get>(); - return vec_t{vec.begin(), vec.end()}; - } - case Datatype::ULONG: - case Datatype::VEC_ULONG: { - auto const &vec = attribute.get>(); - return vec_t{vec.begin(), vec.end()}; + return res.value(); } - default: { + else + { std::stringstream s; s << "Unexpected datatype for '/data/snapshot': " << attribute.dtype << " (expected a vector of integer, found " + @@ -2992,7 +2995,6 @@ auto Series::currentSnapshot() const {}, s.str()); } - } } else { diff --git a/src/backend/PatchRecordComponent.cpp b/src/backend/PatchRecordComponent.cpp index af19923fad..2ac202e44a 100644 --- a/src/backend/PatchRecordComponent.cpp +++ b/src/backend/PatchRecordComponent.cpp @@ -34,23 +34,6 @@ PatchRecordComponent &PatchRecordComponent::setUnitSI(double usi) return *this; } -PatchRecordComponent &PatchRecordComponent::resetDataset(Dataset d) -{ - if (written()) - throw std::runtime_error( - "A Records Dataset can not (yet) be changed after it has been " - "written."); - if (d.extent.empty()) - throw std::runtime_error("Dataset extent must be at least 1D."); - if (d.empty()) - throw std::runtime_error( - "Dataset extent must not be zero in any dimension."); - - get().m_dataset = std::move(d); - setDirty(true); - return *this; -} - uint8_t PatchRecordComponent::getDimensionality() const { return 1; diff --git 
a/src/binding/python/Error.cpp b/src/binding/python/Error.cpp index 681398c579..27d9c7d9b4 100644 --- a/src/binding/python/Error.cpp +++ b/src/binding/python/Error.cpp @@ -9,6 +9,7 @@ #include "openPMD/Error.hpp" #include "openPMD/binding/python/Common.hpp" +#include void init_Error(py::module &m) { @@ -22,6 +23,8 @@ void init_Error(py::module &m) py::register_exception(m, "ErrorInternal", baseError); py::register_exception( m, "ErrorNoSuchAttribute", baseError); + py::register_exception( + m, "ErrorIllegalInOpenPMDStandard", baseError); #ifndef NDEBUG m.def("test_throw", [](std::string description) { diff --git a/src/version.cpp b/src/version.cpp index c2e8809a32..78f09ca733 100644 --- a/src/version.cpp +++ b/src/version.cpp @@ -34,10 +34,16 @@ std::string openPMD::getVersion() } std::string openPMD::getStandard() +{ + return getStandardMaximum(); +} + +std::string openPMD::getStandardDefault() { std::stringstream standard; - standard << OPENPMD_STANDARD_MAJOR << "." << OPENPMD_STANDARD_MINOR << "." - << OPENPMD_STANDARD_PATCH; + standard << OPENPMD_STANDARD_DEFAULT_MAJOR << "." + << OPENPMD_STANDARD_DEFAULT_MINOR << "." + << OPENPMD_STANDARD_DEFAULT_PATCH; return standard.str(); } @@ -49,3 +55,11 @@ std::string openPMD::getStandardMinimum() << OPENPMD_STANDARD_MIN_PATCH; return standardMin.str(); } + +std::string openPMD::getStandardMaximum() +{ + std::stringstream standard; + standard << OPENPMD_STANDARD_MAJOR << "." << OPENPMD_STANDARD_MINOR << "." 
+ << OPENPMD_STANDARD_PATCH; + return standard.str(); +} diff --git a/test/CoreTest.cpp b/test/CoreTest.cpp index 17739e0b28..d27d68a8c5 100644 --- a/test/CoreTest.cpp +++ b/test/CoreTest.cpp @@ -1,6 +1,4 @@ // expose private and protected members for invasive testing -#include "openPMD/Datatype.hpp" -#include "openPMD/Error.hpp" #if openPMD_USE_INVASIVE_TESTS #define OPENPMD_private public: #define OPENPMD_protected public: @@ -36,8 +34,11 @@ TEST_CASE("versions_test", "[core]") auto const is_dot = [](char const c) { return c == '.'; }; REQUIRE(2u == std::count_if(apiVersion.begin(), apiVersion.end(), is_dot)); - auto const standard = getStandard(); - REQUIRE(standard == "1.1.0"); + auto const standardDefault = getStandardDefault(); + REQUIRE(standardDefault == "1.1.0"); + + auto const standard = getStandardMaximum(); + REQUIRE(standard == "2.0.0"); auto const standardMin = getStandardMinimum(); REQUIRE(standardMin == "1.0.0"); diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index 7323a32582..c1f3888c7a 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -2,6 +2,7 @@ #include "openPMD/ChunkInfo_internal.hpp" #include "openPMD/Datatype.hpp" #include "openPMD/IO/Access.hpp" +#include "openPMD/auxiliary/JSON.hpp" #if openPMD_USE_INVASIVE_TESTS #define OPENPMD_private public: #define OPENPMD_protected public: @@ -912,6 +913,7 @@ inline void constant_scalar(std::string const &file_ending) // constant scalar Series s = Series("../samples/constant_scalar." 
+ file_ending, Access::CREATE); + s.setOpenPMD("2.0.0"); auto rho = s.iterations[1].meshes["rho"][MeshRecordComponent::SCALAR]; REQUIRE(s.iterations[1].meshes["rho"].scalar()); rho.resetDataset(Dataset(Datatype::CHAR, {1, 2, 3})); @@ -1270,13 +1272,24 @@ TEST_CASE("particle_patches", "[serial]") } } -inline void dtype_test(const std::string &backend) +inline void dtype_test( + const std::string &backend, + std::optional activateTemplateMode = {}) { bool test_long_double = backend != "json" && backend != "toml"; bool test_long_long = (backend != "json") || sizeof(long long) <= 8; { - Series s = Series("../samples/dtype_test." + backend, Access::CREATE); - + Series s = activateTemplateMode.has_value() + ? Series( + "../samples/dtype_test." + backend, + Access::CREATE, + activateTemplateMode.value()) + : + // test TOML long attribute mode by default + Series( + "../samples/dtype_test." + backend, + Access::CREATE, + R"({"toml":{"attribute":{"mode":"long"}}})"); char c = 'c'; s.setAttribute("char", c); unsigned char uc = 'u'; @@ -1397,8 +1410,12 @@ inline void dtype_test(const std::string &backend) } } - Series s = Series("../samples/dtype_test." + backend, Access::READ_ONLY); - + Series s = activateTemplateMode.has_value() + ? Series( + "../samples/dtype_test." + backend, + Access::READ_ONLY, + activateTemplateMode.value()) + : Series("../samples/dtype_test." 
+ backend, Access::READ_ONLY); REQUIRE(s.getAttribute("char").get() == 'c'); REQUIRE(s.getAttribute("uchar").get() == 'u'); REQUIRE(s.getAttribute("schar").get() == 's'); @@ -1468,6 +1485,10 @@ inline void dtype_test(const std::string &backend) REQUIRE(s.getAttribute("bool").get() == true); REQUIRE(s.getAttribute("boolF").get() == false); + if (activateTemplateMode.has_value()) + { + return; + } // same implementation types (not necessary aliases) detection #if !defined(_MSC_VER) REQUIRE(s.getAttribute("short").dtype == Datatype::SHORT); @@ -1540,6 +1561,17 @@ TEST_CASE("dtype_test", "[serial]") { dtype_test(t); } + dtype_test("json", R"( +{ + "json": { + "dataset": { + "mode": "template" + }, + "attribute": { + "mode": "short" + } + } +})"); if (auto extensions = getFileExtensions(); std::find(extensions.begin(), extensions.end(), "toml") != extensions.end()) @@ -1548,6 +1580,17 @@ TEST_CASE("dtype_test", "[serial]") * testing it here. */ dtype_test("toml"); + dtype_test("toml", R"( +{ + "toml": { + "dataset": { + "mode": "template" + }, + "attribute": { + "mode": "short" + } + } +})"); } } @@ -1560,12 +1603,13 @@ struct ReadFromAnyType } }; -inline void write_test(const std::string &backend) +inline void write_test( + const std::string &backend, + std::string jsonCfg = "{}", + bool test_rank_table = true) { -#ifdef _WIN32 - std::string jsonCfg = "{}"; -#else - std::string jsonCfg = R"({"rank_table": "posix_hostname"})"; +#ifndef _WIN32 + jsonCfg = json::merge(jsonCfg, R"({"rank_table": "posix_hostname"})"); chunk_assignment::RankMeta compare{ {0, host_info::byMethod( @@ -1598,8 +1642,10 @@ inline void write_test(const std::string &backend) return posOff++; }); std::shared_ptr positionOffset_local_1(new uint64_t); - e_1["positionOffset"]["x"].resetDataset( - Dataset(determineDatatype(positionOffset_local_1), {4})); + e_1["positionOffset"]["x"].resetDataset(Dataset( + determineDatatype(positionOffset_local_1), + {4}, + 
R"({"json":{"dataset":{"mode":"dataset"}}})")); for (uint64_t i = 0; i < 4; ++i) { @@ -1685,7 +1731,10 @@ inline void write_test(const std::string &backend) variantTypeDataset); #ifndef _WIN32 - REQUIRE(read.rankTable(/* collective = */ false) == compare); + if (test_rank_table) + { + REQUIRE(read.rankTable(/* collective = */ false) == compare); + } #endif } @@ -1693,7 +1742,41 @@ TEST_CASE("write_test", "[serial]") { for (auto const &t : testedFileExtensions()) { - write_test(t); + if (t == "json") + { + write_test( + "template." + t, + R"( +{ + "json": { + "dataset": { + "mode": "template" + }, + "attribute": { + "mode": "short" + } + } +})", + false); + write_test( + t, + R"( +{ + "json": { + "dataset": { + "mode": "dataset" + }, + "attribute": { + "mode": "short" + } + } +})", + true); + } + else + { + write_test(t); + } Series list{"../samples/serial_write." + t, Access::READ_ONLY}; helper::listSeries(list); } @@ -1849,7 +1932,7 @@ inline void fileBased_write_test(const std::string &backend) Series o = Series( "../samples/subdir/serial_fileBased_write%03T." + backend, Access::CREATE, - jsonCfg); + json::merge(jsonCfg, R"({"toml":{"dataset":{"mode":"dataset"}}})")); ParticleSpecies &e_1 = o.iterations[1].particles["e"]; @@ -7467,7 +7550,10 @@ void groupbased_read_write(std::string const &ext) std::string filename = "../samples/groupbased_read_write." 
+ ext; { - Series write(filename, Access::CREATE); + Series write( + filename, + Access::CREATE, + R"({"toml":{"dataset":{"mode":"dataset"}}})"); auto E_x = write.iterations[0].meshes["E"]["x"]; auto E_y = write.iterations[0].meshes["E"]["y"]; E_x.resetDataset(ds); @@ -7480,7 +7566,10 @@ void groupbased_read_write(std::string const &ext) } { - Series write(filename, Access::READ_WRITE); + Series write( + filename, + Access::READ_WRITE, + R"({"toml":{"dataset":{"mode":"dataset"}}})"); // create a new iteration auto E_x = write.iterations[1].meshes["E"]["x"]; E_x.resetDataset(ds); @@ -7520,7 +7609,10 @@ void groupbased_read_write(std::string const &ext) // check that truncation works correctly { - Series write(filename, Access::CREATE); + Series write( + filename, + Access::CREATE, + R"({"toml":{"dataset":{"mode":"dataset"}}})"); // create a new iteration auto E_x = write.iterations[2].meshes["E"]["x"]; E_x.resetDataset(ds); diff --git a/test/python/unittest/API/APITest.py b/test/python/unittest/API/APITest.py index 59e6b5c97e..1abe26340a 100644 --- a/test/python/unittest/API/APITest.py +++ b/test/python/unittest/API/APITest.py @@ -25,7 +25,8 @@ from TestUtilities.TestUtilities import generateTestFilePath tested_file_extensions = [ - ext for ext in io.file_extensions if ext != 'sst' and ext != 'ssc' + ext for ext in io.file_extensions + if ext != 'sst' and ext != 'ssc' and ext != 'toml' ]