JCSDA-internal · BenjaminRuston · Nov 25, 2024 · Nov 26, 2024 · Nov 26, 2024 · Jan 6, 2025
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
@@ -13,6 +13,7 @@ add_subdirectory(ssec)
 add_subdirectory(wrfda_ncdiag)
 add_subdirectory(single_observation)
 add_subdirectory(mrms)
+add_subdirectory(tomorrow_io)
 
 # Optional components
 if(iodaconv_gnssro_ENABLED)

diff --git a/src/tomorrow_io/CMakeLists.txt b/src/tomorrow_io/CMakeLists.txt
@@ -0,0 +1,21 @@
+# (C) Copyright 2024 The Tomorrow Companies, Inc.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+
+# Converter executable for the Tomorrow.io TMS and NASA TROPICS L1B products.
+add_executable(convert_tio.x
+	creation.cc
+	creation_tio_sat_inst_specs.cc
+	copy.h
+	copy_function.cc
+	copy_helpers.cc
+	copy_tms.cc
+	copy_tms_datetime.cc
+	detect_tms_type.cc
+	product.cc
+	product.h
+	main_tio.cc
+	)
+# An executable has no link interface to propagate, so link privately.
+target_link_libraries(convert_tio.x PRIVATE ioda)
+# The sources use C++17 library features (e.g. std::optional). Require the standard
+# so that CMake does not silently fall back to an older one.
+set_target_properties(convert_tio.x PROPERTIES
+	CXX_STANDARD 17
+	CXX_STANDARD_REQUIRED ON
+	)
+
diff --git a/src/tomorrow_io/README.md b/src/tomorrow_io/README.md
@@ -0,0 +1,12 @@
+# The Tomorrow Microwave Sounder (TMS) converter
+
+The source code here provides the ability to convert the Tomorrow.io TMS L1B-TC product
+and the NASA TROPICS L1B product into IODA format.
+
+Usage: `convert_tio.x input_file_1 [input_file_2 ...] output_file`
+
+This converter is written in C++ and uses IODA to read and write the observation data files.
+It provides an example of how to convert data to IODA format using the C++ interface.
+
+For details, contact [Ryan Honeyager](mailto:ryan.honeyager@tomorrow.io) (@rhoneyager-tomorrow).
+
diff --git a/src/tomorrow_io/copy.h b/src/tomorrow_io/copy.h
@@ -0,0 +1,63 @@
+#pragma once
+/*
+ * (C) Copyright 2024 The Tomorrow Companies, Inc.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ */
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "ioda/ObsGroup.h"
+
+namespace tio_converter {
+
+/// Per-axis index ranges. Entry i, when engaged, is an inclusive (first, last) index
+/// pair for axis i. An empty optional, or a missing trailing entry, selects the whole axis.
+using DimensionRanges_t = std::vector<std::optional<std::pair<size_t, size_t>>>;
+
+/// @brief Pairs a variable with an optional hyperslab of that variable.
+struct VariableInfo {
+  /// The variable.
+  /// NOTE(review): presumably declared mutable so that copy() can write to the variable
+  /// through a const VariableInfo reference -- confirm against ioda::Variable's interface.
+  mutable ioda::Variable var;
+  /// Optionally defines a subset of indices along each axis. Bounds are inclusive on both sides.
+  DimensionRanges_t range;
+};
+
+/// @brief Selection and buffer-size information derived from a VariableInfo.
+struct VariableDerivedInfo {
+  /// Supplementary information about the dimensions of the variable.
+  DimensionRanges_t range;
+  /// The dimensions of the variable.
+  ioda::Dimensions dims;
+  /// The type of the variable's data. Ex: unsigned little-endian 32-bit integer.
+  ioda::Type type;
+  /// The starting indices for a hyperslab selection.
+  std::vector<ioda::Dimensions_t> selection_start;
+  /// The span along each axis for a hyperslab selection.
+  std::vector<ioda::Dimensions_t> selection_count;
+  /// The number of data elements in this selection.
+  /// Zero-initialized so that a default-constructed object never holds an indeterminate value.
+  size_t selection_num_elements = 0;
+  /// The size, in bytes, needed to store the variable's data, accounting for the selection.
+  /// Zero-initialized so that a default-constructed object never holds an indeterminate value.
+  size_t size_bytes = 0;
+  /// Selection from the file
+  ioda::Selection selection_ioda;
+  /// Selection within memory (starts at 0,0,0,...)
+  ioda::Selection selection_membuf;
+
+  /// Creates an empty object; the element and byte counts are zero.
+  VariableDerivedInfo();
+  /// Derives the information using the ranges stored in the VariableInfo.
+  /// Intentionally left non-explicit to keep existing implicit conversions working.
+  VariableDerivedInfo(const VariableInfo &);
+  /// Derives the information for the VariableInfo's variable using the supplied ranges
+  /// in place of the ranges stored in the VariableInfo.
+  VariableDerivedInfo(const VariableInfo &, const DimensionRanges_t &);
+};
+
+/// @brief Get the fill value assigned to a variable as a vector of bytes, and optionally
+///   convert to a different type representation.
+/// @param var is the variable to be queried.
+/// @param as_type is the desired return value's data type. Normally this is the source
+///   variable's data type, but optionally you can convert to a different representation.
+///   Useful when converting between differing source and destination types.
+/// @return The fill value's bytes. The length is the size of as_type when it is provided,
+///   and the size of the variable's own type otherwise.
+/// @throws std::logic_error if a conversion is requested between types that are not
+///   supported, or if the conversion itself fails.
+std::vector<char> get_fill_value(const ioda::Variable &var, std::optional<ioda::Type> as_type = {});
+
+/// @brief Generic function to copy a hyperslab of data from one variable to another.
+///
+/// The data are converted to the destination variable's type, and elements equal to the
+/// source variable's fill value are replaced with the destination variable's fill value.
+/// @param from is the specification of the source data. This includes the variable and the hyperslab.
+/// @param to is the specification of the destination location.
+void copy(const VariableInfo &from, const VariableInfo &to);
+
+}  // namespace tio_converter
diff --git a/src/tomorrow_io/copy_function.cc b/src/tomorrow_io/copy_function.cc
@@ -0,0 +1,56 @@
+/*
+ * (C) Copyright 2024 The Tomorrow Companies, Inc.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ */
+#include <algorithm>
+#include <cstring>
+#include <exception>
+#include <stdexcept>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "copy.h"
+#include "hdf5.h"
+
+namespace tio_converter {
+
+/// @brief Copy a hyperslab of data from one variable to another.
+///
+/// The source selection is read into a temporary buffer and converted to the destination
+/// variable's data type as part of the read. Elements that match the source fill value
+/// are replaced with the destination fill value, and the buffer is then written to the
+/// destination selection.
+/// @param from is the source variable and hyperslab.
+/// @param to is the destination variable and hyperslab.
+/// @throws std::invalid_argument if the two selections hold different numbers of elements.
+void copy(const VariableInfo &from, const VariableInfo &to) {
+  using std::memcmp;
+  using std::memcpy;
+  using std::vector;
+
+  VariableDerivedInfo from_info(from);
+  VariableDerivedInfo to_info(to);
+
+  // The buffer is sized from the destination selection but is filled from the source
+  // selection, so both must describe the same number of elements. Otherwise the read
+  // could run past the end of the buffer.
+  if (from_info.selection_num_elements != to_info.selection_num_elements)
+    throw std::invalid_argument(
+      "Source and destination selections contain different numbers of elements.");
+
+  vector<char> buffer(to_info.size_bytes);
+  from.var.read(
+    gsl::make_span(buffer.data(), buffer.size()),  // Read into the buffer
+    to_info.type,  // Convert data into the destination data type (e.g. int, float, ...)
+    from_info.selection_membuf,  // Needed to tell ioda how the data should be mapped into memory
+    from_info.selection_ioda     // The hyperslab being read
+  );
+
+  // Both fill values are expressed in the destination type, so they can be compared
+  // byte-for-byte against the elements of the buffer.
+  const vector<char> from_fill              = get_fill_value(from.var, to_info.type);
+  const vector<char> to_fill                = get_fill_value(to.var);
+  const size_t buffer_size_of_element_bytes = to_info.type.getSize();
+  // Iterate over buffer in buffer_size_of_element_bytes increments. Wherever an element
+  // matches from_fill, overwrite it with to_fill. When the two fill values are already
+  // identical the replacement would change nothing, so the scan is skipped.
+  if (from_fill != to_fill) {
+    for (size_t i = 0; i < buffer.size(); i += buffer_size_of_element_bytes) {
+      if (!memcmp(buffer.data() + i, from_fill.data(), buffer_size_of_element_bytes))
+        memcpy(buffer.data() + i, to_fill.data(), buffer_size_of_element_bytes);
+    }
+  }
+
+  to.var.write(gsl::make_span(buffer.data(), buffer.size()),  // Write from this buffer
+               to_info.type,                                  // Output variable type
+               to_info.selection_membuf,                      // Data mapping in memory
+               to_info.selection_ioda                         // The hyperslab being written
+  );
+}
+
+}  // namespace tio_converter
diff --git a/src/tomorrow_io/copy_helpers.cc b/src/tomorrow_io/copy_helpers.cc
@@ -0,0 +1,119 @@
+/*
+ * (C) Copyright 2024 The Tomorrow Companies, Inc.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ */
+#include <algorithm>
+
+#include "copy.h"
+#include "hdf5.h"
+
+namespace tio_converter {
+
+namespace {
+// Map an ioda type onto the matching predefined HDF5 type.
+// Only predefined HDF5 types are returned, and those hid_ts are static objects.
+// NOTE (RH): ioda really should be extended to return the endianness of data.
+//            We assume little endian until this is fixed.
+// Throws std::logic_error when the type class or width has no mapping.
+hid_t get_hdf5_type(const ioda::Type &typ) {
+  const size_t width         = typ.getSize();
+  const ioda::TypeClass kind = typ.getClass();
+
+  if (kind == ioda::TypeClass::Integer) {
+    // Signedness is only queried for integer types.
+    const bool is_signed = typ.isTypeSigned();
+    switch (width) {
+      case 1: return is_signed ? H5T_STD_I8LE : H5T_STD_U8LE;
+      case 2: return is_signed ? H5T_STD_I16LE : H5T_STD_U16LE;
+      case 4: return is_signed ? H5T_STD_I32LE : H5T_STD_U32LE;
+      case 8: return is_signed ? H5T_STD_I64LE : H5T_STD_U64LE;
+      default: break;
+    }
+  } else if (kind == ioda::TypeClass::Float) {
+    switch (width) {
+#ifdef H5T_NATIVE_FLOAT16  // Introduced in recent HDF5 versions
+      case 2: return H5T_IEEE_F16LE;
+#endif
+      case 4: return H5T_IEEE_F32LE;
+      case 8: return H5T_IEEE_F64LE;
+      default: break;
+    }
+  }
+  // Anything that falls through has no predefined little-endian equivalent here.
+  throw std::logic_error("Unsupported object type.");
+}
+}  // namespace
+
+/// Default constructor; every member is default-initialized.
+VariableDerivedInfo::VariableDerivedInfo() = default;
+/// Derive the information for vi using the ranges stored in vi itself.
+/// Delegates to the two-argument constructor.
+VariableDerivedInfo::VariableDerivedInfo(const VariableInfo &vi)
+    : VariableDerivedInfo(vi, vi.range) {}
+/// @brief Derive selection and buffer-size information for a variable.
+/// @param vi provides the variable whose dimensions and type are queried.
+/// @param di gives, for each axis, an optional inclusive (first, last) index range.
+///   Axes with no entry, or with an empty entry, are selected in full.
+VariableDerivedInfo::VariableDerivedInfo(const VariableInfo &vi, const DimensionRanges_t &di) {
+  using ioda::Dimensions_t;
+  using ioda::SelectionOperator;
+  using std::accumulate;
+  using std::multiplies;
+  using std::vector;
+
+  range = di;
+  dims  = vi.var.getDimensions();
+  type  = vi.var.getType();
+
+  vector<Dimensions_t> zero_starts(dims.dimensionality);
+  selection_start.resize(dims.dimensionality);
+  selection_count.resize(dims.dimensionality);
+
+  // Compare like with like: the loop index and range.size() are both unsigned.
+  const size_t rank = static_cast<size_t>(dims.dimensionality);
+  for (size_t i = 0; i < rank; ++i) {
+    if (range.size() > i && range[i]) {
+      // Bounds are inclusive on both sides, hence the +1.
+      selection_start[i] = range[i]->first;
+      selection_count[i] = range[i]->second - range[i]->first + 1;
+    } else {
+      selection_start[i] = 0;
+      selection_count[i] = dims.dimsCur[i];
+    }
+  }
+  selection_ioda.extent(dims.dimsCur)
+    .select({SelectionOperator::SET, selection_start, selection_count});
+  selection_membuf.extent(selection_count)
+    .select({SelectionOperator::SET, zero_starts, selection_count});
+
+  // Determine the size of a buffer needed to read this data in its entirety.
+  // This is just the product of selection_count. The initial value must be a size_t:
+  // std::accumulate keeps its running result in the type of the initial value, so an
+  // int seed would truncate the product for large selections.
+  selection_num_elements = accumulate(selection_count.begin(), selection_count.end(),
+                                      size_t{1}, multiplies<size_t>());
+  const size_t size_of_element_bytes = type.getSize();
+  size_bytes                         = selection_num_elements * size_of_element_bytes;
+}
+
+/// @brief Get a variable's fill value as raw bytes, optionally converted to another type.
+/// @param var is the variable to be queried.
+/// @param as_type is the type in which the fill value should be returned. When empty,
+///   the bytes are returned in the variable's own type.
+/// @return The fill value's bytes. The length is the size of as_type when it is provided,
+///   and the size of the variable's own type otherwise.
+/// @throws std::logic_error if the fill value does not fit in ioda's fill value storage,
+///   if either type has no HDF5 equivalent here, or if the conversion fails.
+std::vector<char> get_fill_value(const ioda::Variable &var, std::optional<ioda::Type> as_type) {
+  using std::logic_error;
+  using std::max;
+  using std::memcpy;
+  using std::vector;
+  const ioda::Type src_type  = var.getType();
+  const size_t len_bytes_src = src_type.getSize();
+  vector<char> fill_bytes(len_bytes_src);
+  // BUG (RH): ioda's getFillValue is very slightly buggy in that it reports a
+  // spurious warning when reading the fill value of the TMS L1B MultiMask variable,
+  // which has an unsigned char data type.
+  // "ioda::Variable: hdf and netcdf fill value specifications do not match"
+  // In this case, we can just read the _FillValue attribute directly.
+  if (var.atts.exists("_FillValue")) {
+    ioda::Attribute fvAttr = var.atts.open("_FillValue");
+    // Request the attribute in the variable's own type. fill_bytes is sized for that
+    // type, and everything below interprets the bytes as that type, so reading in the
+    // attribute's stored type would be wrong (and could overrun) if the two differ.
+    fvAttr.read(gsl::make_span(fill_bytes.data(), fill_bytes.size()), src_type);
+  } else {
+    const auto src_fill = var.getFillValue();
+    // Never copy more bytes than the fill value storage actually holds.
+    if (len_bytes_src > sizeof(src_fill.fillValue_))
+      throw logic_error("Fill value does not fit in the fill value storage.");
+    // NOTE(review): taking the leading bytes of ui64 relies on a little-endian host,
+    // in line with the little-endian assumption made elsewhere in this file.
+    memcpy(fill_bytes.data(), &(src_fill.fillValue_.ui64), len_bytes_src);
+  }
+
+  if (!as_type) return fill_bytes;
+
+  const size_t len_bytes_to = as_type->getSize();
+  const hid_t h5type_from   = get_hdf5_type(src_type);
+  const hid_t h5type_to     = get_hdf5_type(*as_type);
+  // H5Tconvert works in place, so the buffer must hold the larger of the two types.
+  fill_bytes.resize(max(len_bytes_src, len_bytes_to));
+
+  herr_t cvt_res = H5Tconvert(h5type_from, h5type_to,
+                              1,  // Only one 'element' to be converted
+                              fill_bytes.data(), nullptr, H5P_DEFAULT);
+  if (cvt_res < 0) throw logic_error("Fill value type conversion failed.");
+  fill_bytes.resize(len_bytes_to);
+  return fill_bytes;
+}
+
+}  // namespace tio_converter