Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FIX: Avoid loss of precision when casting in packet loading (alternative) #786

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 0 additions & 9 deletions imap_processing/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,3 @@ def test_packet_file_to_datasets_flat_definition():
)
with pytest.raises(ValueError, match="Packet fields do not match"):
utils.packet_file_to_datasets(packet_files, packet_definition)


def test__create_minimum_dtype_array():
    """Test expected return types for minimum data types."""
    # Values that fit the requested dtype come back with exactly that dtype.
    coercible = utils._create_minimum_dtype_array([1, 2, 3], "uint8")
    assert coercible.dtype == np.dtype("uint8")
    # Strings can't be coerced to uint8, so numpy's inferred dtype is used
    # instead (1-character little-endian unicode).
    fallback = utils._create_minimum_dtype_array(["a", "b", "c"], "uint8")
    assert fallback.dtype == np.dtype("<U1")
54 changes: 21 additions & 33 deletions imap_processing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from typing import Optional, Union

import numpy as np
import numpy.typing as npt
import pandas as pd
import xarray as xr
from space_packet_parser import parser, xtcedef
Expand Down Expand Up @@ -228,8 +227,8 @@ def create_dataset(


def _get_minimum_numpy_datatype( # noqa: PLR0912 - Too many branches
name: str, definition: xtcedef.XtcePacketDefinition
) -> str:
name: str, definition: xtcedef.XtcePacketDefinition, use_derived_value: bool = True
) -> Optional[str]:
"""
Get the minimum datatype for a given variable.

Expand All @@ -239,6 +238,8 @@ def _get_minimum_numpy_datatype( # noqa: PLR0912 - Too many branches
The variable name.
definition : xtcedef.XtcePacketDefinition
The XTCE packet definition.
use_derived_value : bool, default True
Whether or not the derived value from the XTCE definition was used.

Returns
-------
Expand All @@ -247,7 +248,21 @@ def _get_minimum_numpy_datatype( # noqa: PLR0912 - Too many branches
"""
data_encoding = definition.named_parameters[name].parameter_type.encoding

if isinstance(data_encoding, xtcedef.NumericDataEncoding):
if use_derived_value and isinstance(
definition.named_parameters[name].parameter_type,
xtcedef.EnumeratedParameterType,
):
# We don't have a way of knowing what is enumerated,
# let numpy infer the datatype
return None
elif isinstance(data_encoding, xtcedef.NumericDataEncoding):
if use_derived_value and (
data_encoding.context_calibrators is not None
or data_encoding.default_calibrator is not None
):
# If there are calibrators, we need to default to None and
# let numpy infer the datatype
return None
nbits = data_encoding.size_in_bits
if isinstance(data_encoding, xtcedef.IntegerDataEncoding):
datatype = "int"
Expand Down Expand Up @@ -280,31 +295,6 @@ def _get_minimum_numpy_datatype( # noqa: PLR0912 - Too many branches
return datatype


def _create_minimum_dtype_array(values: list, dtype: str) -> npt.NDArray:
"""
Create an array with the minimum datatype.

If it can't be coerced to that datatype, fallback to general array creation
without a specific datatype. This can happen with derived values.

Parameters
----------
values : list
List of values.
dtype : str
The datatype.

Returns
-------
array : np.array
The array of values.
"""
try:
return np.array(values, dtype=dtype)
except ValueError:
return np.array(values)


def packet_file_to_datasets(
packet_file: Union[str, Path],
xtce_packet_definition: Union[str, Path],
Expand Down Expand Up @@ -384,7 +374,7 @@ def packet_file_to_datasets(
if key not in datatype_mapping[apid]:
# Add this datatype to the mapping
datatype_mapping[apid][key] = _get_minimum_numpy_datatype(
key, packet_definition
key, packet_definition, use_derived_value=use_derived_value
)

dataset_by_apid = {}
Expand All @@ -398,9 +388,7 @@ def packet_file_to_datasets(
{
key.lower(): (
"epoch",
_create_minimum_dtype_array(
list_of_values, dtype=datatype_mapping[apid][key]
),
np.asarray(list_of_values, dtype=datatype_mapping[apid][key]),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does numpy default to 64-bit floats? And if so, do we have any way to get the CDF products to use 32-bit floats where we want them?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes and yes.

I am actually getting 32-bit floats in another packet definition where the floats are brought down directly in the telemetry stream, so space_packet_parser emits the Python floats and then we can use the bits to determine 32/64 bits correctly:

elif isinstance(data_encoding, xtcedef.FloatDataEncoding):
datatype = "float"
if nbits == 32:
datatype += "32"
else:
datatype += "64"

This situation arises purely when a conversion factor gets applied and we just shrug and say: there isn't much I can know about it from the packet definition — you told me it was an unsigned 8-bit value in the packet, but then you multiplied it by 2.5, and all bets are off as to what you want me to turn that into (float, new int, string enumeration, ...).

Ultimately, this gets to your earlier suggestions though that this is really just a "first cut" and we should be putting dtype into the CDF metadata attributes yaml definitions and doing the casting there when creating the datasets and saving the products.

)
for key, list_of_values in data.items()
},
Expand Down
Loading