FIX: Avoid loss of precision when casting in packet loading

When using derived values, there can be situations where a linear conversion factor is applied to a uint8 value, for instance to turn a raw measurement into a float temperature value. These values are represented as a small uint datatype onboard, but need to be represented as a float or larger integer datatype on the ground so we don't lose precision. Previously, 2.1 was being cast to 2 because we attempted to cast the derived values to their onboard types.
IMAP-Science-Operations-Center · Aug 26, 2024 · 4c8d505 · 4c8d505
1 parent 87ede4d
commit 4c8d505
Show file tree

Hide file tree

Showing 2 changed files with 32 additions and 8 deletions.
diff --git a/imap_processing/tests/test_utils.py b/imap_processing/tests/test_utils.py
@@ -97,10 +97,26 @@ def test_packet_file_to_datasets(use_derived_value, expected_mode):
     np.testing.assert_array_equal(data["mode"], [expected_mode] * len(data["mode"]))
 
 
-def test__create_minimum_dtype_array():
+@pytest.mark.parametrize(
+    ("arr", "dtype", "expected_dtype"),
+    # Expected basic case
+    [
+        ([1, 2, 3], "uint8", "uint8"),
+        # We shouldn't go lower than requested either
+        ([1, 2, 3], "uint16", "uint16"),
+        # Can't cast negative, fallback to default
+        ([-1, 2, 3], "uint8", "int64"),
+        # Small signed ints should be good
+        ([-1, 2, 3], "int8", "int8"),
+        # Can't cast strings to ints, fallback to default
+        (["a", "b", "c"], "uint8", "<U1"),
+        # Can't cast floats to ints, fallback to default
+        ([1, 2.5, 3], "uint8", "float64"),
+        # Can't cast larger ints, fallback to default
+        ([1, 1000, 2000], "uint8", "int64"),
+    ],
+)
+def test__create_minimum_dtype_array(arr, dtype, expected_dtype):
     """Test expected return types for minimum data types."""
-    result = utils._create_minimum_dtype_array([1, 2, 3], "uint8")
-    assert result.dtype == np.dtype("uint8")
-    # fallback to a generic array if the requested dtype can't be satisfied
-    result = utils._create_minimum_dtype_array(["a", "b", "c"], "uint8")
-    assert result.dtype == np.dtype("<U1")
+    result = utils._create_minimum_dtype_array(arr, dtype)
+    assert result.dtype == np.dtype(expected_dtype)
diff --git a/imap_processing/utils.py b/imap_processing/utils.py
@@ -299,10 +299,18 @@ def _create_minimum_dtype_array(values: list, dtype: str) -> npt.NDArray:
     array : np.array
         The array of values.
     """
+    # Create an initial array and then try to safely cast it to the desired dtype
+    x = np.asarray(values)
     try:
-        return np.array(values, dtype=dtype)
+        # ValueError: when trying to cast strings (enum states) to ints
+        y = x.astype(dtype, copy=False)
+        # We need to compare the arrays to see if we trimmed any values by
+        # casting to a smaller datatype (e.g. float64 to uint8, 2.1 to 2)
+        if np.array_equal(x, y):
+            return y
     except ValueError:
-        return np.array(values)
+        pass
+    return x
 
 
 def packet_file_to_datasets(