From 4c8d505aabbe93d0e64eb847d6c7cdda5c341136 Mon Sep 17 00:00:00 2001 From: Greg Lucas Date: Mon, 26 Aug 2024 06:53:33 -0600 Subject: [PATCH] FIX: Avoid loss of precision when casting in packet loading When using derived values, there can be situations where a linear conversion factor is applied to a uint8 value, for instance to turn a raw measurement into a float temperature value. These are represented as a small uint datatype onboard, but need to be represented as a float or larger integer datatype on the ground so we don't lose precision. Previously, a value such as 2.1 was being cast to 2 when we attempted to cast the derived values to their onboard types. --- imap_processing/tests/test_utils.py | 28 ++++++++++++++++++++++------ imap_processing/utils.py | 12 ++++++++++-- 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/imap_processing/tests/test_utils.py b/imap_processing/tests/test_utils.py index 787289a6d..6f42dfa4b 100644 --- a/imap_processing/tests/test_utils.py +++ b/imap_processing/tests/test_utils.py @@ -97,10 +97,26 @@ def test_packet_file_to_datasets(use_derived_value, expected_mode): np.testing.assert_array_equal(data["mode"], [expected_mode] * len(data["mode"])) -def test__create_minimum_dtype_array(): +@pytest.mark.parametrize( + ("arr", "dtype", "expected_dtype"), + # Expected basic case + [ + ([1, 2, 3], "uint8", "uint8"), + # We shouldn't go lower than requested either + ([1, 2, 3], "uint16", "uint16"), + # Can't cast negative, fallback to default + ([-1, 2, 3], "uint8", "int64"), + # Small signed ints should be good + ([-1, 2, 3], "int8", "int8"), + # Can't cast strings to ints, fallback to default + (["a", "b", "c"], "uint8", " npt.NDArray: array : np.array The array of values. 
""" + # Create an initial array and then try to safely cast it to the desired dtype + x = np.asarray(values) try: - return np.array(values, dtype=dtype) + # ValueError: when trying to cast strings (enum states) to ints + y = x.astype(dtype, copy=False) + # We need to compare the arrays to see if we trimmed any values by + # casting to a smaller datatype (e.g. float64 to uint8, 2.1 to 2) + if np.array_equal(x, y): + return y except ValueError: - return np.array(values) + pass + return x def packet_file_to_datasets(