Skip to content

Commit

Permalink
FIX: Avoid loss of precision when casting in packet loading
Browse files Browse the repository at this point in the history
When using derived values, there can be situations where a linear
conversion factor is applied to a uint8 value, for instance to turn a
raw measurement into a float temperature value. These values are
represented as a small uint datatype onboard, but need to be represented
as a float or larger integer datatype on the ground so we don't lose
precision. Previously, a derived value such as 2.1 was truncated to 2
because we attempted to cast the derived values to their onboard datatypes.
  • Loading branch information
greglucas committed Aug 26, 2024
1 parent 87ede4d commit 4c8d505
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 8 deletions.
28 changes: 22 additions & 6 deletions imap_processing/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,26 @@ def test_packet_file_to_datasets(use_derived_value, expected_mode):
np.testing.assert_array_equal(data["mode"], [expected_mode] * len(data["mode"]))


def test__create_minimum_dtype_array():
@pytest.mark.parametrize(
("arr", "dtype", "expected_dtype"),
# Expected basic case
[
([1, 2, 3], "uint8", "uint8"),
# We shouldn't go lower than requested either
([1, 2, 3], "uint16", "uint16"),
# Can't cast negative, fallback to default
([-1, 2, 3], "uint8", "int64"),
# Small signed ints should be good
([-1, 2, 3], "int8", "int8"),
# Can't cast strings to ints, fallback to default
(["a", "b", "c"], "uint8", "<U1"),
# Can't cast floats to ints, fallback to default
([1, 2.5, 3], "uint8", "float64"),
# Can't cast larger ints, fallback to default
([1, 1000, 2000], "uint8", "int64"),
],
)
def test__create_minimum_dtype_array(arr, dtype, expected_dtype):
"""Test expected return types for minimum data types."""
result = utils._create_minimum_dtype_array([1, 2, 3], "uint8")
assert result.dtype == np.dtype("uint8")
# fallback to a generic array if the requested dtype can't be satisfied
result = utils._create_minimum_dtype_array(["a", "b", "c"], "uint8")
assert result.dtype == np.dtype("<U1")
result = utils._create_minimum_dtype_array(arr, dtype)
assert result.dtype == np.dtype(expected_dtype)
12 changes: 10 additions & 2 deletions imap_processing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,10 +299,18 @@ def _create_minimum_dtype_array(values: list, dtype: str) -> npt.NDArray:
array : np.array
The array of values.
"""
# Create an initial array and then try to safely cast it to the desired dtype
x = np.asarray(values)
try:
return np.array(values, dtype=dtype)
# ValueError: when trying to cast strings (enum states) to ints
y = x.astype(dtype, copy=False)
# We need to compare the arrays to see if we trimmed any values by
# casting to a smaller datatype (e.g. float64 to uint8, 2.1 to 2)
if np.array_equal(x, y):
return y
except ValueError:
return np.array(values)
pass
return x


def packet_file_to_datasets(
Expand Down

0 comments on commit 4c8d505

Please sign in to comment.