Skip to content

Commit

Permalink
FIX: Avoid loss of precision when casting in packet loading (#786)
Browse files Browse the repository at this point in the history
When using derived values there can be situations where a linear
conversion factor is applied to a uint8 value to turn a raw measurement
into a float temperature value for instance. These are represented
as a small uint datatype onboard, but need to be represented as a
float or larger integer datatype on the ground so we don't lose
precision. Previously we were getting 2.1 cast to 2 after the
derived types were attempted to be cast to their onboard types.
  • Loading branch information
greglucas committed Aug 30, 2024
1 parent 427cbf1 commit 164aca0
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 42 deletions.
9 changes: 0 additions & 9 deletions imap_processing/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,3 @@ def test_packet_file_to_datasets_flat_definition():
)
with pytest.raises(ValueError, match="Packet fields do not match"):
utils.packet_file_to_datasets(packet_files, packet_definition)


def test__create_minimum_dtype_array():
"""Test expected return types for minimum data types."""
result = utils._create_minimum_dtype_array([1, 2, 3], "uint8")
assert result.dtype == np.dtype("uint8")
# fallback to a generic array if the requested dtype can't be satisfied
result = utils._create_minimum_dtype_array(["a", "b", "c"], "uint8")
assert result.dtype == np.dtype("<U1")
54 changes: 21 additions & 33 deletions imap_processing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from typing import Optional, Union

import numpy as np
import numpy.typing as npt
import pandas as pd
import xarray as xr
from space_packet_parser import parser, xtcedef
Expand Down Expand Up @@ -228,8 +227,8 @@ def create_dataset(


def _get_minimum_numpy_datatype( # noqa: PLR0912 - Too many branches
name: str, definition: xtcedef.XtcePacketDefinition
) -> str:
name: str, definition: xtcedef.XtcePacketDefinition, use_derived_value: bool = True
) -> Optional[str]:
"""
Get the minimum datatype for a given variable.
Expand All @@ -239,6 +238,8 @@ def _get_minimum_numpy_datatype( # noqa: PLR0912 - Too many branches
The variable name.
definition : xtcedef.XtcePacketDefinition
The XTCE packet definition.
use_derived_value : bool, default True
Whether or not the derived value from the XTCE definition was used.
Returns
-------
Expand All @@ -247,7 +248,21 @@ def _get_minimum_numpy_datatype( # noqa: PLR0912 - Too many branches
"""
data_encoding = definition.named_parameters[name].parameter_type.encoding

if isinstance(data_encoding, xtcedef.NumericDataEncoding):
if use_derived_value and isinstance(
definition.named_parameters[name].parameter_type,
xtcedef.EnumeratedParameterType,
):
# We don't have a way of knowing what is enumerated,
# let numpy infer the datatype
return None
elif isinstance(data_encoding, xtcedef.NumericDataEncoding):
if use_derived_value and (
data_encoding.context_calibrators is not None
or data_encoding.default_calibrator is not None
):
# If there are calibrators, we need to default to None and
# let numpy infer the datatype
return None
nbits = data_encoding.size_in_bits
if isinstance(data_encoding, xtcedef.IntegerDataEncoding):
datatype = "int"
Expand Down Expand Up @@ -280,31 +295,6 @@ def _get_minimum_numpy_datatype( # noqa: PLR0912 - Too many branches
return datatype


def _create_minimum_dtype_array(values: list, dtype: str) -> npt.NDArray:
"""
Create an array with the minimum datatype.
If it can't be coerced to that datatype, fallback to general array creation
without a specific datatype. This can happen with derived values.
Parameters
----------
values : list
List of values.
dtype : str
The datatype.
Returns
-------
array : np.array
The array of values.
"""
try:
return np.array(values, dtype=dtype)
except ValueError:
return np.array(values)


def packet_file_to_datasets(
packet_file: Union[str, Path],
xtce_packet_definition: Union[str, Path],
Expand Down Expand Up @@ -384,7 +374,7 @@ def packet_file_to_datasets(
if key not in datatype_mapping[apid]:
# Add this datatype to the mapping
datatype_mapping[apid][key] = _get_minimum_numpy_datatype(
key, packet_definition
key, packet_definition, use_derived_value=use_derived_value
)

dataset_by_apid = {}
Expand All @@ -398,9 +388,7 @@ def packet_file_to_datasets(
{
key.lower(): (
"epoch",
_create_minimum_dtype_array(
list_of_values, dtype=datatype_mapping[apid][key]
),
np.asarray(list_of_values, dtype=datatype_mapping[apid][key]),
)
for key, list_of_values in data.items()
},
Expand Down

0 comments on commit 164aca0

Please sign in to comment.