From 77b853a560d4a817115ef44d0a12271145f53e8b Mon Sep 17 00:00:00 2001 From: Greg Lucas Date: Mon, 26 Aug 2024 14:58:22 -0600 Subject: [PATCH] FIX: Avoid loss of precision when casting in packet loading When using derived values, there can be situations where, for instance, a linear conversion factor is applied to a uint8 value to turn a raw measurement into a float temperature value. These are represented as a small uint datatype onboard, but need to be represented as a float or larger integer datatype on the ground so we don't lose precision. Previously, a derived value such as 2.1 was truncated to 2 because derived values were cast back to their onboard datatypes. --- imap_processing/tests/test_utils.py | 9 ----- imap_processing/utils.py | 54 +++++++++++------------------ 2 files changed, 21 insertions(+), 42 deletions(-) diff --git a/imap_processing/tests/test_utils.py b/imap_processing/tests/test_utils.py index d33051777..792c5423a 100644 --- a/imap_processing/tests/test_utils.py +++ b/imap_processing/tests/test_utils.py @@ -105,12 +105,3 @@ def test_packet_file_to_datasets_flat_definition(): ) with pytest.raises(ValueError, match="Packet fields do not match"): utils.packet_file_to_datasets(packet_files, packet_definition) - - -def test__create_minimum_dtype_array(): - """Test expected return types for minimum data types.""" - result = utils._create_minimum_dtype_array([1, 2, 3], "uint8") - assert result.dtype == np.dtype("uint8") - # fallback to a generic array if the requested dtype can't be satisfied - result = utils._create_minimum_dtype_array(["a", "b", "c"], "uint8") - assert result.dtype == np.dtype(" str: + name: str, definition: xtcedef.XtcePacketDefinition, use_derived_value: bool = True +) -> Optional[str]: """ Get the minimum datatype for a given variable. @@ -239,6 +238,8 @@ def _get_minimum_numpy_datatype( # noqa: PLR0912 - Too many branches The variable name. definition : xtcedef.XtcePacketDefinition The XTCE packet definition. 
+ use_derived_value : bool, default True + Whether or not the derived value from the XTCE definition was used. Returns ------- @@ -247,7 +248,21 @@ def _get_minimum_numpy_datatype( # noqa: PLR0912 - Too many branches """ data_encoding = definition.named_parameters[name].parameter_type.encoding - if isinstance(data_encoding, xtcedef.NumericDataEncoding): + if use_derived_value and isinstance( + definition.named_parameters[name].parameter_type, + xtcedef.EnumeratedParameterType, + ): + # We don't have a way of knowing what is enumerated, + # let numpy infer the datatype + return None + elif isinstance(data_encoding, xtcedef.NumericDataEncoding): + if use_derived_value and ( + data_encoding.context_calibrators is not None + or data_encoding.default_calibrator is not None + ): + # If there are calibrators, we need to default to None and + # let numpy infer the datatype + return None nbits = data_encoding.size_in_bits if isinstance(data_encoding, xtcedef.IntegerDataEncoding): datatype = "int" @@ -280,31 +295,6 @@ def _get_minimum_numpy_datatype( # noqa: PLR0912 - Too many branches return datatype -def _create_minimum_dtype_array(values: list, dtype: str) -> npt.NDArray: - """ - Create an array with the minimum datatype. - - If it can't be coerced to that datatype, fallback to general array creation - without a specific datatype. This can happen with derived values. - - Parameters - ---------- - values : list - List of values. - dtype : str - The datatype. - - Returns - ------- - array : np.array - The array of values. 
- """ - try: - return np.array(values, dtype=dtype) - except ValueError: - return np.array(values) - - def packet_file_to_datasets( packet_file: Union[str, Path], xtce_packet_definition: Union[str, Path], @@ -384,7 +374,7 @@ def packet_file_to_datasets( if key not in datatype_mapping[apid]: # Add this datatype to the mapping datatype_mapping[apid][key] = _get_minimum_numpy_datatype( - key, packet_definition + key, packet_definition, use_derived_value=use_derived_value ) dataset_by_apid = {} @@ -398,9 +388,7 @@ def packet_file_to_datasets( { key.lower(): ( "epoch", - _create_minimum_dtype_array( - list_of_values, dtype=datatype_mapping[apid][key] - ), + np.asarray(list_of_values, dtype=datatype_mapping[apid][key]), ) for key, list_of_values in data.items() },