From 0f2247831aa0ce8cb116b440fd19bf7bbf695551 Mon Sep 17 00:00:00 2001
From: Matthew Bourque
Date: Tue, 20 Aug 2024 10:49:40 -0600
Subject: [PATCH 01/22] Updated decompression algorithm to read in a binary
 string instead of individual integer values

---
 imap_processing/codice/decompress.py | 90 ++++++++++---------
 .../tests/codice/test_decompress.py  | 31 ++++---
 2 files changed, 63 insertions(+), 58 deletions(-)

diff --git a/imap_processing/codice/decompress.py b/imap_processing/codice/decompress.py
index bbbad4b6c..e4abf83ea 100644
--- a/imap_processing/codice/decompress.py
+++ b/imap_processing/codice/decompress.py
@@ -26,18 +26,16 @@
 This information was provided via email from Greg Dunn on Oct 23, 2023
 """
 
-# TODO: Add support for performing decompression of a list of values instead of
-#       a single value
-
 import lzma
 from enum import IntEnum
-from typing import Union
+
+import bitarray
 
 from imap_processing.codice.constants import LOSSY_A_TABLE, LOSSY_B_TABLE
 from imap_processing.codice.utils import CoDICECompression
 
 
-def _apply_lossy_a(compressed_value: int) -> int:
+def _apply_lossy_a(compressed_bytes: bytes) -> list[int]:
     """
     Apply 8-bit to 32-bit Lossy A decompression algorithm.
 
@@ -45,63 +43,62 @@
 
     Parameters
     ----------
-    compressed_value : int
-        The compressed 8-bit value.
+    compressed_bytes : bytes
+        The compressed byte stream.
 
     Returns
     -------
-    int
-        The 24- or 32-bit decompressed value.
+    decompressed_values : list[int]
+        The 24- or 32-bit decompressed values.
     """
-    return LOSSY_A_TABLE[compressed_value]
+    compressed_values = list(compressed_bytes)
+    decompressed_values = [LOSSY_A_TABLE[item] for item in compressed_values]
+    return decompressed_values
 
 
-def _apply_lossy_b(compressed_value: int) -> int:
+def _apply_lossy_b(compressed_bytes: bytes) -> list[int]:
     """
     Apply 8-bit to 32-bit Lossy B decompression algorithm.
 
     The Lossy B algorithm uses a lookup table imported into this module.
 
     Parameters
     ----------
-    compressed_value : int
-        The compressed 8-bit value.
+    compressed_bytes : bytes
+        The compressed byte stream.
 
     Returns
     -------
-    int
-        The 24- or 32-bit decompressed value.
+    decompressed_values : list[int]
+        The 24- or 32-bit decompressed values.
     """
-    return LOSSY_B_TABLE[compressed_value]
+    compressed_values = list(compressed_bytes)
+    decompressed_values = [LOSSY_B_TABLE[item] for item in compressed_values]
+    return decompressed_values
 
 
-def _apply_lzma_lossless(compressed_value: Union[int, bytes]) -> int:
+def _apply_lzma_lossless(compressed_bytes: bytes) -> bytes:
     """
     Apply LZMA lossless decompression algorithm.
 
     Parameters
     ----------
-    compressed_value : int or bytes
-        The compressed 8-bit value.
+    compressed_bytes : bytes
+        The compressed byte stream.
 
     Returns
     -------
-    decompressed_value : int
-        The 24- or 32-bit decompressed value.
+    lzma_decompressed_values : bytes
+        The LZMA-decompressed byte stream.
""" - if isinstance(compressed_value, int): - bytes_compressed_value = compressed_value.to_bytes(compressed_value, "big") - else: - bytes_compressed_value = compressed_value - decompressed_value = lzma.decompress(bytes_compressed_value) - decompressed_value_int = int.from_bytes(decompressed_value, byteorder="big") + lzma_decompressed_values = lzma.decompress(compressed_bytes) - return decompressed_value_int + return lzma_decompressed_values -def decompress(compressed_value: int, algorithm: IntEnum) -> int: +def decompress(compressed_binary: str, algorithm: IntEnum) -> list[int]: """ - Will decompress the value. + Perform decompression on a binary string into a list of integers. Apply the appropriate decompression algorithm(s) based on the value of the ``algorithm`` attribute. One or more individual algorithms may be @@ -109,32 +106,37 @@ def decompress(compressed_value: int, algorithm: IntEnum) -> int: Parameters ---------- - compressed_value : int - The 8-bit compressed value to decompress. + compressed_binary : str + The compressed binary string. algorithm : int The algorithm to apply. Supported algorithms are provided in the ``codice_utils.CoDICECompression`` class. Returns ------- - decompressed_value : int - The 24- or 32-bit decompressed value. + decompressed_values : list[int] + The 24- or 32-bit decompressed values. """ + # Convert the binary string to a byte stream + compressed_bytes = bitarray.bitarray(compressed_binary).tobytes() + + # Apply the appropriate decompression algorithm if algorithm == CoDICECompression.NO_COMPRESSION: - decompressed_value = compressed_value + decompressed_values = list(compressed_bytes) elif algorithm == CoDICECompression.LOSSY_A: - decompressed_value = _apply_lossy_a(compressed_value) + decompressed_values = _apply_lossy_a(compressed_bytes) elif algorithm == CoDICECompression.LOSSY_B: - decompressed_value = _apply_lossy_b(compressed_value) + decompressed_values = _apply_lossy_b(compressed_bytes) elif algorithm == CoDICECompression.LOSSLESS: - decompressed_value = _apply_lzma_lossless(compressed_value) + decompressed_bytes = _apply_lzma_lossless(compressed_bytes) + decompressed_values = list(decompressed_bytes) elif algorithm == CoDICECompression.LOSSY_A_LOSSLESS: - decompressed_value = _apply_lzma_lossless(compressed_value) - decompressed_value = _apply_lossy_a(decompressed_value) + decompressed_bytes = _apply_lzma_lossless(compressed_bytes) + decompressed_values = _apply_lossy_a(decompressed_bytes) elif algorithm == CoDICECompression.LOSSY_B_LOSSLESS: - decompressed_value = _apply_lzma_lossless(compressed_value) - decompressed_value = _apply_lossy_b(decompressed_value) + decompressed_bytes = _apply_lzma_lossless(compressed_bytes) + decompressed_values = _apply_lossy_b(decompressed_bytes) else: raise ValueError(f"{algorithm} is not supported") - return decompressed_value + return decompressed_values diff --git a/imap_processing/tests/codice/test_decompress.py b/imap_processing/tests/codice/test_decompress.py index e74f60d73..853a94ccc 100644 --- a/imap_processing/tests/codice/test_decompress.py +++ b/imap_processing/tests/codice/test_decompress.py @@ -9,34 +9,37 @@ from imap_processing.codice.utils import CoDICECompression # Test the algorithms using input value of 234 (picked randomly) -LZMA_EXAMPLE = lzma.compress((234).to_bytes(1, byteorder="big")) +lzma_bytes = lzma.compress((234).to_bytes(1, byteorder="big")) +LZMA_EXAMPLE = "".join(format(byte, "08b") for byte in lzma_bytes) TEST_DATA = [ - (234, CoDICECompression.NO_COMPRESSION, 234), - (234, 
CoDICECompression.LOSSY_A, 221184), - (234, CoDICECompression.LOSSY_B, 1441792), - (LZMA_EXAMPLE, CoDICECompression.LOSSLESS, 234), - (LZMA_EXAMPLE, CoDICECompression.LOSSY_A_LOSSLESS, 221184), - (LZMA_EXAMPLE, CoDICECompression.LOSSY_B_LOSSLESS, 1441792), + ("11101010", CoDICECompression.NO_COMPRESSION, [234]), + ("11101010", CoDICECompression.LOSSY_A, [221184]), + ("11101010", CoDICECompression.LOSSY_B, [1441792]), + (LZMA_EXAMPLE, CoDICECompression.LOSSLESS, [234]), + (LZMA_EXAMPLE, CoDICECompression.LOSSY_A_LOSSLESS, [221184]), + (LZMA_EXAMPLE, CoDICECompression.LOSSY_B_LOSSLESS, [1441792]), ] @pytest.mark.parametrize( - ("compressed_value", "algorithm", "expected_result"), TEST_DATA + ("compressed_binary", "algorithm", "expected_result"), TEST_DATA ) -def test_decompress(compressed_value: int, algorithm: IntEnum, expected_result: int): +def test_decompress( + compressed_binary: str, algorithm: IntEnum, expected_result: list[int] +): """Tests the ``decompress`` function Parameters ---------- - compressed_value : int - The compressed value to test decompression on + compressed_binary : str + The compressed binary string to test decompression on algorithm : IntEnum The algorithm to use in decompression - expected_result : int + expected_result : list[int] The expected, decompressed value """ - decompressed_value = decompress(compressed_value, algorithm) + decompressed_value = decompress(compressed_binary, algorithm) assert decompressed_value == expected_result @@ -44,4 +47,4 @@ def test_decompress_raises(): """Tests that the ``decompress`` function raises with an unknown algorithm""" with pytest.raises(ValueError, match="some_unsupported_algorithm"): - decompress(234, "some_unsupported_algorithm") + decompress("11101010", "some_unsupported_algorithm") From caf55ec3427fa3c13abea74292ebb7bbd1154a3f Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 10:51:20 -0600 Subject: [PATCH 02/22] Removed collapse table lookup, as the info needed for these is instead "hard coded" into the configuration dictionary; Added spin sector config variable --- imap_processing/codice/constants.py | 44 ++++++++++++++--------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/imap_processing/codice/constants.py b/imap_processing/codice/constants.py index a8bcf3503..9054431f4 100644 --- a/imap_processing/codice/constants.py +++ b/imap_processing/codice/constants.py @@ -12,6 +12,9 @@ ESA = ElectroStatic Analyzer """ +# TODO: What to do in the case of a value of 255 in LOSSY_A and LOSSY_B +# compression? 
+ from imap_processing.codice.utils import CODICEAPID, CoDICECompression APIDS_FOR_SCIENCE_PROCESSING = [ @@ -76,73 +79,85 @@ DATA_PRODUCT_CONFIGURATIONS = { CODICEAPID.COD_HI_INST_COUNTS_AGGREGATED: { "num_counters": 1, - "num_energy_steps": 128, + "num_energy_steps": 1, + "num_spin_sectors": 6, "variable_names": HI_INST_COUNTS_AGGREGATED_NAMES, "dataset_name": "imap_codice_l1a_hi_counters_aggregated", }, CODICEAPID.COD_HI_INST_COUNTS_SINGLES: { "num_counters": 3, - "num_energy_steps": 128, + "num_energy_steps": 1, + "num_spin_sectors": 16, "variable_names": HI_INST_COUNTS_SINGLES_NAMES, "dataset_name": "imap_codice_l1a_hi_counters_singles", }, CODICEAPID.COD_HI_OMNI_SPECIES_COUNTS: { "num_counters": 8, - "num_energy_steps": 128, + "num_energy_steps": 1, + "num_spin_sectors": 60, # TODO: Double-check this "variable_names": HI_OMNI_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_hi_omni", }, CODICEAPID.COD_HI_SECT_SPECIES_COUNTS: { "num_counters": 4, - "num_energy_steps": 128, + "num_energy_steps": 1, + "num_spin_sectors": 1152, # TODO: Double-check this "variable_names": HI_SECT_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_hi_sectored", }, CODICEAPID.COD_LO_INST_COUNTS_AGGREGATED: { "num_counters": 1, "num_energy_steps": 128, + "num_spin_sectors": 36, "variable_names": LO_INST_COUNTS_AGGREGATED_NAMES, "dataset_name": "imap_codice_l1a_lo_counters_aggregated", }, CODICEAPID.COD_LO_INST_COUNTS_SINGLES: { "num_counters": 1, "num_energy_steps": 128, + "num_spin_sectors": 144, "variable_names": LO_INST_COUNTS_SINGLES_NAMES, "dataset_name": "imap_codice_l1a_lo_counters_singles", }, CODICEAPID.COD_LO_SW_ANGULAR_COUNTS: { "num_counters": 4, "num_energy_steps": 128, + "num_spin_sectors": 60, "variable_names": LO_SW_ANGULAR_NAMES, "dataset_name": "imap_codice_l1a_lo_sw_angular", }, CODICEAPID.COD_LO_NSW_ANGULAR_COUNTS: { "num_counters": 1, "num_energy_steps": 128, + "num_spin_sectors": 228, "variable_names": LO_NSW_ANGULAR_NAMES, "dataset_name": "imap_codice_l1a_lo_nsw_angular", }, CODICEAPID.COD_LO_SW_PRIORITY_COUNTS: { "num_counters": 5, "num_energy_steps": 128, + "num_spin_sectors": 12, "variable_names": LO_SW_PRIORITY_NAMES, "dataset_name": "imap_codice_l1a_lo_sw_priority", }, CODICEAPID.COD_LO_NSW_PRIORITY_COUNTS: { "num_counters": 2, "num_energy_steps": 128, + "num_spin_sectors": 12, "variable_names": LO_NSW_PRIORITY_NAMES, "dataset_name": "imap_codice_l1a_lo_nsw_priority", }, CODICEAPID.COD_LO_SW_SPECIES_COUNTS: { "num_counters": 16, "num_energy_steps": 128, + "num_spin_sectors": 1, "variable_names": LO_SW_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_lo_sw_species", }, CODICEAPID.COD_LO_NSW_SPECIES_COUNTS: { "num_counters": 8, "num_energy_steps": 128, + "num_spin_sectors": 1, "variable_names": LO_NSW_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_lo_nsw_species", }, @@ -179,25 +194,6 @@ 9: CoDICECompression.LOSSY_A_LOSSLESS, } -# Collapse table ID lookup table for Lo data products -# The key is the view_id and the value is the ID for the collapse table -LO_COLLAPSE_TABLE_ID_LOOKUP = {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8} - -# Collapse table ID lookup table for Hi data products -# The key is the view_id and the value is the ID for the collapse table -Hi_COLLAPSE_TABLE_ID_LOOKUP = { - 0: 8, - 1: 9, - 2: 10, - 3: 0, - 4: 1, - 5: 2, - 6: 4, - 7: 5, - 8: 6, - 9: 7, -} - # ESA Sweep table ID lookup table # The combination of plan_id and plan_step determine the ESA sweep Table to use # Currently, ESA sweep table 0 is used for every plan_id/plan_step combination, @@ -538,6 
+534,7 @@ 252: 475136, 253: 491520, 254: 507904, + 255: 999999, } LOSSY_B_TABLE = { @@ -796,4 +793,5 @@ 252: 6815744, 253: 7340032, 254: 7864320, + 255: 999999, } From bb9cb3060d085ff46ed5d5e32c762747b4ddf86b Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 10:53:24 -0600 Subject: [PATCH 03/22] Added spin sector attribute definition --- .../cdf/config/imap_codice_l1a_variable_attrs.yaml | 11 +++++++++++ .../cdf/config/imap_codice_l1b_variable_attrs.yaml | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml b/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml index 54701c71c..aee0d7acd 100644 --- a/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml @@ -36,6 +36,17 @@ energy_attrs: VALIDMAX: 127 VAR_TYPE: support_data +spin_sector_attrs: + <<: *default + CATDESC: Spin sector indicating range of spin angles + FIELDNAM: Spin sector + FORMAT: I4 + LABLAXIS: spin sector + UNITS: ' ' + VALIDMIN: 0 + VALIDMAX: 1152 + VAR_TYPE: support_data + # <=== Labels ===> energy_label: CATDESC: Energy per charge (E/q) sweeping step diff --git a/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml b/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml index 1d5d44eb5..c9de1c451 100644 --- a/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml @@ -32,6 +32,17 @@ energy_attrs: VALIDMAX: 127 VAR_TYPE: support_data +spin_sector_attrs: + <<: *default + CATDESC: Spin sector indicating range of spin angles + FIELDNAM: Spin sector + FORMAT: I4 + LABLAXIS: spin sector + UNITS: ' ' + VALIDMIN: 0 + VALIDMAX: 1152 + VAR_TYPE: support_data + # <=== Labels ===> energy_label: CATDESC: Energy per charge (E/q) sweeping step From 6f81befdb4261a61f7ac826095e527137023b1c1 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 10:57:28 -0600 Subject: [PATCH 04/22] Updated code to more accurately unpack science data --- imap_processing/codice/codice_l1a.py | 124 ++++++++++++------ .../tests/codice/test_codice_l1a.py | 29 ++-- 2 files changed, 98 insertions(+), 55 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index 0c925fee7..f9032930a 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -27,16 +27,21 @@ from imap_processing.cdf.utils import met_to_j2000ns from imap_processing.codice import constants from imap_processing.codice.codice_l0 import decom_packets +from imap_processing.codice.decompress import decompress from imap_processing.codice.utils import CODICEAPID, add_metadata_to_array from imap_processing.utils import group_by_apid, sort_by_time logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -# TODO: Decom data arrays need to be decompressed # TODO: In decommutation, how to have a variable length data and then a checksum # after it? (Might be fixed with new XTCE script updates) # TODO: Add support for decomming multiple APIDs from a single file +# TODO: Add these as variables in CDF: SPIN_PERIOD, ST_BIAS_GAIN_MODE, +# SW_BIAS_GAIN_MODE, RGFO_HALF_SPIN, NSO_HALF_SPIN, DATA_QUALITY +# TODO: Use new packet_file_to_dataset() function to simplify things +# TODO: Determine what should go in event data CDF and how it should be +# structured. 
class CoDICEL1aPipeline: @@ -92,6 +97,7 @@ def configure_data_products(self, apid: int) -> None: config = constants.DATA_PRODUCT_CONFIGURATIONS.get(apid) # type: ignore[call-overload] self.num_counters = config["num_counters"] self.num_energy_steps = config["num_energy_steps"] + self.num_spin_sectors = config["num_spin_sectors"] self.variable_names = config["variable_names"] self.dataset_name = config["dataset_name"] @@ -121,11 +127,17 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset # Define coordinates epoch = xr.DataArray( - met_to_j2000ns(met), # TODO: Fix after SIT-3 (see note below) + [met_to_j2000ns(met)], name="epoch", dims=["epoch"], attrs=cdf_attrs.get_variable_attributes("epoch"), ) + spin_sector = xr.DataArray( + np.arange(self.num_spin_sectors), + name="spin_sector", + dims=["spin_sector"], + attrs=cdf_attrs.get_variable_attributes("spin_sector_attrs"), + ) energy_steps = xr.DataArray( np.arange(self.num_energy_steps), name="energy", @@ -145,6 +157,7 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset dataset = xr.Dataset( coords={ "epoch": epoch, + "spin_sector": spin_sector, "energy": energy_steps, "energy_label": energy_label, }, @@ -153,12 +166,8 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset # Create a data variable for each counter for variable_data, variable_name in zip(self.data, self.variable_names): - # TODO: Currently, cdflib doesn't properly write/read CDF files that - # have a single epoch value. To get around this for now, use - # two epoch values and reshape accordingly. Revisit this after - # SIT-3. See https://github.com/MAVENSDC/cdflib/issues/268 - variable_data_arr = np.array(list(variable_data) * 2, dtype=int).reshape( - 2, self.num_energy_steps + variable_data_arr = np.array(variable_data).reshape( + 1, self.num_spin_sectors, self.num_energy_steps ) cdf_attrs_key = ( f"{self.dataset_name.split('imap_codice_l1a_')[-1]}-{variable_name}" @@ -166,7 +175,7 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset dataset[variable_name] = xr.DataArray( variable_data_arr, name=variable_name, - dims=["epoch", "energy"], + dims=["epoch", "spin_sector", "energy"], attrs=cdf_attrs.get_variable_attributes(cdf_attrs_key), ) @@ -262,14 +271,41 @@ def get_esa_sweep_values(self) -> None: sweep_table = sweep_data[sweep_data["table_idx"] == sweep_table_id] self.esa_sweep_values = sweep_table["esa_v"].values - def unpack_science_data(self, science_values: str) -> None: + def unpack_hi_science_data(self, science_values: str) -> None: """ - Unpack the science data from the packet. + Unpack the CoDICE-Hi science data from the packet. - For LO SW Species Counts data, the science data within the packet is a - blob of compressed values of length 2048 bits (16 species * 128 energy - levels). These data need to be divided up by species so that each - species can have their own data variable in the L1A CDF file. + The science data within the packet is a compressed, binary string of + values. + + Parameters + ---------- + science_values : str + A string of binary data representing the science values of the data. 
+ """ + self.compression_algorithm = constants.HI_COMPRESSION_ID_LOOKUP[self.view_id] + + # Decompress the binary string + science_values = decompress(science_values, self.compression_algorithm) + + # Divide up the data by the number of priorities or species + chunk_size = len(science_values) // self.num_counters + science_values_unpacked = [ + science_values[i : i + chunk_size] + for i in range(0, len(science_values), chunk_size) + ] + + # TODO: Determine how to properly divide up hi data. For now, just use + # arrays for each counter + self.data = science_values_unpacked + + def unpack_lo_science_data(self, science_values: str) -> None: + """ + Unpack the CoDICE-Lo science data from the packet. + + The science data within the packet is a compressed, binary string of + values. These data need to be divided up by species or priorities, + and re-arranged into 2D arrays representing energy and spin angle. Parameters ---------- @@ -277,18 +313,27 @@ def unpack_science_data(self, science_values: str) -> None: A string of binary data representing the science values of the data. """ self.compression_algorithm = constants.LO_COMPRESSION_ID_LOOKUP[self.view_id] - self.collapse_table_id = constants.LO_COLLAPSE_TABLE_ID_LOOKUP[self.view_id] - # TODO: Turn this back on after SIT-3 - # For SIT-3, just create appropriate length data arrays of all ones + # Decompress the binary string + science_values = decompress(science_values, self.compression_algorithm) + # Divide up the data by the number of priorities or species - # science_values = packets[0].data["DATA"].raw_value - # num_bits = len(science_values) - # chunk_size = len(science_values) // self.num_counters - # self.data = [ - # science_values[i : i + chunk_size] for i in range(0, num_bits, chunk_size) - # ] - self.data = [["1"] * 128] * self.num_counters + chunk_size = len(science_values) // self.num_counters + science_values_unpacked = [ + science_values[i : i + chunk_size] + for i in range(0, len(science_values), chunk_size) + ] + + # Further divide up the data by energy levels + # The result is a [12,128] array representing 12 spin angles and 128 + # energy levels + self.data = [] + for counter_data in science_values_unpacked: + data_array = [ + counter_data[i : i + self.num_energy_steps] + for i in range(0, len(counter_data), self.num_energy_steps) + ] + self.data.append(data_array) def create_event_dataset( @@ -334,9 +379,6 @@ def create_event_dataset( attrs=cdf_attrs.get_global_attributes(dataset_name), ) - # TODO: Determine what should go in event data CDF and how it should be - # structured. 
-
 
     return dataset
 
 
@@ -385,13 +427,15 @@ def create_hskp_dataset(
     )
 
     # TODO: Change 'TBD' catdesc and fieldname
-    # Once housekeeping packet definition file is re-generated with updated
-    # version of space_packet_parser, can get fieldname and catdesc info via:
-    # for key, value in (packet.header | packet.data).items():
-    #     fieldname = value.short_description
-    #     catdesc = value.long_description
-    # I am holding off making this change until I acquire updated housekeeping
-    # packets/validation data that match the latest telemetry definitions
+    # Once housekeeping packet definition file is re-generated with
+    # updated version of space_packet_parser, can get fieldname and
+    # catdesc info via:
+    # for key, value in (packet.header | packet.data).items():
+    #     fieldname = value.short_description
+    #     catdesc = value.long_description
+    # I am holding off making this change until I acquire updated
+    # housekeeping packets/validation data that match the latest telemetry
+    # definitions
     for key, value in metadata_arrays.items():
         attrs = cdf_attrs.get_variable_attributes("codice_support_attrs")
         attrs["CATDESC"] = "TBD"
@@ -457,8 +501,6 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset:
     dataset : xarray.Dataset
        The ``xarray`` dataset containing the science data and supporting metadata.
     """
-    # TODO: Use new packet_file_to_dataset() function to simplify things
-
    # Decom the packets, group data by APID, and sort by time
    packets = decom_packets(file_path)
    grouped_data = group_by_apid(packets)
@@ -496,7 +538,7 @@
         # Determine the start time of the packet
         met = packets[0].data["ACQ_START_SECONDS"].raw_value
-        met = [met, met + 1]  # TODO: Remove after cdflib fix
+
         # Extract the data
         science_values = packets[0].data["DATA"].raw_value
@@ -506,8 +548,12 @@
         # Run the pipeline to create a dataset for the product
         pipeline = CoDICEL1aPipeline(table_id, plan_id, plan_step, view_id)
         pipeline.configure_data_products(apid)
-        pipeline.unpack_science_data(science_values)
+        if "_lo_" in pipeline.dataset_name:
+            pipeline.unpack_lo_science_data(science_values)
+        elif "_hi_" in pipeline.dataset_name:
+            pipeline.unpack_hi_science_data(science_values)
         dataset = pipeline.create_science_dataset(met, data_version)
 
     logger.info(f"\nFinal data product:\n{dataset}\n")
+
     return dataset
diff --git a/imap_processing/tests/codice/test_codice_l1a.py b/imap_processing/tests/codice/test_codice_l1a.py
index 383a07b9b..1bef0363a 100644
--- a/imap_processing/tests/codice/test_codice_l1a.py
+++ b/imap_processing/tests/codice/test_codice_l1a.py
@@ -19,19 +19,19 @@
 
 EXPECTED_ARRAY_SHAPES = [
     (99,),  # hskp
-    (1, 128),  # hi-counters-aggregated
-    (1, 128),  # hi-counters-singles
-    (1, 128),  # hi-omni
-    (1, 128),  # hi-sectored
-    (1, 128),  # hi-pha
-    (1, 128),  # lo-counters-aggregated
-    (1, 128),  # lo-counters-aggregated
-    (1, 128),  # lo-sw-angular
-    (1, 128),  # lo-nsw-angular
-    (1, 128),  # lo-sw-priority
-    (1, 128),  # lo-nsw-priority
-    (1, 128),  # lo-sw-species
-    (1, 128),  # lo-nsw-species
+    (1, 6, 1),  # hi-counters-aggregated
+    (1, 16, 1),  # hi-counters-singles
+    (1, 60, 1),  # hi-omni
+    (1, 1152, 1),  # hi-sectored
+    (1, 1),  # hi-pha
+    (1, 36, 128),  # lo-counters-aggregated
+    (1, 144, 128),  # lo-counters-singles
+    (1, 60, 128),  # lo-sw-angular
+    (1, 228, 128),  # lo-nsw-angular
+    (1, 12, 128),  # lo-sw-priority
+    (1, 12, 128),  # lo-nsw-priority
+    (1, 1, 128),  # lo-sw-species
+    (1, 
1, 128), # lo-nsw-species (1, 128), # lo-pha ] EXPECTED_ARRAY_SIZES = [ @@ -110,9 +110,6 @@ def test_l1a_cdf_filenames(test_l1a_data: xr.Dataset, expected_logical_source: s assert dataset.attrs["Logical_source"] == expected_logical_source -@pytest.mark.xfail( - reason="Currently failing due to cdflib/epoch issue. See https://github.com/MAVENSDC/cdflib/issues/268" -) @pytest.mark.parametrize( "test_l1a_data, expected_shape", list(zip(TEST_PACKETS, EXPECTED_ARRAY_SHAPES)), From 7ae84a46a55ceffcb72891e242448d4b8baf78ef Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 15:01:13 -0600 Subject: [PATCH 05/22] Fixed mypy errors --- imap_processing/codice/codice_l1a.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index f9032930a..c2efd1ee7 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -42,6 +42,7 @@ # TODO: Use new packet_file_to_dataset() function to simplify things # TODO: Determine what should go in event data CDF and how it should be # structured. +# TODO: Make sure CDF attributes match expected nomenclature class CoDICEL1aPipeline: @@ -286,13 +287,15 @@ def unpack_hi_science_data(self, science_values: str) -> None: self.compression_algorithm = constants.HI_COMPRESSION_ID_LOOKUP[self.view_id] # Decompress the binary string - science_values = decompress(science_values, self.compression_algorithm) + science_values_decompressed = decompress( + science_values, self.compression_algorithm + ) # Divide up the data by the number of priorities or species - chunk_size = len(science_values) // self.num_counters + chunk_size = len(science_values_decompressed) // self.num_counters science_values_unpacked = [ - science_values[i : i + chunk_size] - for i in range(0, len(science_values), chunk_size) + science_values_decompressed[i : i + chunk_size] + for i in range(0, len(science_values_decompressed), chunk_size) ] # TODO: Determine how to properly divide up hi data. 
For now, just use @@ -315,13 +318,15 @@ def unpack_lo_science_data(self, science_values: str) -> None: self.compression_algorithm = constants.LO_COMPRESSION_ID_LOOKUP[self.view_id] # Decompress the binary string - science_values = decompress(science_values, self.compression_algorithm) + science_values_decompressed = decompress( + science_values, self.compression_algorithm + ) # Divide up the data by the number of priorities or species - chunk_size = len(science_values) // self.num_counters + chunk_size = len(science_values_decompressed) // self.num_counters science_values_unpacked = [ - science_values[i : i + chunk_size] - for i in range(0, len(science_values), chunk_size) + science_values_decompressed[i : i + chunk_size] + for i in range(0, len(science_values_decompressed), chunk_size) ] # Further divide up the data by energy levels From dc2602c157ce6a70c5e4086871fc039593285987 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 15:06:01 -0600 Subject: [PATCH 06/22] Fixed mypy errors --- imap_processing/codice/codice_l1a.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index c2efd1ee7..7f036c06c 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -334,7 +334,7 @@ def unpack_lo_science_data(self, science_values: str) -> None: # energy levels self.data = [] for counter_data in science_values_unpacked: - data_array = [ + data_array: list[list[int]] = [ counter_data[i : i + self.num_energy_steps] for i in range(0, len(counter_data), self.num_energy_steps) ] From 953c10c274833baa031fa6a2cf525b49d695290c Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 15:13:55 -0600 Subject: [PATCH 07/22] Fixed mypy errors --- imap_processing/codice/codice_l1a.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index 7f036c06c..f1a0888d5 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -334,11 +334,11 @@ def unpack_lo_science_data(self, science_values: str) -> None: # energy levels self.data = [] for counter_data in science_values_unpacked: - data_array: list[list[int]] = [ + data_array = [ counter_data[i : i + self.num_energy_steps] for i in range(0, len(counter_data), self.num_energy_steps) ] - self.data.append(data_array) + self.data.append(data_array) # type: ignore[arg-type] def create_event_dataset( From 69814dafecae3b5d76bc1c2354d72bbd174a5007 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 15:19:58 -0600 Subject: [PATCH 08/22] Fixed doc build errors --- imap_processing/codice/codice_l1a.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index f1a0888d5..1e857c0e3 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -75,8 +75,10 @@ class CoDICEL1aPipeline: Retrieve the acquisition times via the Lo stepping table. get_esa_sweep_values() Retrieve the ESA sweep values. - unpack_science_data() - Make 4D L1a data product from the decompressed science data. + unpack_hi_science_data() + Decompress, unpack, and restructure CoDICE-Hi data arrays. + unpack_lo_science_data() + Decompress, unpack, and restructure CoDICE-Lo data arrays. 
""" def __init__(self, table_id: int, plan_id: int, plan_step: int, view_id: int): @@ -274,7 +276,7 @@ def get_esa_sweep_values(self) -> None: def unpack_hi_science_data(self, science_values: str) -> None: """ - Unpack the CoDICE-Hi science data from the packet. + Decompress, unpack, and restructure CoDICE-Hi data arrays. The science data within the packet is a compressed, binary string of values. @@ -304,7 +306,7 @@ def unpack_hi_science_data(self, science_values: str) -> None: def unpack_lo_science_data(self, science_values: str) -> None: """ - Unpack the CoDICE-Lo science data from the packet. + Decompress, unpack, and restructure CoDICE-Lo data arrays. The science data within the packet is a compressed, binary string of values. These data need to be divided up by species or priorities, From 64467beeb0d02a524ff4cb9745cbca7f16146187 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Wed, 21 Aug 2024 09:15:17 -0600 Subject: [PATCH 09/22] Implemented packet_file_to_datasets utility function --- imap_processing/codice/codice_l0.py | 21 +-- imap_processing/codice/codice_l1a.py | 132 ++++++++++-------- imap_processing/codice/constants.py | 19 +++ .../tests/codice/test_codice_l1a.py | 2 +- 4 files changed, 93 insertions(+), 81 deletions(-) diff --git a/imap_processing/codice/codice_l0.py b/imap_processing/codice/codice_l0.py index 4515af8fb..a5cc62c6f 100644 --- a/imap_processing/codice/codice_l0.py +++ b/imap_processing/codice/codice_l0.py @@ -17,6 +17,7 @@ from pathlib import Path from imap_processing import decom, imap_module_directory +from imap_processing.codice import constants def decom_packets(packet_file: Path) -> list: @@ -33,26 +34,8 @@ def decom_packets(packet_file: Path) -> list: list : list All the unpacked data. """ - packet_to_xtce_mapping = { - "imap_codice_l0_hi-counters-aggregated_20240429_v001.pkts": "P_COD_HI_INST_COUNTS_AGGREGATED.xml", # noqa - "imap_codice_l0_hi-counters-singles_20240429_v001.pkts": "P_COD_HI_INST_COUNTS_SINGLES.xml", # noqa - "imap_codice_l0_hi-omni_20240429_v001.pkts": "P_COD_HI_OMNI_SPECIES_COUNTS.xml", - "imap_codice_l0_hi-sectored_20240429_v001.pkts": "P_COD_HI_SECT_SPECIES_COUNTS.xml", # noqa - "imap_codice_l0_hi-pha_20240429_v001.pkts": "P_COD_HI_PHA.xml", - "imap_codice_l0_hskp_20100101_v001.pkts": "P_COD_NHK.xml", - "imap_codice_l0_lo-counters-aggregated_20240429_v001.pkts": "P_COD_LO_INST_COUNTS_AGGREGATED.xml", # noqa - "imap_codice_l0_lo-counters-singles_20240429_v001.pkts": "P_COD_LO_INST_COUNTS_SINGLES.xml", # noqa - "imap_codice_l0_lo-sw-angular_20240429_v001.pkts": "P_COD_LO_SW_ANGULAR_COUNTS.xml", # noqa - "imap_codice_l0_lo-nsw-angular_20240429_v001.pkts": "P_COD_LO_NSW_ANGULAR_COUNTS.xml", # noqa - "imap_codice_l0_lo-sw-priority_20240429_v001.pkts": "P_COD_LO_SW_PRIORITY_COUNTS.xml", # noqa - "imap_codice_l0_lo-nsw-priority_20240429_v001.pkts": "P_COD_LO_NSW_PRIORITY_COUNTS.xml", # noqa - "imap_codice_l0_lo-sw-species_20240429_v001.pkts": "P_COD_LO_SW_SPECIES_COUNTS.xml", # noqa - "imap_codice_l0_lo-nsw-species_20240429_v001.pkts": "P_COD_LO_NSW_SPECIES_COUNTS.xml", # noqa - "imap_codice_l0_lo-pha_20240429_v001.pkts": "P_COD_LO_PHA.xml", - } - xtce_document = Path( - f"{imap_module_directory}/codice/packet_definitions/{packet_to_xtce_mapping[packet_file.name]}" + f"{imap_module_directory}/codice/packet_definitions/{constants.PACKET_TO_XTCE_MAPPING[packet_file.name]}" ) decom_packet_list: list = decom.decom_packets(packet_file, xtce_document) return decom_packet_list diff --git a/imap_processing/codice/codice_l1a.py 
b/imap_processing/codice/codice_l1a.py index 1e857c0e3..602a229da 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -13,23 +13,20 @@ from __future__ import annotations -import collections import logging from pathlib import Path import numpy as np import pandas as pd -import space_packet_parser import xarray as xr from imap_processing import imap_module_directory from imap_processing.cdf.imap_cdf_manager import ImapCdfAttributes from imap_processing.cdf.utils import met_to_j2000ns from imap_processing.codice import constants -from imap_processing.codice.codice_l0 import decom_packets from imap_processing.codice.decompress import decompress -from imap_processing.codice.utils import CODICEAPID, add_metadata_to_array -from imap_processing.utils import group_by_apid, sort_by_time +from imap_processing.codice.utils import CODICEAPID +from imap_processing.utils import packet_file_to_datasets logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -39,7 +36,6 @@ # TODO: Add support for decomming multiple APIDs from a single file # TODO: Add these as variables in CDF: SPIN_PERIOD, ST_BIAS_GAIN_MODE, # SW_BIAS_GAIN_MODE, RGFO_HALF_SPIN, NSO_HALF_SPIN, DATA_QUALITY -# TODO: Use new packet_file_to_dataset() function to simplify things # TODO: Determine what should go in event data CDF and how it should be # structured. # TODO: Make sure CDF attributes match expected nomenclature @@ -344,19 +340,17 @@ def unpack_lo_science_data(self, science_values: str) -> None: def create_event_dataset( - met: list[int], event_data: str, dataset_name: str, data_version: str + apid: int, packet: xr.Dataset, data_version: str ) -> xr.Dataset: """ Create dataset for event data. Parameters ---------- - met : list[int] - The Mission Elapsed Time of the data. - event_data : str - A string of binary numbers representing the event data. - dataset_name : str - The name for the dataset. + apid : int + The APID of the packet. + packet : xarray.Dataset + The packet to process. data_version : str Version of the data product being created. @@ -365,6 +359,17 @@ def create_event_dataset( dataset : xarray.Dataset Xarray dataset containing the event data. """ + if apid == CODICEAPID.COD_LO_PHA: + dataset_name = "imap_codice_l1a_lo_pha" + elif apid == CODICEAPID.COD_HI_PHA: + dataset_name = "imap_codice_l1a_hi_pha" + + # Determine the start time of the packet + met = packet.acq_start_seconds.data[0] + + # Extract the data + # event_data = packet.event_data.data (Currently turned off, see TODO) + cdf_attrs = ImapCdfAttributes() cdf_attrs.add_instrument_global_attrs("codice") cdf_attrs.add_instrument_variable_attrs("codice", "l1a") @@ -372,7 +377,7 @@ def create_event_dataset( # Define coordinates epoch = xr.DataArray( - met_to_j2000ns(met), # TODO: Fix after SIT-3 (see note below) + met_to_j2000ns([met]), name="epoch", dims=["epoch"], attrs=cdf_attrs.get_variable_attributes("epoch"), @@ -390,7 +395,7 @@ def create_event_dataset( def create_hskp_dataset( - packets: list[space_packet_parser.parser.Packet], + packet: xr.Dataset, data_version: str, ) -> xr.Dataset: """ @@ -398,8 +403,8 @@ def create_hskp_dataset( Parameters ---------- - packets : list[space_packet_parser.parser.Packet] - The list of packets to process. + packet : xarray.Dataset + The packet to process. data_version : str Version of the data product being created. 
@@ -413,14 +418,9 @@ def create_hskp_dataset( cdf_attrs.add_instrument_variable_attrs("codice", "l1a") cdf_attrs.add_global_attribute("Data_version", data_version) - metadata_arrays: dict = collections.defaultdict(list) - - for packet in packets: - add_metadata_to_array(packet, metadata_arrays) - epoch = xr.DataArray( met_to_j2000ns( - metadata_arrays["SHCOARSE"], + packet.shcoarse.data, reference_epoch=np.datetime64("2010-01-01T00:01:06.184", "ns"), ), name="epoch", @@ -443,19 +443,21 @@ def create_hskp_dataset( # I am holding off making this change until I acquire updated # housekeeping packets/validation data that match the latest telemetry # definitions - for key, value in metadata_arrays.items(): + for variable in packet: attrs = cdf_attrs.get_variable_attributes("codice_support_attrs") attrs["CATDESC"] = "TBD" attrs["DEPEND_0"] = "epoch" attrs["FIELDNAM"] = "TBD" - attrs["LABLAXIS"] = key + attrs["LABLAXIS"] = variable - dataset[key] = xr.DataArray(value, dims=["epoch"], attrs=attrs) + dataset[variable] = xr.DataArray( + packet[variable].data, dims=["epoch"], attrs=attrs + ) return dataset -def get_params(packet: space_packet_parser.parser.Packet) -> tuple[int, int, int, int]: +def get_params(packet: xr.Dataset) -> tuple[int, int, int, int]: """ Return the four 'main' parameters used for l1a processing. @@ -465,7 +467,7 @@ def get_params(packet: space_packet_parser.parser.Packet) -> tuple[int, int, int Parameters ---------- - packet : space_packet_parser.parser.Packet + packet : xarray.Dataset A packet for the APID of interest. Returns @@ -484,10 +486,10 @@ def get_params(packet: space_packet_parser.parser.Packet) -> tuple[int, int, int view_id : int Provides information about how data was collapsed and/or compressed. """ - table_id = packet.data["TABLE_ID"].raw_value - plan_id = packet.data["PLAN_ID"].raw_value - plan_step = packet.data["PLAN_STEP"].raw_value - view_id = packet.data["VIEW_ID"].raw_value + table_id = packet.table_id.data[0] + plan_id = packet.plan_id.data[0] + plan_step = packet.plan_step.data[0] + view_id = packet.view_id.data[0] return table_id, plan_id, plan_step, view_id @@ -509,48 +511,30 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset: The ``xarray`` dataset containing the science data and supporting metadata. 
""" # Decom the packets, group data by APID, and sort by time - packets = decom_packets(file_path) - grouped_data = group_by_apid(packets) + xtce_packet_definition = Path( + f"{imap_module_directory}/codice/packet_definitions/{constants.PACKET_TO_XTCE_MAPPING[file_path.name]}" + ) + packets = packet_file_to_datasets(file_path, xtce_packet_definition) - for apid in grouped_data: + for apid in packets: + packet = packets[apid] logger.info(f"\nProcessing {CODICEAPID(apid).name} packet") if apid == CODICEAPID.COD_NHK: - packets = grouped_data[apid] - sorted_packets = sort_by_time(packets, "SHCOARSE") - dataset = create_hskp_dataset(sorted_packets, data_version) + dataset = create_hskp_dataset(packet, data_version) elif apid in [CODICEAPID.COD_LO_PHA, CODICEAPID.COD_HI_PHA]: - if apid == CODICEAPID.COD_LO_PHA: - dataset_name = "imap_codice_l1a_lo_pha" - elif apid == CODICEAPID.COD_HI_PHA: - dataset_name = "imap_codice_l1a_hi_pha" - - # Sort the packets by time - packets = sort_by_time(grouped_data[apid], "SHCOARSE") - - # Determine the start time of the packet - met = packets[0].data["ACQ_START_SECONDS"].raw_value - met = [met, met + 1] # TODO: Remove after cdflib fix - - # Extract the data - event_data = packets[0].data["EVENT_DATA"].raw_value - - # Create the dataset - dataset = create_event_dataset(met, event_data, dataset_name, data_version) + dataset = create_event_dataset(apid, packet, data_version) elif apid in constants.APIDS_FOR_SCIENCE_PROCESSING: - # Sort the packets by time - packets = sort_by_time(grouped_data[apid], "SHCOARSE") - # Determine the start time of the packet - met = packets[0].data["ACQ_START_SECONDS"].raw_value + met = packet.acq_start_seconds.data[0] # Extract the data - science_values = packets[0].data["DATA"].raw_value + science_values = packet.data.data[0] # Get the four "main" parameters for processing - table_id, plan_id, plan_step, view_id = get_params(packets[0]) + table_id, plan_id, plan_step, view_id = get_params(packet) # Run the pipeline to create a dataset for the product pipeline = CoDICEL1aPipeline(table_id, plan_id, plan_step, view_id) @@ -564,3 +548,29 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset: logger.info(f"\nFinal data product:\n{dataset}\n") return dataset + + +if __name__ == "__main__": + TEST_DATA_PATH = imap_module_directory / "tests" / "codice" / "data" + + TEST_PACKETS = [ + TEST_DATA_PATH / "imap_codice_l0_hskp_20100101_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_hi-counters-aggregated_20240429_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_hi-counters-singles_20240429_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_hi-omni_20240429_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_hi-sectored_20240429_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_hi-pha_20240429_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_lo-counters-aggregated_20240429_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_lo-counters-singles_20240429_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_lo-sw-angular_20240429_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_lo-nsw-angular_20240429_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_lo-sw-priority_20240429_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_lo-nsw-priority_20240429_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_lo-sw-species_20240429_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_lo-nsw-species_20240429_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_lo-pha_20240429_v001.pkts", + ] + + for file_path in TEST_PACKETS: + dataset = process_codice_l1a(file_path, "001") + print(dataset) 
diff --git a/imap_processing/codice/constants.py b/imap_processing/codice/constants.py index 9054431f4..f6aaa41ce 100644 --- a/imap_processing/codice/constants.py +++ b/imap_processing/codice/constants.py @@ -14,9 +14,28 @@ # TODO: What to do in the case of a value of 255 in LOSSY_A and LOSSY_B # compression? +# TODO: Improve PACKET_TO_XTCE_MAPPING to not have hard-coded dates/versions from imap_processing.codice.utils import CODICEAPID, CoDICECompression +PACKET_TO_XTCE_MAPPING = { + "imap_codice_l0_hi-counters-aggregated_20240429_v001.pkts": "P_COD_HI_INST_COUNTS_AGGREGATED.xml", # noqa + "imap_codice_l0_hi-counters-singles_20240429_v001.pkts": "P_COD_HI_INST_COUNTS_SINGLES.xml", # noqa + "imap_codice_l0_hi-omni_20240429_v001.pkts": "P_COD_HI_OMNI_SPECIES_COUNTS.xml", + "imap_codice_l0_hi-sectored_20240429_v001.pkts": "P_COD_HI_SECT_SPECIES_COUNTS.xml", + "imap_codice_l0_hi-pha_20240429_v001.pkts": "P_COD_HI_PHA.xml", + "imap_codice_l0_hskp_20100101_v001.pkts": "P_COD_NHK.xml", + "imap_codice_l0_lo-counters-aggregated_20240429_v001.pkts": "P_COD_LO_INST_COUNTS_AGGREGATED.xml", # noqa + "imap_codice_l0_lo-counters-singles_20240429_v001.pkts": "P_COD_LO_INST_COUNTS_SINGLES.xml", # noqa + "imap_codice_l0_lo-sw-angular_20240429_v001.pkts": "P_COD_LO_SW_ANGULAR_COUNTS.xml", + "imap_codice_l0_lo-nsw-angular_20240429_v001.pkts": "P_COD_LO_NSW_ANGULAR_COUNTS.xml", # noqa + "imap_codice_l0_lo-sw-priority_20240429_v001.pkts": "P_COD_LO_SW_PRIORITY_COUNTS.xml", # noqa + "imap_codice_l0_lo-nsw-priority_20240429_v001.pkts": "P_COD_LO_NSW_PRIORITY_COUNTS.xml", # noqa + "imap_codice_l0_lo-sw-species_20240429_v001.pkts": "P_COD_LO_SW_SPECIES_COUNTS.xml", + "imap_codice_l0_lo-nsw-species_20240429_v001.pkts": "P_COD_LO_NSW_SPECIES_COUNTS.xml", # noqa + "imap_codice_l0_lo-pha_20240429_v001.pkts": "P_COD_LO_PHA.xml", +} + APIDS_FOR_SCIENCE_PROCESSING = [ CODICEAPID.COD_HI_INST_COUNTS_AGGREGATED, CODICEAPID.COD_HI_INST_COUNTS_SINGLES, diff --git a/imap_processing/tests/codice/test_codice_l1a.py b/imap_processing/tests/codice/test_codice_l1a.py index 1bef0363a..1883e6fa1 100644 --- a/imap_processing/tests/codice/test_codice_l1a.py +++ b/imap_processing/tests/codice/test_codice_l1a.py @@ -35,7 +35,7 @@ (1, 128), # lo-pha ] EXPECTED_ARRAY_SIZES = [ - 123, # hskp + 129, # hskp 1, # hi-counters-aggregated 3, # hi-counters-singles 8, # hi-omni From 4e9252f2edd01cd892586eab658a8d7656f6fe67 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Wed, 28 Aug 2024 11:50:14 -0600 Subject: [PATCH 10/22] Updated expected array shapes --- .../tests/codice/test_codice_l1a.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/imap_processing/tests/codice/test_codice_l1a.py b/imap_processing/tests/codice/test_codice_l1a.py index 1bef0363a..d9fd7207c 100644 --- a/imap_processing/tests/codice/test_codice_l1a.py +++ b/imap_processing/tests/codice/test_codice_l1a.py @@ -19,19 +19,19 @@ EXPECTED_ARRAY_SHAPES = [ (99,), # hskp - (1, 6, 1), # hi-counters-aggregated - (1, 16, 1), # hi-counters-singles - (1, 60, 1), # hi-omni - (1, 1152, 1), # hi-sectored + (1, 1, 6, 1), # hi-counters-aggregated # TODO: Double check with Joey + (1, 1, 16, 1), # hi-counters-singles # TODO: Double check with Joey + (1, 15, 4, 1), # hi-omni # TODO: Double check with Joey + (1, 8, 12, 12), # hi-sectored (1, 1), # hi-pha - (1, 36, 128), # lo-counters-aggregated - (1, 144, 128), # lo-counters-aggregated - (1, 60, 128), # lo-sw-angular - (1, 228, 128), # lo-nsw-angular - (1, 12, 128), # lo-sw-priority - (1, 12, 128), # 
lo-nsw-priority - (1, 1, 128), # lo-sw-species - (1, 1, 128), # lo-nsw-species + (1, 6, 6, 128), # lo-counters-aggregated + (1, 24, 6, 128), # lo-counters-singles + (1, 5, 12, 128), # lo-sw-angular + (1, 19, 12, 128), # lo-nsw-angular + (1, 1, 12, 128), # lo-sw-priority + (1, 1, 12, 128), # lo-nsw-priority + (1, 1, 1, 128), # lo-sw-species + (1, 1, 1, 128), # lo-nsw-species (1, 128), # lo-pha ] EXPECTED_ARRAY_SIZES = [ From c7a8d09bc97b7571c9f422d2e629521f969e553e Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Wed, 28 Aug 2024 11:51:03 -0600 Subject: [PATCH 11/22] Avoiding bitarray dependency --- imap_processing/codice/decompress.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/imap_processing/codice/decompress.py b/imap_processing/codice/decompress.py index e4abf83ea..bb92a75c4 100644 --- a/imap_processing/codice/decompress.py +++ b/imap_processing/codice/decompress.py @@ -29,8 +29,6 @@ import lzma from enum import IntEnum -import bitarray - from imap_processing.codice.constants import LOSSY_A_TABLE, LOSSY_B_TABLE from imap_processing.codice.utils import CoDICECompression @@ -118,7 +116,9 @@ def decompress(compressed_binary: str, algorithm: IntEnum) -> list[int]: The 24- or 32-bit decompressed values. """ # Convert the binary string to a byte stream - compressed_bytes = bitarray.bitarray(compressed_binary).tobytes() + compressed_bytes = int(compressed_binary, 2).to_bytes( + (len(compressed_binary) + 7) // 8, byteorder="big" + ) # Apply the appropriate decompression algorithm if algorithm == CoDICECompression.NO_COMPRESSION: From 9bfb8169b22bd911b7b3677375719224275dad67 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Wed, 28 Aug 2024 11:52:28 -0600 Subject: [PATCH 12/22] Added instrument config key to make some conditional areas of the processing pipeline a bit more readable; added proper numbers for positions/energies/spin_sectors --- imap_processing/codice/constants.py | 50 +++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/imap_processing/codice/constants.py b/imap_processing/codice/constants.py index 9054431f4..f063087f2 100644 --- a/imap_processing/codice/constants.py +++ b/imap_processing/codice/constants.py @@ -79,87 +79,111 @@ DATA_PRODUCT_CONFIGURATIONS = { CODICEAPID.COD_HI_INST_COUNTS_AGGREGATED: { "num_counters": 1, - "num_energy_steps": 1, - "num_spin_sectors": 6, + "num_energy_steps": 1, # TODO: Double check with Joey + "num_positions": 6, # TODO: Double check with Joey + "num_spin_sectors": 1, "variable_names": HI_INST_COUNTS_AGGREGATED_NAMES, "dataset_name": "imap_codice_l1a_hi_counters_aggregated", + "instrument": "hi", }, CODICEAPID.COD_HI_INST_COUNTS_SINGLES: { "num_counters": 3, - "num_energy_steps": 1, - "num_spin_sectors": 16, + "num_energy_steps": 1, # TODO: Double check with Joey + "num_positions": 16, # TODO: Double check with Joey + "num_spin_sectors": 1, "variable_names": HI_INST_COUNTS_SINGLES_NAMES, "dataset_name": "imap_codice_l1a_hi_counters_singles", + "instrument": "hi", }, CODICEAPID.COD_HI_OMNI_SPECIES_COUNTS: { "num_counters": 8, - "num_energy_steps": 1, - "num_spin_sectors": 60, # TODO: Double-check this + "num_energy_steps": 15, # TODO: Double check with Joey + "num_positions": 4, # TODO: Double check with Joey + "num_spin_sectors": 1, "variable_names": HI_OMNI_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_hi_omni", + "instrument": "hi", }, CODICEAPID.COD_HI_SECT_SPECIES_COUNTS: { "num_counters": 4, - "num_energy_steps": 1, - "num_spin_sectors": 1152, # TODO: Double-check 
this + "num_energy_steps": 8, + "num_positions": 12, + "num_spin_sectors": 12, "variable_names": HI_SECT_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_hi_sectored", + "instrument": "hi", }, CODICEAPID.COD_LO_INST_COUNTS_AGGREGATED: { "num_counters": 1, "num_energy_steps": 128, - "num_spin_sectors": 36, + "num_positions": 6, + "num_spin_sectors": 6, "variable_names": LO_INST_COUNTS_AGGREGATED_NAMES, "dataset_name": "imap_codice_l1a_lo_counters_aggregated", + "instrument": "lo", }, CODICEAPID.COD_LO_INST_COUNTS_SINGLES: { "num_counters": 1, "num_energy_steps": 128, - "num_spin_sectors": 144, + "num_positions": 24, + "num_spin_sectors": 6, "variable_names": LO_INST_COUNTS_SINGLES_NAMES, "dataset_name": "imap_codice_l1a_lo_counters_singles", + "instrument": "lo", }, CODICEAPID.COD_LO_SW_ANGULAR_COUNTS: { "num_counters": 4, "num_energy_steps": 128, - "num_spin_sectors": 60, + "num_positions": 5, + "num_spin_sectors": 12, "variable_names": LO_SW_ANGULAR_NAMES, "dataset_name": "imap_codice_l1a_lo_sw_angular", + "instrument": "lo", }, CODICEAPID.COD_LO_NSW_ANGULAR_COUNTS: { "num_counters": 1, "num_energy_steps": 128, - "num_spin_sectors": 228, + "num_positions": 19, + "num_spin_sectors": 12, "variable_names": LO_NSW_ANGULAR_NAMES, "dataset_name": "imap_codice_l1a_lo_nsw_angular", + "instrument": "lo", }, CODICEAPID.COD_LO_SW_PRIORITY_COUNTS: { "num_counters": 5, "num_energy_steps": 128, + "num_positions": 1, "num_spin_sectors": 12, "variable_names": LO_SW_PRIORITY_NAMES, "dataset_name": "imap_codice_l1a_lo_sw_priority", + "instrument": "lo", }, CODICEAPID.COD_LO_NSW_PRIORITY_COUNTS: { "num_counters": 2, "num_energy_steps": 128, + "num_positions": 1, "num_spin_sectors": 12, "variable_names": LO_NSW_PRIORITY_NAMES, "dataset_name": "imap_codice_l1a_lo_nsw_priority", + "instrument": "lo", }, CODICEAPID.COD_LO_SW_SPECIES_COUNTS: { "num_counters": 16, "num_energy_steps": 128, + "num_positions": 1, "num_spin_sectors": 1, "variable_names": LO_SW_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_lo_sw_species", + "instrument": "lo", }, CODICEAPID.COD_LO_NSW_SPECIES_COUNTS: { "num_counters": 8, "num_energy_steps": 128, + "num_positions": 1, "num_spin_sectors": 1, "variable_names": LO_NSW_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_lo_nsw_species", + "instrument": "lo", }, } @@ -793,5 +817,5 @@ 252: 6815744, 253: 7340032, 254: 7864320, - 255: 999999, + 255: 9999999, } From c4259100d87ec2741778bf7a1e9bfc602abfd1a5 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Wed, 28 Aug 2024 11:53:02 -0600 Subject: [PATCH 13/22] Added attrs for inst_az coordinate --- .../config/imap_codice_l1a_variable_attrs.yaml | 15 +++++++++++++-- .../config/imap_codice_l1b_variable_attrs.yaml | 11 +++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml b/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml index aee0d7acd..9d7a535d9 100644 --- a/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml @@ -36,15 +36,26 @@ energy_attrs: VALIDMAX: 127 VAR_TYPE: support_data +inst_az_attrs: + <<: *default + CATDESC: Azimuth + FIELDNAM: Azimuth + FORMAT: I2 + LABLAXIS: Azimuth + UNITS: ' ' + VALIDMIN: 0 + VALIDMAX: 31 + VAR_TYPE: support_data + spin_sector_attrs: <<: *default CATDESC: Spin sector indicating range of spin angles FIELDNAM: Spin sector - FORMAT: I4 + FORMAT: I2 LABLAXIS: spin sector UNITS: ' ' VALIDMIN: 0 - VALIDMAX: 1152 + VALIDMAX: 11 VAR_TYPE: 
support_data # <=== Labels ===> diff --git a/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml b/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml index c9de1c451..cbb14205f 100644 --- a/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml @@ -32,6 +32,17 @@ energy_attrs: VALIDMAX: 127 VAR_TYPE: support_data +inst_az_attrs: + <<: *default + CATDESC: Azimuth + FIELDNAM: Azimuth + FORMAT: I2 + LABLAXIS: Azimuth + UNITS: ' ' + VALIDMIN: 0 + VALIDMAX: 31 + VAR_TYPE: support_data + spin_sector_attrs: <<: *default CATDESC: Spin sector indicating range of spin angles From a21727c9274994ae5d4959602fa92220594384cd Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Wed, 28 Aug 2024 11:54:49 -0600 Subject: [PATCH 14/22] Added further unpacking of science data to properly restructure data arrays by positions, spin_sectors, and energies --- imap_processing/codice/codice_l1a.py | 116 +++++++++++++-------------- 1 file changed, 55 insertions(+), 61 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index 1e857c0e3..a37671c7b 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -101,8 +101,10 @@ def configure_data_products(self, apid: int) -> None: self.num_counters = config["num_counters"] self.num_energy_steps = config["num_energy_steps"] self.num_spin_sectors = config["num_spin_sectors"] + self.num_positions = config["num_positions"] self.variable_names = config["variable_names"] self.dataset_name = config["dataset_name"] + self.instrument = config["instrument"] def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset: """ @@ -135,6 +137,12 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset dims=["epoch"], attrs=cdf_attrs.get_variable_attributes("epoch"), ) + inst_az = xr.DataArray( + np.arange(self.num_positions), + name="inst_az", + dims=["inst_az"], + attrs=cdf_attrs.get_variable_attributes("inst_az_attrs"), + ) spin_sector = xr.DataArray( np.arange(self.num_spin_sectors), name="spin_sector", @@ -160,6 +168,7 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset dataset = xr.Dataset( coords={ "epoch": epoch, + "inst_az": inst_az, "spin_sector": spin_sector, "energy": energy_steps, "energy_label": energy_label, @@ -169,21 +178,34 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset # Create a data variable for each counter for variable_data, variable_name in zip(self.data, self.variable_names): - variable_data_arr = np.array(variable_data).reshape( - 1, self.num_spin_sectors, self.num_energy_steps - ) + # Data arrays are structured depending on the instrument + if self.instrument == "lo": + variable_data_arr = np.array(variable_data).reshape( + 1, self.num_positions, self.num_spin_sectors, self.num_energy_steps + ) + dims = ["epoch", "inst_az", "spin_sector", "energy"] + elif self.instrument == "hi": + variable_data_arr = np.array(variable_data).reshape( + 1, self.num_energy_steps, self.num_positions, self.num_spin_sectors + ) + dims = ["epoch", "energy", "inst_az", "spin_sector"] + + # Get the CDF attributes cdf_attrs_key = ( f"{self.dataset_name.split('imap_codice_l1a_')[-1]}-{variable_name}" ) + attrs = cdf_attrs.get_variable_attributes(cdf_attrs_key) + + # Create the CDF data variable dataset[variable_name] = xr.DataArray( variable_data_arr, name=variable_name, - dims=["epoch", 
"spin_sector", "energy"], - attrs=cdf_attrs.get_variable_attributes(cdf_attrs_key), + dims=dims, + attrs=attrs, ) # Add ESA Sweep Values and acquisition times (lo only) - if "_lo_" in self.dataset_name: + if self.instrument == "lo": self.get_esa_sweep_values() self.get_acquisition_times() dataset["esa_sweep_values"] = xr.DataArray( @@ -274,43 +296,15 @@ def get_esa_sweep_values(self) -> None: sweep_table = sweep_data[sweep_data["table_idx"] == sweep_table_id] self.esa_sweep_values = sweep_table["esa_v"].values - def unpack_hi_science_data(self, science_values: str) -> None: + def unpack_science_data(self, science_values: str) -> None: """ - Decompress, unpack, and restructure CoDICE-Hi data arrays. + Decompress, unpack, and restructure science data arrays. The science data within the packet is a compressed, binary string of - values. - - Parameters - ---------- - science_values : str - A string of binary data representing the science values of the data. - """ - self.compression_algorithm = constants.HI_COMPRESSION_ID_LOOKUP[self.view_id] - - # Decompress the binary string - science_values_decompressed = decompress( - science_values, self.compression_algorithm - ) - - # Divide up the data by the number of priorities or species - chunk_size = len(science_values_decompressed) // self.num_counters - science_values_unpacked = [ - science_values_decompressed[i : i + chunk_size] - for i in range(0, len(science_values_decompressed), chunk_size) - ] - - # TODO: Determine how to properly divide up hi data. For now, just use - # arrays for each counter - self.data = science_values_unpacked - - def unpack_lo_science_data(self, science_values: str) -> None: - """ - Decompress, unpack, and restructure CoDICE-Lo data arrays. - - The science data within the packet is a compressed, binary string of - values. These data need to be divided up by species or priorities, - and re-arranged into 2D arrays representing energy and spin angle. + values. These data need to be divided up by species or priorities (or + what I am calling "counters" as a general term), and re-arranged into + 3D arrays representing spin sectors, positions, and energies (the order + of which depends on the instrument). 
 Parameters
         ----------
         science_values : str
             A string of binary data representing the science values of the data.
         """
         self.compression_algorithm = constants.LO_COMPRESSION_ID_LOOKUP[self.view_id]
 
-        # Decompress the binary string
+        # Decompress the binary string into a list of integers
         science_values_decompressed = decompress(
             science_values, self.compression_algorithm
         )
 
-        # Divide up the data by the number of priorities or species
-        chunk_size = len(science_values_decompressed) // self.num_counters
-        science_values_unpacked = [
-            science_values_decompressed[i : i + chunk_size]
-            for i in range(0, len(science_values_decompressed), chunk_size)
-        ]
+        # Re-arrange the counter data
+        # For CoDICE-lo, data are 3D arrays with a shape representing
+        # [<num_positions>,<num_spin_sectors>,<num_energy_steps>]
+        if self.instrument == "lo":
+            self.data = np.array(science_values_decompressed, dtype=np.uint32).reshape(
+                self.num_counters,
+                self.num_positions,
+                self.num_spin_sectors,
+                self.num_energy_steps,
+            )
 
-        # Further divide up the data by energy levels
-        # The result is a [12,128] array representing 12 spin angles and 128
-        # energy levels
-        self.data = []
-        for counter_data in science_values_unpacked:
-            data_array = [
-                counter_data[i : i + self.num_energy_steps]
-                for i in range(0, len(counter_data), self.num_energy_steps)
-            ]
-            self.data.append(data_array)  # type: ignore[arg-type]
+        # For CoDICE-hi, data are 3D arrays with a shape representing
+        # [<num_energy_steps>,<num_positions>,<num_spin_sectors>]
+        elif self.instrument == "hi":
+            self.data = np.array(science_values_decompressed, dtype=np.uint32).reshape(
+                self.num_counters,
+                self.num_energy_steps,
+                self.num_positions,
+                self.num_spin_sectors,
+            )
 
 
 def create_event_dataset(
@@ -555,10 +552,7 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset:
         # Run the pipeline to create a dataset for the product
         pipeline = CoDICEL1aPipeline(table_id, plan_id, plan_step, view_id)
         pipeline.configure_data_products(apid)
-        if "_lo_" in pipeline.dataset_name:
-            pipeline.unpack_lo_science_data(science_values)
-        elif "_hi_" in pipeline.dataset_name:
-            pipeline.unpack_hi_science_data(science_values)
+        pipeline.unpack_science_data(science_values)
         dataset = pipeline.create_science_dataset(met, data_version)
 
         logger.info(f"\nFinal data product:\n{dataset}\n")

From 38704daf2690c2db679034c456504d5dae30150c Mon Sep 17 00:00:00 2001
From: Matthew Bourque
Date: Wed, 28 Aug 2024 11:58:21 -0600
Subject: [PATCH 15/22] Fixed doc build error

---
 imap_processing/codice/codice_l1a.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py
index a37671c7b..08c55ff1e 100644
--- a/imap_processing/codice/codice_l1a.py
+++ b/imap_processing/codice/codice_l1a.py
@@ -75,10 +75,8 @@ class CoDICEL1aPipeline:
         Retrieve the acquisition times via the Lo stepping table.
     get_esa_sweep_values()
         Retrieve the ESA sweep values.
-    unpack_hi_science_data()
-        Decompress, unpack, and restructure CoDICE-Hi data arrays.
-    unpack_lo_science_data()
-        Decompress, unpack, and restructure CoDICE-Lo data arrays.
+    unpack_science_data()
+        Decompress, unpack, and restructure science data arrays.
""" def __init__(self, table_id: int, plan_id: int, plan_step: int, view_id: int): From 6e46d1869e6649033e7200ea86e4182543f47b81 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Fri, 30 Aug 2024 10:44:38 -0600 Subject: [PATCH 16/22] Fixed reshape methods to avoid unexpected argument warning --- imap_processing/codice/codice_l1a.py | 34 ++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index b3411ee49..84758f631 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -175,12 +175,22 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset # Data arrays are structured depending on the instrument if self.instrument == "lo": variable_data_arr = np.array(variable_data).reshape( - 1, self.num_positions, self.num_spin_sectors, self.num_energy_steps + ( + 1, + self.num_positions, + self.num_spin_sectors, + self.num_energy_steps, + ) ) dims = ["epoch", "inst_az", "spin_sector", "energy"] elif self.instrument == "hi": variable_data_arr = np.array(variable_data).reshape( - 1, self.num_energy_steps, self.num_positions, self.num_spin_sectors + ( + 1, + self.num_energy_steps, + self.num_positions, + self.num_spin_sectors, + ) ) dims = ["epoch", "energy", "inst_az", "spin_sector"] @@ -317,20 +327,24 @@ def unpack_science_data(self, science_values: str) -> None: # [,,] if self.instrument == "lo": self.data = np.array(science_values_decompressed, dtype=np.uint32).reshape( - self.num_counters, - self.num_positions, - self.num_spin_sectors, - self.num_energy_steps, + ( + self.num_counters, + self.num_positions, + self.num_spin_sectors, + self.num_energy_steps, + ) ) # For CoDICE-hi, data are a 3D array with a shape representing # [,,] elif self.instrument == "hi": self.data = np.array(science_values_decompressed, dtype=np.uint32).reshape( - self.num_counters, - self.num_energy_steps, - self.num_positions, - self.num_spin_sectors, + ( + self.num_counters, + self.num_energy_steps, + self.num_positions, + self.num_spin_sectors, + ) ) From 907439a33edacf3fc7feb88f679b43b19b0aab42 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Fri, 30 Aug 2024 12:13:37 -0600 Subject: [PATCH 17/22] compression_algorithm variable need not be a class attribute --- imap_processing/codice/codice_l1a.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index 84758f631..d2760f851 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -315,12 +315,10 @@ def unpack_science_data(self, science_values: str) -> None: science_values : str A string of binary data representing the science values of the data. 
""" - self.compression_algorithm = constants.LO_COMPRESSION_ID_LOOKUP[self.view_id] + compression_algorithm = constants.LO_COMPRESSION_ID_LOOKUP[self.view_id] # Decompress the binary string into a list of integers - science_values_decompressed = decompress( - science_values, self.compression_algorithm - ) + science_values_decompressed = decompress(science_values, compression_algorithm) # Re-arrange the counter data # For CoDICE-lo, data are a 3D arrays with a shape representing From 10ba2fa351e075edb0475c2dd8512fec092e431b Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Fri, 30 Aug 2024 14:58:37 -0600 Subject: [PATCH 18/22] Updated L0 code to use new packet_file_to_datasets function --- imap_processing/codice/codice_l0.py | 18 +-- .../tests/codice/test_codice_l0.py | 103 ++++++++---------- 2 files changed, 54 insertions(+), 67 deletions(-) diff --git a/imap_processing/codice/codice_l0.py b/imap_processing/codice/codice_l0.py index a5cc62c6f..8b8c29968 100644 --- a/imap_processing/codice/codice_l0.py +++ b/imap_processing/codice/codice_l0.py @@ -16,11 +16,14 @@ from pathlib import Path -from imap_processing import decom, imap_module_directory +import xarray as xr + +from imap_processing import imap_module_directory from imap_processing.codice import constants +from imap_processing.utils import packet_file_to_datasets -def decom_packets(packet_file: Path) -> list: +def decom_packets(packet_file: Path) -> dict[int, xr.Dataset]: """ Decom CoDICE data packets using CoDICE packet definition. @@ -31,11 +34,12 @@ def decom_packets(packet_file: Path) -> list: Returns ------- - list : list - All the unpacked data. + packets : dict[int, xr.Dataset] + Mapping from apid to ``xarray`` dataset, one dataset per apid. """ - xtce_document = Path( + xtce_packet_definition = Path( f"{imap_module_directory}/codice/packet_definitions/{constants.PACKET_TO_XTCE_MAPPING[packet_file.name]}" ) - decom_packet_list: list = decom.decom_packets(packet_file, xtce_document) - return decom_packet_list + packets = packet_file_to_datasets(packet_file, xtce_packet_definition) + + return packets diff --git a/imap_processing/tests/codice/test_codice_l0.py b/imap_processing/tests/codice/test_codice_l0.py index 6920fdd74..9f99d4f53 100644 --- a/imap_processing/tests/codice/test_codice_l0.py +++ b/imap_processing/tests/codice/test_codice_l0.py @@ -5,22 +5,34 @@ import pandas as pd import pytest -import space_packet_parser +import xarray as xr from imap_processing import imap_module_directory from imap_processing.codice import codice_l0 from imap_processing.codice.codice_l1a import create_hskp_dataset from imap_processing.utils import convert_raw_to_eu +# Define the CCSDS header fields (which will be ignored in these tests)) +CCSDS_HEADER_FIELDS = [ + "shcoarse", + "version", + "type", + "sec_hdr_flg", + "pkt_apid", + "seq_flgs", + "src_seq_ctr", + "pkt_len", +] + @pytest.fixture(scope="session") -def decom_test_data() -> list: - """Read test data from file +def decom_test_data() -> xr.Dataset: + """Read test data from file and return a decommutated housekeeping packet. 
Returns ------- - data_packet_list : list[space_packet_parser.parser.Packet] - The list of decommutated packets + packet : xr.Dataset + A decommutated housekeeping packet """ packet_file = Path( @@ -28,14 +40,9 @@ def decom_test_data() -> list: f"imap_codice_l0_hskp_20100101_v001.pkts" ) - data_packet_list = codice_l0.decom_packets(packet_file) - data_packet_list = [ - packet - for packet in data_packet_list - if packet.header["PKT_APID"].raw_value == 1136 - ] + packet = codice_l0.decom_packets(packet_file)[1136] - return data_packet_list + return packet @pytest.fixture(scope="session") @@ -63,15 +70,15 @@ def validation_data() -> pd.core.frame.DataFrame: def test_eu_hskp_data( - decom_test_data: list[space_packet_parser.parser.Packet], + decom_test_data: xr.Dataset, validation_data: pd.core.frame.DataFrame, ): """Compare the engineering unit (EU) housekeeping data to the validation data. Parameters ---------- - decom_test_data : list[space_packet_parser.parser.Packet] - The decommutated housekeeping packet data + decom_test_data : xr.Dataset + The decommutated housekeeping packet validation_data : pandas.core.frame.DataFrame The validation data to compare against """ @@ -82,80 +89,56 @@ def test_eu_hskp_data( imap_module_directory / "tests/codice/data/eu_unit_lookup_table.csv", "P_COD_NHK", ) - first_data = decom_test_data[0] - validation_row = validation_data.loc[first_data.data["SHCOARSE"].raw_value] - # Determine the number of CCSDS header fields (7 is standard) - num_ccsds_header_fields = 7 + validation_row = validation_data.loc[decom_test_data.shcoarse] # Compare EU values of housekeeping data, skipping CCSDS header fields - for idx, field in enumerate(eu_hk_data): - # Skip the first num_ccsds_header_fields fields - if idx < num_ccsds_header_fields: - continue - # Skip SHCOARSE - if field == "SHCOARSE": + for field in eu_hk_data: + # Skip header values + if field in CCSDS_HEADER_FIELDS: continue - eu_values = eu_hk_data[field].data - validation_values = validation_row[field] + eu_values = getattr(eu_hk_data, field).data + validation_values = validation_row[field.upper()] # Compare each individual element - for eu_val, validation_val in zip(eu_values, [validation_values]): + for eu_val, validation_val in zip(eu_values, validation_values): assert round(eu_val, 5) == round(validation_val, 5) def test_raw_hskp_data( - decom_test_data: list[space_packet_parser.parser.Packet], + decom_test_data: xr.Dataset, validation_data: pd.core.frame.DataFrame, ): """Compare the raw housekeeping data to the validation data. 
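# (A note on the naming convention assumed by the comparisons in these tests:
# the datasets produced by packet_file_to_datasets() expose lowercase field
# names, as the lowercase accesses above suggest, while the validation CSV
# keeps the uppercase telemetry mnemonics, hence the field.upper() lookups.
# A trivial illustration:)
field = "shcoarse"
assert field.upper() == "SHCOARSE"  # dataset field name -> validation column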
Parameters ---------- - decom_test_data : list[space_packet_parser.parser.Packet] - The decommutated housekeeping packet data + decom_test_data : xr.Dataset + The decommutated housekeeping packet validation_data : pandas.core.frame.DataFrame The validation data to compare against """ - first_data = decom_test_data[0] - validation_row = validation_data.loc[first_data.data["SHCOARSE"].raw_value] + validation_row = validation_data.loc[decom_test_data.shcoarse] # Compare raw values of housekeeping data - for key, value in first_data.data.items(): - if key == "SHCOARSE": - assert value.raw_value == validation_row.name - continue - assert value.raw_value == validation_row[key] + for field in decom_test_data: + if field not in CCSDS_HEADER_FIELDS: + raw_values = getattr(decom_test_data, field).data + validation_values = validation_row[field.upper()] + for raw_value, validation_value in zip(raw_values, validation_values): + assert raw_value == validation_value -def test_total_packets_in_data_file( - decom_test_data: list[space_packet_parser.parser.Packet], -): +def test_total_packets_in_data_file(decom_test_data: xr.Dataset): """Test if total packets in data file is correct Parameters ---------- - decom_test_data : list[space_packet_parser.parser.Packet] - The decommutated housekeeping packet data + decom_test_data : xr.Dataset + The decommutated housekeeping packet """ total_packets = 99 - assert len(decom_test_data) == total_packets - - -def test_ways_to_get_data(decom_test_data: list[space_packet_parser.parser.Packet]): - """Test if data can be retrieved using different ways - - Parameters - ---------- - decom_test_data : list[space_packet_parser.parser.Packet] - The decommutated housekeeping packet data - """ - - data_value_using_key = decom_test_data[0].data - data_value_using_list = decom_test_data[0][1] - - # Check if data is same - assert data_value_using_key == data_value_using_list + assert len(decom_test_data.epoch) == total_packets From c55c8e03289a42c30c12442fdf1a2a7b0f9c8661 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Fri, 30 Aug 2024 15:04:50 -0600 Subject: [PATCH 19/22] Removed temporary if __name__ == __main__ code --- imap_processing/codice/codice_l1a.py | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index d2760f851..78a5a626f 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -552,29 +552,3 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset: logger.info(f"\nFinal data product:\n{dataset}\n") return dataset - - -if __name__ == "__main__": - TEST_DATA_PATH = imap_module_directory / "tests" / "codice" / "data" - - TEST_PACKETS = [ - TEST_DATA_PATH / "imap_codice_l0_hskp_20100101_v001.pkts", - TEST_DATA_PATH / "imap_codice_l0_hi-counters-aggregated_20240429_v001.pkts", - TEST_DATA_PATH / "imap_codice_l0_hi-counters-singles_20240429_v001.pkts", - TEST_DATA_PATH / "imap_codice_l0_hi-omni_20240429_v001.pkts", - TEST_DATA_PATH / "imap_codice_l0_hi-sectored_20240429_v001.pkts", - TEST_DATA_PATH / "imap_codice_l0_hi-pha_20240429_v001.pkts", - TEST_DATA_PATH / "imap_codice_l0_lo-counters-aggregated_20240429_v001.pkts", - TEST_DATA_PATH / "imap_codice_l0_lo-counters-singles_20240429_v001.pkts", - TEST_DATA_PATH / "imap_codice_l0_lo-sw-angular_20240429_v001.pkts", - TEST_DATA_PATH / "imap_codice_l0_lo-nsw-angular_20240429_v001.pkts", - TEST_DATA_PATH / "imap_codice_l0_lo-sw-priority_20240429_v001.pkts", - 
TEST_DATA_PATH / "imap_codice_l0_lo-nsw-priority_20240429_v001.pkts", - TEST_DATA_PATH / "imap_codice_l0_lo-sw-species_20240429_v001.pkts", - TEST_DATA_PATH / "imap_codice_l0_lo-nsw-species_20240429_v001.pkts", - TEST_DATA_PATH / "imap_codice_l0_lo-pha_20240429_v001.pkts", - ] - - for file_path in TEST_PACKETS: - dataset = process_codice_l1a(file_path, "001") - print(dataset) From 0a19ca288fb165d3f22495308588bac8d54268b5 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Fri, 30 Aug 2024 15:09:27 -0600 Subject: [PATCH 20/22] Fixed typo --- imap_processing/codice/decompress.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imap_processing/codice/decompress.py b/imap_processing/codice/decompress.py index bb92a75c4..9a48b72b2 100644 --- a/imap_processing/codice/decompress.py +++ b/imap_processing/codice/decompress.py @@ -56,7 +56,7 @@ def _apply_lossy_a(compressed_bytes: bytes) -> list[int]: def _apply_lossy_b(compressed_bytes: bytes) -> list[int]: """ - Apply 8-bit to 32-bit Lossy A decompression algorithm. + Apply 8-bit to 32-bit Lossy B decompression algorithm. The Lossy B algorithm uses a lookup table imported into this module. From ba75ec4cb5b6216db99e31da2585abf1c35c6208 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Fri, 30 Aug 2024 15:14:39 -0600 Subject: [PATCH 21/22] Fixed doc build error --- imap_processing/codice/codice_l0.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imap_processing/codice/codice_l0.py b/imap_processing/codice/codice_l0.py index 8b8c29968..6f132d580 100644 --- a/imap_processing/codice/codice_l0.py +++ b/imap_processing/codice/codice_l0.py @@ -34,7 +34,7 @@ def decom_packets(packet_file: Path) -> dict[int, xr.Dataset]: Returns ------- - packets : dict[int, xr.Dataset] + packets : dict[int, xarray.Dataset] Mapping from apid to ``xarray`` dataset, one dataset per apid. """ xtce_packet_definition = Path( From 940348a0dc8a5ad382e8fe385ea0d28fba2b7dd7 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 3 Sep 2024 11:46:16 -0600 Subject: [PATCH 22/22] Addressed review comments --- imap_processing/codice/codice_l1a.py | 34 ++++----- .../tests/codice/test_codice_l1a.py | 74 +++++++++---------- 2 files changed, 50 insertions(+), 58 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index 78a5a626f..f1776a4fa 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -22,7 +22,6 @@ from imap_processing import imap_module_directory from imap_processing.cdf.imap_cdf_manager import ImapCdfAttributes -from imap_processing.cdf.utils import met_to_j2000ns from imap_processing.codice import constants from imap_processing.codice.decompress import decompress from imap_processing.codice.utils import CODICEAPID @@ -100,7 +99,9 @@ def configure_data_products(self, apid: int) -> None: self.dataset_name = config["dataset_name"] self.instrument = config["instrument"] - def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset: + def create_science_dataset( + self, packet: xr.Dataset, data_version: str + ) -> xr.Dataset: """ Create an ``xarray`` dataset for the unpacked science data. @@ -108,8 +109,8 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset Parameters ---------- - met : numpy.int64 - The mission elapsed time of the packet, used to determine epoch data. + packet : xarray.Dataset + The packet to process. data_version : str Version of the data product being created. 
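# (Context sketch for the change below, assuming the behavior of the
# packet_file_to_datasets() function adopted in PATCH 18: each per-APID
# dataset already carries an "epoch" coordinate, so the packet can supply
# epoch directly and a separate MET conversion is no longer needed here.
# A toy, self-contained illustration with made-up values:)
import numpy as np
import xarray as xr

packet = xr.Dataset(coords={"epoch": ("epoch", np.array([0, 1], dtype=np.int64))})
epoch = xr.DataArray(packet.epoch, name="epoch", dims=["epoch"])
assert epoch.shape == (2,)  # the packet's epoch values are reused as-is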
@@ -126,7 +127,7 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset # Define coordinates epoch = xr.DataArray( - [met_to_j2000ns(met)], + packet.epoch, name="epoch", dims=["epoch"], attrs=cdf_attrs.get_variable_attributes("epoch"), @@ -371,9 +372,6 @@ def create_event_dataset( elif apid == CODICEAPID.COD_HI_PHA: dataset_name = "imap_codice_l1a_hi_pha" - # Determine the start time of the packet - met = packet.acq_start_seconds.data[0] - # Extract the data # event_data = packet.event_data.data (Currently turned off, see TODO) @@ -384,7 +382,7 @@ def create_event_dataset( # Define coordinates epoch = xr.DataArray( - met_to_j2000ns([met]), + packet.epoch, name="epoch", dims=["epoch"], attrs=cdf_attrs.get_variable_attributes("epoch"), @@ -426,10 +424,7 @@ def create_hskp_dataset( cdf_attrs.add_global_attribute("Data_version", data_version) epoch = xr.DataArray( - met_to_j2000ns( - packet.shcoarse.data, - reference_epoch=np.datetime64("2010-01-01T00:01:06.184", "ns"), - ), + packet.epoch, name="epoch", dims=["epoch"], attrs=cdf_attrs.get_variable_attributes("epoch"), @@ -493,10 +488,10 @@ def get_params(packet: xr.Dataset) -> tuple[int, int, int, int]: view_id : int Provides information about how data was collapsed and/or compressed. """ - table_id = packet.table_id.data[0] - plan_id = packet.plan_id.data[0] - plan_step = packet.plan_step.data[0] - view_id = packet.view_id.data[0] + table_id = int(packet.table_id.data) + plan_id = int(packet.plan_id.data) + plan_step = int(packet.plan_step.data) + view_id = int(packet.view_id.data) return table_id, plan_id, plan_step, view_id @@ -534,9 +529,6 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset: dataset = create_event_dataset(apid, packet, data_version) elif apid in constants.APIDS_FOR_SCIENCE_PROCESSING: - # Determine the start time of the packet - met = packet.acq_start_seconds.data[0] - # Extract the data science_values = packet.data.data[0] @@ -547,7 +539,7 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset: pipeline = CoDICEL1aPipeline(table_id, plan_id, plan_step, view_id) pipeline.configure_data_products(apid) pipeline.unpack_science_data(science_values) - dataset = pipeline.create_science_dataset(met, data_version) + dataset = pipeline.create_science_dataset(packet, data_version) logger.info(f"\nFinal data product:\n{dataset}\n") diff --git a/imap_processing/tests/codice/test_codice_l1a.py b/imap_processing/tests/codice/test_codice_l1a.py index ff5302b48..1d897d3ec 100644 --- a/imap_processing/tests/codice/test_codice_l1a.py +++ b/imap_processing/tests/codice/test_codice_l1a.py @@ -34,23 +34,6 @@ (1, 1, 1, 128), # lo-nsw-species (1, 128), # lo-pha ] -EXPECTED_ARRAY_SIZES = [ - 129, # hskp - 1, # hi-counters-aggregated - 3, # hi-counters-singles - 8, # hi-omni - 4, # hi-sectored - 0, # hi-pha - 3, # lo-counters-aggregated - 3, # lo-counters-singles - 6, # lo-sw-angular - 3, # lo-nsw-angular - 7, # lo-sw-priority - 4, # lo-nsw-priority - 18, # lo-sw-species - 10, # lo-nsw-species - 0, # lo-pha -] EXPECTED_LOGICAL_SOURCE = [ "imap_codice_l1a_hskp", "imap_codice_l1a_hi-counters-aggregated", @@ -68,6 +51,23 @@ "imap_codice_l1a_lo-nsw-species", "imap_codice_l1a_lo-pha", ] +EXPECTED_NUM_VARIABLES = [ + 129, # hskp + 1, # hi-counters-aggregated + 3, # hi-counters-singles + 8, # hi-omni + 4, # hi-sectored + 0, # hi-pha + 3, # lo-counters-aggregated + 3, # lo-counters-singles + 6, # lo-sw-angular + 3, # lo-nsw-angular + 7, # lo-sw-priority + 4, # lo-nsw-priority + 18, 
# lo-sw-species
+    10,  # lo-nsw-species
+    0,  # lo-pha
+]
 
 
 @pytest.fixture(params=TEST_PACKETS)
@@ -134,26 +134,6 @@ def test_l1a_data_array_shape(test_l1a_data: xr.Dataset, expected_shape: tuple):
     assert dataset[variable].data.shape == expected_shape
 
 
-@pytest.mark.parametrize(
-    "test_l1a_data, expected_size",
-    list(zip(TEST_PACKETS, EXPECTED_ARRAY_SIZES)),
-    indirect=["test_l1a_data"],
-)
-def test_l1a_data_array_size(test_l1a_data: xr.Dataset, expected_size: int):
-    """Tests that the data arrays in the generated CDFs have the expected size.
-
-    Parameters
-    ----------
-    test_l1a_data : xarray.Dataset
-        A ``xarray`` dataset containing the test data
-    expected_size : int
-        The expected size of the data array
-    """
-
-    dataset = test_l1a_data
-    assert len(dataset) == expected_size
-
-
 @pytest.mark.skip("Awaiting validation data")
 @pytest.mark.parametrize(
     "test_l1a_data, validation_data",
@@ -185,3 +165,23 @@ def test_l1a_data_array_values(test_l1a_data: xr.Dataset, validation_data: Path):
     np.testing.assert_array_equal(
         validation_data[variable].data, generated_dataset[variable].data[0]
     )
+
+
+@pytest.mark.parametrize(
+    "test_l1a_data, expected_num_variables",
+    list(zip(TEST_PACKETS, EXPECTED_NUM_VARIABLES)),
+    indirect=["test_l1a_data"],
+)
+def test_l1a_num_variables(test_l1a_data: xr.Dataset, expected_num_variables: int):
+    """Tests that the generated CDFs contain the expected number of variables.
+
+    Parameters
+    ----------
+    test_l1a_data : xarray.Dataset
+        A ``xarray`` dataset containing the test data
+    expected_num_variables : int
+        The expected number of data variables in the CDF
+    """
+
+    dataset = test_l1a_data
+    assert len(dataset) == expected_num_variables
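A minimal, standalone illustration of the convention the new test relies on (a toy dataset, not project code): len() of an xarray.Dataset counts its data variables, not its coordinates, which is what makes len(dataset) a usable proxy for the number of CDF variables.

import numpy as np
import xarray as xr

ds = xr.Dataset(
    data_vars={"a": ("epoch", np.zeros(3)), "b": ("epoch", np.ones(3))},
    coords={"epoch": np.arange(3)},
)
assert len(ds) == 2        # two data variables; the coordinate is not counted
assert len(ds.epoch) == 3  # length of the epoch coordinate itself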