From 0f2247831aa0ce8cb116b440fd19bf7bbf695551 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 10:49:40 -0600 Subject: [PATCH 01/15] Updated decompression algorithm to read in a binary string instead of individual integer values --- imap_processing/codice/decompress.py | 90 ++++++++++--------- .../tests/codice/test_decompress.py | 31 ++++--- 2 files changed, 63 insertions(+), 58 deletions(-) diff --git a/imap_processing/codice/decompress.py b/imap_processing/codice/decompress.py index bbbad4b6c..e4abf83ea 100644 --- a/imap_processing/codice/decompress.py +++ b/imap_processing/codice/decompress.py @@ -26,18 +26,16 @@ This information was provided via email from Greg Dunn on Oct 23, 2023 """ -# TODO: Add support for performing decompression of a list of values instead of -# a single value - import lzma from enum import IntEnum -from typing import Union + +import bitarray from imap_processing.codice.constants import LOSSY_A_TABLE, LOSSY_B_TABLE from imap_processing.codice.utils import CoDICECompression -def _apply_lossy_a(compressed_value: int) -> int: +def _apply_lossy_a(compressed_bytes: bytes) -> list[int]: """ Apply 8-bit to 32-bit Lossy A decompression algorithm. @@ -45,63 +43,62 @@ def _apply_lossy_a(compressed_value: int) -> int: Parameters ---------- - compressed_value : int - The compressed 8-bit value. + compressed_bytes : bytes + The compressed byte stream. Returns ------- - int - The 24- or 32-bit decompressed value. + decompressed_values : list[int] + The 24- or 32-bit decompressed values. """ - return LOSSY_A_TABLE[compressed_value] + compressed_values = list(compressed_bytes) + decompressed_values = [LOSSY_A_TABLE[item] for item in compressed_values] + return decompressed_values -def _apply_lossy_b(compressed_value: int) -> int: +def _apply_lossy_b(compressed_bytes: bytes) -> list[int]: """ - Apply 8-bit to 32-bit Lossy B decompression algorithm. + Apply 8-bit to 32-bit Lossy A decompression algorithm. The Lossy B algorithm uses a lookup table imported into this module. Parameters ---------- - compressed_value : int - The compressed 8-bit value. + compressed_bytes : bytes + The compressed byte stream. Returns ------- - int - The 24- or 32-bit decompressed value. + decompressed_values : list[int] + The 24- or 32-bit decompressed values. """ - return LOSSY_B_TABLE[compressed_value] + compressed_values = list(compressed_bytes) + decompressed_values = [LOSSY_B_TABLE[item] for item in compressed_values] + return decompressed_values -def _apply_lzma_lossless(compressed_value: Union[int, bytes]) -> int: +def _apply_lzma_lossless(compressed_bytes: bytes) -> bytes: """ Apply LZMA lossless decompression algorithm. Parameters ---------- - compressed_value : int or bytes - The compressed 8-bit value. + compressed_bytes : bytes + The compressed byte stream. Returns ------- - decompressed_value : int - The 24- or 32-bit decompressed value. + lzma_decompressed_values : bytes + The 24- or 32-bit lzma decompressed values. 
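+
+    Examples
+    --------
+    A round-trip sketch, mirroring the unit tests (which LZMA-compress the
+    single byte value 234):
+
+    >>> import lzma
+    >>> _apply_lzma_lossless(lzma.compress(bytes([234])))
+    b'\xea'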
""" - if isinstance(compressed_value, int): - bytes_compressed_value = compressed_value.to_bytes(compressed_value, "big") - else: - bytes_compressed_value = compressed_value - decompressed_value = lzma.decompress(bytes_compressed_value) - decompressed_value_int = int.from_bytes(decompressed_value, byteorder="big") + lzma_decompressed_values = lzma.decompress(compressed_bytes) - return decompressed_value_int + return lzma_decompressed_values -def decompress(compressed_value: int, algorithm: IntEnum) -> int: +def decompress(compressed_binary: str, algorithm: IntEnum) -> list[int]: """ - Will decompress the value. + Perform decompression on a binary string into a list of integers. Apply the appropriate decompression algorithm(s) based on the value of the ``algorithm`` attribute. One or more individual algorithms may be @@ -109,32 +106,37 @@ def decompress(compressed_value: int, algorithm: IntEnum) -> int: Parameters ---------- - compressed_value : int - The 8-bit compressed value to decompress. + compressed_binary : str + The compressed binary string. algorithm : int The algorithm to apply. Supported algorithms are provided in the ``codice_utils.CoDICECompression`` class. Returns ------- - decompressed_value : int - The 24- or 32-bit decompressed value. + decompressed_values : list[int] + The 24- or 32-bit decompressed values. """ + # Convert the binary string to a byte stream + compressed_bytes = bitarray.bitarray(compressed_binary).tobytes() + + # Apply the appropriate decompression algorithm if algorithm == CoDICECompression.NO_COMPRESSION: - decompressed_value = compressed_value + decompressed_values = list(compressed_bytes) elif algorithm == CoDICECompression.LOSSY_A: - decompressed_value = _apply_lossy_a(compressed_value) + decompressed_values = _apply_lossy_a(compressed_bytes) elif algorithm == CoDICECompression.LOSSY_B: - decompressed_value = _apply_lossy_b(compressed_value) + decompressed_values = _apply_lossy_b(compressed_bytes) elif algorithm == CoDICECompression.LOSSLESS: - decompressed_value = _apply_lzma_lossless(compressed_value) + decompressed_bytes = _apply_lzma_lossless(compressed_bytes) + decompressed_values = list(decompressed_bytes) elif algorithm == CoDICECompression.LOSSY_A_LOSSLESS: - decompressed_value = _apply_lzma_lossless(compressed_value) - decompressed_value = _apply_lossy_a(decompressed_value) + decompressed_bytes = _apply_lzma_lossless(compressed_bytes) + decompressed_values = _apply_lossy_a(decompressed_bytes) elif algorithm == CoDICECompression.LOSSY_B_LOSSLESS: - decompressed_value = _apply_lzma_lossless(compressed_value) - decompressed_value = _apply_lossy_b(decompressed_value) + decompressed_bytes = _apply_lzma_lossless(compressed_bytes) + decompressed_values = _apply_lossy_b(decompressed_bytes) else: raise ValueError(f"{algorithm} is not supported") - return decompressed_value + return decompressed_values diff --git a/imap_processing/tests/codice/test_decompress.py b/imap_processing/tests/codice/test_decompress.py index e74f60d73..853a94ccc 100644 --- a/imap_processing/tests/codice/test_decompress.py +++ b/imap_processing/tests/codice/test_decompress.py @@ -9,34 +9,37 @@ from imap_processing.codice.utils import CoDICECompression # Test the algorithms using input value of 234 (picked randomly) -LZMA_EXAMPLE = lzma.compress((234).to_bytes(1, byteorder="big")) +lzma_bytes = lzma.compress((234).to_bytes(1, byteorder="big")) +LZMA_EXAMPLE = "".join(format(byte, "08b") for byte in lzma_bytes) TEST_DATA = [ - (234, CoDICECompression.NO_COMPRESSION, 234), - (234, 
CoDICECompression.LOSSY_A, 221184),
-    (234, CoDICECompression.LOSSY_B, 1441792),
-    (LZMA_EXAMPLE, CoDICECompression.LOSSLESS, 234),
-    (LZMA_EXAMPLE, CoDICECompression.LOSSY_A_LOSSLESS, 221184),
-    (LZMA_EXAMPLE, CoDICECompression.LOSSY_B_LOSSLESS, 1441792),
+    ("11101010", CoDICECompression.NO_COMPRESSION, [234]),
+    ("11101010", CoDICECompression.LOSSY_A, [221184]),
+    ("11101010", CoDICECompression.LOSSY_B, [1441792]),
+    (LZMA_EXAMPLE, CoDICECompression.LOSSLESS, [234]),
+    (LZMA_EXAMPLE, CoDICECompression.LOSSY_A_LOSSLESS, [221184]),
+    (LZMA_EXAMPLE, CoDICECompression.LOSSY_B_LOSSLESS, [1441792]),
 ]


 @pytest.mark.parametrize(
-    ("compressed_value", "algorithm", "expected_result"), TEST_DATA
+    ("compressed_binary", "algorithm", "expected_result"), TEST_DATA
 )
-def test_decompress(compressed_value: int, algorithm: IntEnum, expected_result: int):
+def test_decompress(
+    compressed_binary: str, algorithm: IntEnum, expected_result: list[int]
+):
     """Tests the ``decompress`` function

     Parameters
     ----------
-    compressed_value : int
-        The compressed value to test decompression on
+    compressed_binary : str
+        The compressed binary string to test decompression on
     algorithm : IntEnum
         The algorithm to use in decompression
-    expected_result : int
+    expected_result : list[int]
         The expected decompressed values
     """

-    decompressed_value = decompress(compressed_value, algorithm)
+    decompressed_value = decompress(compressed_binary, algorithm)
     assert decompressed_value == expected_result


@@ -44,4 +47,4 @@ def test_decompress_raises():
     """Tests that the ``decompress`` function raises with an unknown algorithm"""

     with pytest.raises(ValueError, match="some_unsupported_algorithm"):
-        decompress(234, "some_unsupported_algorithm")
+        decompress("11101010", "some_unsupported_algorithm")

From caf55ec3427fa3c13abea74292ebb7bbd1154a3f Mon Sep 17 00:00:00 2001
From: Matthew Bourque
Date: Tue, 20 Aug 2024 10:51:20 -0600
Subject: [PATCH 02/15] Removed collapse table lookups, as the info they
 provided is instead hard-coded into the configuration dictionary; Added spin
 sector config variable

---
 imap_processing/codice/constants.py | 44 ++++++++++++++---------------
 1 file changed, 21 insertions(+), 23 deletions(-)

diff --git a/imap_processing/codice/constants.py b/imap_processing/codice/constants.py
index a8bcf3503..9054431f4 100644
--- a/imap_processing/codice/constants.py
+++ b/imap_processing/codice/constants.py
@@ -12,6 +12,9 @@
 ESA = ElectroStatic Analyzer
 """

+# TODO: What to do in the case of a value of 255 in LOSSY_A and LOSSY_B
+# compression?
+ from imap_processing.codice.utils import CODICEAPID, CoDICECompression APIDS_FOR_SCIENCE_PROCESSING = [ @@ -76,73 +79,85 @@ DATA_PRODUCT_CONFIGURATIONS = { CODICEAPID.COD_HI_INST_COUNTS_AGGREGATED: { "num_counters": 1, - "num_energy_steps": 128, + "num_energy_steps": 1, + "num_spin_sectors": 6, "variable_names": HI_INST_COUNTS_AGGREGATED_NAMES, "dataset_name": "imap_codice_l1a_hi_counters_aggregated", }, CODICEAPID.COD_HI_INST_COUNTS_SINGLES: { "num_counters": 3, - "num_energy_steps": 128, + "num_energy_steps": 1, + "num_spin_sectors": 16, "variable_names": HI_INST_COUNTS_SINGLES_NAMES, "dataset_name": "imap_codice_l1a_hi_counters_singles", }, CODICEAPID.COD_HI_OMNI_SPECIES_COUNTS: { "num_counters": 8, - "num_energy_steps": 128, + "num_energy_steps": 1, + "num_spin_sectors": 60, # TODO: Double-check this "variable_names": HI_OMNI_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_hi_omni", }, CODICEAPID.COD_HI_SECT_SPECIES_COUNTS: { "num_counters": 4, - "num_energy_steps": 128, + "num_energy_steps": 1, + "num_spin_sectors": 1152, # TODO: Double-check this "variable_names": HI_SECT_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_hi_sectored", }, CODICEAPID.COD_LO_INST_COUNTS_AGGREGATED: { "num_counters": 1, "num_energy_steps": 128, + "num_spin_sectors": 36, "variable_names": LO_INST_COUNTS_AGGREGATED_NAMES, "dataset_name": "imap_codice_l1a_lo_counters_aggregated", }, CODICEAPID.COD_LO_INST_COUNTS_SINGLES: { "num_counters": 1, "num_energy_steps": 128, + "num_spin_sectors": 144, "variable_names": LO_INST_COUNTS_SINGLES_NAMES, "dataset_name": "imap_codice_l1a_lo_counters_singles", }, CODICEAPID.COD_LO_SW_ANGULAR_COUNTS: { "num_counters": 4, "num_energy_steps": 128, + "num_spin_sectors": 60, "variable_names": LO_SW_ANGULAR_NAMES, "dataset_name": "imap_codice_l1a_lo_sw_angular", }, CODICEAPID.COD_LO_NSW_ANGULAR_COUNTS: { "num_counters": 1, "num_energy_steps": 128, + "num_spin_sectors": 228, "variable_names": LO_NSW_ANGULAR_NAMES, "dataset_name": "imap_codice_l1a_lo_nsw_angular", }, CODICEAPID.COD_LO_SW_PRIORITY_COUNTS: { "num_counters": 5, "num_energy_steps": 128, + "num_spin_sectors": 12, "variable_names": LO_SW_PRIORITY_NAMES, "dataset_name": "imap_codice_l1a_lo_sw_priority", }, CODICEAPID.COD_LO_NSW_PRIORITY_COUNTS: { "num_counters": 2, "num_energy_steps": 128, + "num_spin_sectors": 12, "variable_names": LO_NSW_PRIORITY_NAMES, "dataset_name": "imap_codice_l1a_lo_nsw_priority", }, CODICEAPID.COD_LO_SW_SPECIES_COUNTS: { "num_counters": 16, "num_energy_steps": 128, + "num_spin_sectors": 1, "variable_names": LO_SW_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_lo_sw_species", }, CODICEAPID.COD_LO_NSW_SPECIES_COUNTS: { "num_counters": 8, "num_energy_steps": 128, + "num_spin_sectors": 1, "variable_names": LO_NSW_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_lo_nsw_species", }, @@ -179,25 +194,6 @@ 9: CoDICECompression.LOSSY_A_LOSSLESS, } -# Collapse table ID lookup table for Lo data products -# The key is the view_id and the value is the ID for the collapse table -LO_COLLAPSE_TABLE_ID_LOOKUP = {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8} - -# Collapse table ID lookup table for Hi data products -# The key is the view_id and the value is the ID for the collapse table -Hi_COLLAPSE_TABLE_ID_LOOKUP = { - 0: 8, - 1: 9, - 2: 10, - 3: 0, - 4: 1, - 5: 2, - 6: 4, - 7: 5, - 8: 6, - 9: 7, -} - # ESA Sweep table ID lookup table # The combination of plan_id and plan_step determine the ESA sweep Table to use # Currently, ESA sweep table 0 is used for every plan_id/plan_step combination, @@ -538,6 
+534,7 @@ 252: 475136, 253: 491520, 254: 507904, + 255: 999999, } LOSSY_B_TABLE = { @@ -796,4 +793,5 @@ 252: 6815744, 253: 7340032, 254: 7864320, + 255: 999999, } From bb9cb3060d085ff46ed5d5e32c762747b4ddf86b Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 10:53:24 -0600 Subject: [PATCH 03/15] Added spin sector attribute definition --- .../cdf/config/imap_codice_l1a_variable_attrs.yaml | 11 +++++++++++ .../cdf/config/imap_codice_l1b_variable_attrs.yaml | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml b/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml index 54701c71c..aee0d7acd 100644 --- a/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml @@ -36,6 +36,17 @@ energy_attrs: VALIDMAX: 127 VAR_TYPE: support_data +spin_sector_attrs: + <<: *default + CATDESC: Spin sector indicating range of spin angles + FIELDNAM: Spin sector + FORMAT: I4 + LABLAXIS: spin sector + UNITS: ' ' + VALIDMIN: 0 + VALIDMAX: 1152 + VAR_TYPE: support_data + # <=== Labels ===> energy_label: CATDESC: Energy per charge (E/q) sweeping step diff --git a/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml b/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml index 1d5d44eb5..c9de1c451 100644 --- a/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml @@ -32,6 +32,17 @@ energy_attrs: VALIDMAX: 127 VAR_TYPE: support_data +spin_sector_attrs: + <<: *default + CATDESC: Spin sector indicating range of spin angles + FIELDNAM: Spin sector + FORMAT: I4 + LABLAXIS: spin sector + UNITS: ' ' + VALIDMIN: 0 + VALIDMAX: 1152 + VAR_TYPE: support_data + # <=== Labels ===> energy_label: CATDESC: Energy per charge (E/q) sweeping step From 6f81befdb4261a61f7ac826095e527137023b1c1 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 10:57:28 -0600 Subject: [PATCH 04/15] Updated code to more accurately unpack science data --- imap_processing/codice/codice_l1a.py | 124 ++++++++++++------ .../tests/codice/test_codice_l1a.py | 29 ++-- 2 files changed, 98 insertions(+), 55 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index 0c925fee7..f9032930a 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -27,16 +27,21 @@ from imap_processing.cdf.utils import met_to_j2000ns from imap_processing.codice import constants from imap_processing.codice.codice_l0 import decom_packets +from imap_processing.codice.decompress import decompress from imap_processing.codice.utils import CODICEAPID, add_metadata_to_array from imap_processing.utils import group_by_apid, sort_by_time logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -# TODO: Decom data arrays need to be decompressed # TODO: In decommutation, how to have a variable length data and then a checksum # after it? (Might be fixed with new XTCE script updates) # TODO: Add support for decomming multiple APIDs from a single file +# TODO: Add these as variables in CDF: SPIN_PERIOD, ST_BIAS_GAIN_MODE, +# SW_BIAS_GAIN_MODE, RGFO_HALF_SPIN, NSO_HALF_SPIN, DATA_QUALITY +# TODO: Use new packet_file_to_dataset() function to simplify things +# TODO: Determine what should go in event data CDF and how it should be +# structured. 
class CoDICEL1aPipeline: @@ -92,6 +97,7 @@ def configure_data_products(self, apid: int) -> None: config = constants.DATA_PRODUCT_CONFIGURATIONS.get(apid) # type: ignore[call-overload] self.num_counters = config["num_counters"] self.num_energy_steps = config["num_energy_steps"] + self.num_spin_sectors = config["num_spin_sectors"] self.variable_names = config["variable_names"] self.dataset_name = config["dataset_name"] @@ -121,11 +127,17 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset # Define coordinates epoch = xr.DataArray( - met_to_j2000ns(met), # TODO: Fix after SIT-3 (see note below) + [met_to_j2000ns(met)], name="epoch", dims=["epoch"], attrs=cdf_attrs.get_variable_attributes("epoch"), ) + spin_sector = xr.DataArray( + np.arange(self.num_spin_sectors), + name="spin_sector", + dims=["spin_sector"], + attrs=cdf_attrs.get_variable_attributes("spin_sector_attrs"), + ) energy_steps = xr.DataArray( np.arange(self.num_energy_steps), name="energy", @@ -145,6 +157,7 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset dataset = xr.Dataset( coords={ "epoch": epoch, + "spin_sector": spin_sector, "energy": energy_steps, "energy_label": energy_label, }, @@ -153,12 +166,8 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset # Create a data variable for each counter for variable_data, variable_name in zip(self.data, self.variable_names): - # TODO: Currently, cdflib doesn't properly write/read CDF files that - # have a single epoch value. To get around this for now, use - # two epoch values and reshape accordingly. Revisit this after - # SIT-3. See https://github.com/MAVENSDC/cdflib/issues/268 - variable_data_arr = np.array(list(variable_data) * 2, dtype=int).reshape( - 2, self.num_energy_steps + variable_data_arr = np.array(variable_data).reshape( + 1, self.num_spin_sectors, self.num_energy_steps ) cdf_attrs_key = ( f"{self.dataset_name.split('imap_codice_l1a_')[-1]}-{variable_name}" @@ -166,7 +175,7 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset dataset[variable_name] = xr.DataArray( variable_data_arr, name=variable_name, - dims=["epoch", "energy"], + dims=["epoch", "spin_sector", "energy"], attrs=cdf_attrs.get_variable_attributes(cdf_attrs_key), ) @@ -262,14 +271,41 @@ def get_esa_sweep_values(self) -> None: sweep_table = sweep_data[sweep_data["table_idx"] == sweep_table_id] self.esa_sweep_values = sweep_table["esa_v"].values - def unpack_science_data(self, science_values: str) -> None: + def unpack_hi_science_data(self, science_values: str) -> None: """ - Unpack the science data from the packet. + Unpack the CoDICE-Hi science data from the packet. - For LO SW Species Counts data, the science data within the packet is a - blob of compressed values of length 2048 bits (16 species * 128 energy - levels). These data need to be divided up by species so that each - species can have their own data variable in the L1A CDF file. + The science data within the packet is a compressed, binary string of + values. + + Parameters + ---------- + science_values : str + A string of binary data representing the science values of the data. 
+ """ + self.compression_algorithm = constants.HI_COMPRESSION_ID_LOOKUP[self.view_id] + + # Decompress the binary string + science_values = decompress(science_values, self.compression_algorithm) + + # Divide up the data by the number of priorities or species + chunk_size = len(science_values) // self.num_counters + science_values_unpacked = [ + science_values[i : i + chunk_size] + for i in range(0, len(science_values), chunk_size) + ] + + # TODO: Determine how to properly divide up hi data. For now, just use + # arrays for each counter + self.data = science_values_unpacked + + def unpack_lo_science_data(self, science_values: str) -> None: + """ + Unpack the CoDICE-Lo science data from the packet. + + The science data within the packet is a compressed, binary string of + values. These data need to be divided up by species or priorities, + and re-arranged into 2D arrays representing energy and spin angle. Parameters ---------- @@ -277,18 +313,27 @@ def unpack_science_data(self, science_values: str) -> None: A string of binary data representing the science values of the data. """ self.compression_algorithm = constants.LO_COMPRESSION_ID_LOOKUP[self.view_id] - self.collapse_table_id = constants.LO_COLLAPSE_TABLE_ID_LOOKUP[self.view_id] - # TODO: Turn this back on after SIT-3 - # For SIT-3, just create appropriate length data arrays of all ones + # Decompress the binary string + science_values = decompress(science_values, self.compression_algorithm) + # Divide up the data by the number of priorities or species - # science_values = packets[0].data["DATA"].raw_value - # num_bits = len(science_values) - # chunk_size = len(science_values) // self.num_counters - # self.data = [ - # science_values[i : i + chunk_size] for i in range(0, num_bits, chunk_size) - # ] - self.data = [["1"] * 128] * self.num_counters + chunk_size = len(science_values) // self.num_counters + science_values_unpacked = [ + science_values[i : i + chunk_size] + for i in range(0, len(science_values), chunk_size) + ] + + # Further divide up the data by energy levels + # The result is a [12,128] array representing 12 spin angles and 128 + # energy levels + self.data = [] + for counter_data in science_values_unpacked: + data_array = [ + counter_data[i : i + self.num_energy_steps] + for i in range(0, len(counter_data), self.num_energy_steps) + ] + self.data.append(data_array) def create_event_dataset( @@ -334,9 +379,6 @@ def create_event_dataset( attrs=cdf_attrs.get_global_attributes(dataset_name), ) - # TODO: Determine what should go in event data CDF and how it should be - # structured. 
- return dataset @@ -385,13 +427,15 @@ def create_hskp_dataset( ) # TODO: Change 'TBD' catdesc and fieldname - # Once housekeeping packet definition file is re-generated with updated - # version of space_packet_parser, can get fieldname and catdesc info via: - # for key, value in (packet.header | packet.data).items(): - # fieldname = value.short_description - # catdesc = value.long_description - # I am holding off making this change until I acquire updated housekeeping - # packets/validation data that match the latest telemetry definitions + # Once housekeeping packet definition file is re-generated with + # updated version of space_packet_parser, can get fieldname and + # catdesc info via: + # for key, value in (packet.header | packet.data).items(): + # fieldname = value.short_description + # catdesc = value.long_description + # I am holding off making this change until I acquire updated + # housekeeping packets/validation data that match the latest telemetry + # definitions for key, value in metadata_arrays.items(): attrs = cdf_attrs.get_variable_attributes("codice_support_attrs") attrs["CATDESC"] = "TBD" @@ -457,8 +501,6 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset: dataset : xarray.Dataset The ``xarray`` dataset containing the science data and supporting metadata. """ - # TODO: Use new packet_file_to_dataset() function to simplify things - # Decom the packets, group data by APID, and sort by time packets = decom_packets(file_path) grouped_data = group_by_apid(packets) @@ -496,7 +538,7 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset: # Determine the start time of the packet met = packets[0].data["ACQ_START_SECONDS"].raw_value - met = [met, met + 1] # TODO: Remove after cdflib fix + # Extract the data science_values = packets[0].data["DATA"].raw_value @@ -506,8 +548,12 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset: # Run the pipeline to create a dataset for the product pipeline = CoDICEL1aPipeline(table_id, plan_id, plan_step, view_id) pipeline.configure_data_products(apid) - pipeline.unpack_science_data(science_values) + if "_lo_" in pipeline.dataset_name: + pipeline.unpack_lo_science_data(science_values) + elif "_hi_" in pipeline.dataset_name: + pipeline.unpack_hi_science_data(science_values) dataset = pipeline.create_science_dataset(met, data_version) logger.info(f"\nFinal data product:\n{dataset}\n") + return dataset diff --git a/imap_processing/tests/codice/test_codice_l1a.py b/imap_processing/tests/codice/test_codice_l1a.py index 383a07b9b..1bef0363a 100644 --- a/imap_processing/tests/codice/test_codice_l1a.py +++ b/imap_processing/tests/codice/test_codice_l1a.py @@ -19,19 +19,19 @@ EXPECTED_ARRAY_SHAPES = [ (99,), # hskp - (1, 128), # hi-counters-aggregated - (1, 128), # hi-counters-singles - (1, 128), # hi-omni - (1, 128), # hi-sectored - (1, 128), # hi-pha - (1, 128), # lo-counters-aggregated - (1, 128), # lo-counters-aggregated - (1, 128), # lo-sw-angular - (1, 128), # lo-nsw-angular - (1, 128), # lo-sw-priority - (1, 128), # lo-nsw-priority - (1, 128), # lo-sw-species - (1, 128), # lo-nsw-species + (1, 6, 1), # hi-counters-aggregated + (1, 16, 1), # hi-counters-singles + (1, 60, 1), # hi-omni + (1, 1152, 1), # hi-sectored + (1, 1), # hi-pha + (1, 36, 128), # lo-counters-aggregated + (1, 144, 128), # lo-counters-aggregated + (1, 60, 128), # lo-sw-angular + (1, 228, 128), # lo-nsw-angular + (1, 12, 128), # lo-sw-priority + (1, 12, 128), # lo-nsw-priority + (1, 1, 128), # lo-sw-species + (1, 
1, 128), # lo-nsw-species (1, 128), # lo-pha ] EXPECTED_ARRAY_SIZES = [ @@ -110,9 +110,6 @@ def test_l1a_cdf_filenames(test_l1a_data: xr.Dataset, expected_logical_source: s assert dataset.attrs["Logical_source"] == expected_logical_source -@pytest.mark.xfail( - reason="Currently failing due to cdflib/epoch issue. See https://github.com/MAVENSDC/cdflib/issues/268" -) @pytest.mark.parametrize( "test_l1a_data, expected_shape", list(zip(TEST_PACKETS, EXPECTED_ARRAY_SHAPES)), From 7ae84a46a55ceffcb72891e242448d4b8baf78ef Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 15:01:13 -0600 Subject: [PATCH 05/15] Fixed mypy errors --- imap_processing/codice/codice_l1a.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index f9032930a..c2efd1ee7 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -42,6 +42,7 @@ # TODO: Use new packet_file_to_dataset() function to simplify things # TODO: Determine what should go in event data CDF and how it should be # structured. +# TODO: Make sure CDF attributes match expected nomenclature class CoDICEL1aPipeline: @@ -286,13 +287,15 @@ def unpack_hi_science_data(self, science_values: str) -> None: self.compression_algorithm = constants.HI_COMPRESSION_ID_LOOKUP[self.view_id] # Decompress the binary string - science_values = decompress(science_values, self.compression_algorithm) + science_values_decompressed = decompress( + science_values, self.compression_algorithm + ) # Divide up the data by the number of priorities or species - chunk_size = len(science_values) // self.num_counters + chunk_size = len(science_values_decompressed) // self.num_counters science_values_unpacked = [ - science_values[i : i + chunk_size] - for i in range(0, len(science_values), chunk_size) + science_values_decompressed[i : i + chunk_size] + for i in range(0, len(science_values_decompressed), chunk_size) ] # TODO: Determine how to properly divide up hi data. 
For now, just use @@ -315,13 +318,15 @@ def unpack_lo_science_data(self, science_values: str) -> None: self.compression_algorithm = constants.LO_COMPRESSION_ID_LOOKUP[self.view_id] # Decompress the binary string - science_values = decompress(science_values, self.compression_algorithm) + science_values_decompressed = decompress( + science_values, self.compression_algorithm + ) # Divide up the data by the number of priorities or species - chunk_size = len(science_values) // self.num_counters + chunk_size = len(science_values_decompressed) // self.num_counters science_values_unpacked = [ - science_values[i : i + chunk_size] - for i in range(0, len(science_values), chunk_size) + science_values_decompressed[i : i + chunk_size] + for i in range(0, len(science_values_decompressed), chunk_size) ] # Further divide up the data by energy levels From dc2602c157ce6a70c5e4086871fc039593285987 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 15:06:01 -0600 Subject: [PATCH 06/15] Fixed mypy errors --- imap_processing/codice/codice_l1a.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index c2efd1ee7..7f036c06c 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -334,7 +334,7 @@ def unpack_lo_science_data(self, science_values: str) -> None: # energy levels self.data = [] for counter_data in science_values_unpacked: - data_array = [ + data_array: list[list[int]] = [ counter_data[i : i + self.num_energy_steps] for i in range(0, len(counter_data), self.num_energy_steps) ] From 953c10c274833baa031fa6a2cf525b49d695290c Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 15:13:55 -0600 Subject: [PATCH 07/15] Fixed mypy errors --- imap_processing/codice/codice_l1a.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index 7f036c06c..f1a0888d5 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -334,11 +334,11 @@ def unpack_lo_science_data(self, science_values: str) -> None: # energy levels self.data = [] for counter_data in science_values_unpacked: - data_array: list[list[int]] = [ + data_array = [ counter_data[i : i + self.num_energy_steps] for i in range(0, len(counter_data), self.num_energy_steps) ] - self.data.append(data_array) + self.data.append(data_array) # type: ignore[arg-type] def create_event_dataset( From 69814dafecae3b5d76bc1c2354d72bbd174a5007 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 15:19:58 -0600 Subject: [PATCH 08/15] Fixed doc build errors --- imap_processing/codice/codice_l1a.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index f1a0888d5..1e857c0e3 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -75,8 +75,10 @@ class CoDICEL1aPipeline: Retrieve the acquisition times via the Lo stepping table. get_esa_sweep_values() Retrieve the ESA sweep values. - unpack_science_data() - Make 4D L1a data product from the decompressed science data. + unpack_hi_science_data() + Decompress, unpack, and restructure CoDICE-Hi data arrays. + unpack_lo_science_data() + Decompress, unpack, and restructure CoDICE-Lo data arrays. 
""" def __init__(self, table_id: int, plan_id: int, plan_step: int, view_id: int): @@ -274,7 +276,7 @@ def get_esa_sweep_values(self) -> None: def unpack_hi_science_data(self, science_values: str) -> None: """ - Unpack the CoDICE-Hi science data from the packet. + Decompress, unpack, and restructure CoDICE-Hi data arrays. The science data within the packet is a compressed, binary string of values. @@ -304,7 +306,7 @@ def unpack_hi_science_data(self, science_values: str) -> None: def unpack_lo_science_data(self, science_values: str) -> None: """ - Unpack the CoDICE-Lo science data from the packet. + Decompress, unpack, and restructure CoDICE-Lo data arrays. The science data within the packet is a compressed, binary string of values. These data need to be divided up by species or priorities, From 4e9252f2edd01cd892586eab658a8d7656f6fe67 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Wed, 28 Aug 2024 11:50:14 -0600 Subject: [PATCH 09/15] Updated expected array shapes --- .../tests/codice/test_codice_l1a.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/imap_processing/tests/codice/test_codice_l1a.py b/imap_processing/tests/codice/test_codice_l1a.py index 1bef0363a..d9fd7207c 100644 --- a/imap_processing/tests/codice/test_codice_l1a.py +++ b/imap_processing/tests/codice/test_codice_l1a.py @@ -19,19 +19,19 @@ EXPECTED_ARRAY_SHAPES = [ (99,), # hskp - (1, 6, 1), # hi-counters-aggregated - (1, 16, 1), # hi-counters-singles - (1, 60, 1), # hi-omni - (1, 1152, 1), # hi-sectored + (1, 1, 6, 1), # hi-counters-aggregated # TODO: Double check with Joey + (1, 1, 16, 1), # hi-counters-singles # TODO: Double check with Joey + (1, 15, 4, 1), # hi-omni # TODO: Double check with Joey + (1, 8, 12, 12), # hi-sectored (1, 1), # hi-pha - (1, 36, 128), # lo-counters-aggregated - (1, 144, 128), # lo-counters-aggregated - (1, 60, 128), # lo-sw-angular - (1, 228, 128), # lo-nsw-angular - (1, 12, 128), # lo-sw-priority - (1, 12, 128), # lo-nsw-priority - (1, 1, 128), # lo-sw-species - (1, 1, 128), # lo-nsw-species + (1, 6, 6, 128), # lo-counters-aggregated + (1, 24, 6, 128), # lo-counters-singles + (1, 5, 12, 128), # lo-sw-angular + (1, 19, 12, 128), # lo-nsw-angular + (1, 1, 12, 128), # lo-sw-priority + (1, 1, 12, 128), # lo-nsw-priority + (1, 1, 1, 128), # lo-sw-species + (1, 1, 1, 128), # lo-nsw-species (1, 128), # lo-pha ] EXPECTED_ARRAY_SIZES = [ From c7a8d09bc97b7571c9f422d2e629521f969e553e Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Wed, 28 Aug 2024 11:51:03 -0600 Subject: [PATCH 10/15] Avoiding bitarray dependency --- imap_processing/codice/decompress.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/imap_processing/codice/decompress.py b/imap_processing/codice/decompress.py index e4abf83ea..bb92a75c4 100644 --- a/imap_processing/codice/decompress.py +++ b/imap_processing/codice/decompress.py @@ -29,8 +29,6 @@ import lzma from enum import IntEnum -import bitarray - from imap_processing.codice.constants import LOSSY_A_TABLE, LOSSY_B_TABLE from imap_processing.codice.utils import CoDICECompression @@ -118,7 +116,9 @@ def decompress(compressed_binary: str, algorithm: IntEnum) -> list[int]: The 24- or 32-bit decompressed values. 
""" # Convert the binary string to a byte stream - compressed_bytes = bitarray.bitarray(compressed_binary).tobytes() + compressed_bytes = int(compressed_binary, 2).to_bytes( + (len(compressed_binary) + 7) // 8, byteorder="big" + ) # Apply the appropriate decompression algorithm if algorithm == CoDICECompression.NO_COMPRESSION: From 9bfb8169b22bd911b7b3677375719224275dad67 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Wed, 28 Aug 2024 11:52:28 -0600 Subject: [PATCH 11/15] Added instrument config key to make some conditional areas of the processing pipeline a bit more readable; added proper numbers for positions/energies/spin_sectors --- imap_processing/codice/constants.py | 50 +++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/imap_processing/codice/constants.py b/imap_processing/codice/constants.py index 9054431f4..f063087f2 100644 --- a/imap_processing/codice/constants.py +++ b/imap_processing/codice/constants.py @@ -79,87 +79,111 @@ DATA_PRODUCT_CONFIGURATIONS = { CODICEAPID.COD_HI_INST_COUNTS_AGGREGATED: { "num_counters": 1, - "num_energy_steps": 1, - "num_spin_sectors": 6, + "num_energy_steps": 1, # TODO: Double check with Joey + "num_positions": 6, # TODO: Double check with Joey + "num_spin_sectors": 1, "variable_names": HI_INST_COUNTS_AGGREGATED_NAMES, "dataset_name": "imap_codice_l1a_hi_counters_aggregated", + "instrument": "hi", }, CODICEAPID.COD_HI_INST_COUNTS_SINGLES: { "num_counters": 3, - "num_energy_steps": 1, - "num_spin_sectors": 16, + "num_energy_steps": 1, # TODO: Double check with Joey + "num_positions": 16, # TODO: Double check with Joey + "num_spin_sectors": 1, "variable_names": HI_INST_COUNTS_SINGLES_NAMES, "dataset_name": "imap_codice_l1a_hi_counters_singles", + "instrument": "hi", }, CODICEAPID.COD_HI_OMNI_SPECIES_COUNTS: { "num_counters": 8, - "num_energy_steps": 1, - "num_spin_sectors": 60, # TODO: Double-check this + "num_energy_steps": 15, # TODO: Double check with Joey + "num_positions": 4, # TODO: Double check with Joey + "num_spin_sectors": 1, "variable_names": HI_OMNI_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_hi_omni", + "instrument": "hi", }, CODICEAPID.COD_HI_SECT_SPECIES_COUNTS: { "num_counters": 4, - "num_energy_steps": 1, - "num_spin_sectors": 1152, # TODO: Double-check this + "num_energy_steps": 8, + "num_positions": 12, + "num_spin_sectors": 12, "variable_names": HI_SECT_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_hi_sectored", + "instrument": "hi", }, CODICEAPID.COD_LO_INST_COUNTS_AGGREGATED: { "num_counters": 1, "num_energy_steps": 128, - "num_spin_sectors": 36, + "num_positions": 6, + "num_spin_sectors": 6, "variable_names": LO_INST_COUNTS_AGGREGATED_NAMES, "dataset_name": "imap_codice_l1a_lo_counters_aggregated", + "instrument": "lo", }, CODICEAPID.COD_LO_INST_COUNTS_SINGLES: { "num_counters": 1, "num_energy_steps": 128, - "num_spin_sectors": 144, + "num_positions": 24, + "num_spin_sectors": 6, "variable_names": LO_INST_COUNTS_SINGLES_NAMES, "dataset_name": "imap_codice_l1a_lo_counters_singles", + "instrument": "lo", }, CODICEAPID.COD_LO_SW_ANGULAR_COUNTS: { "num_counters": 4, "num_energy_steps": 128, - "num_spin_sectors": 60, + "num_positions": 5, + "num_spin_sectors": 12, "variable_names": LO_SW_ANGULAR_NAMES, "dataset_name": "imap_codice_l1a_lo_sw_angular", + "instrument": "lo", }, CODICEAPID.COD_LO_NSW_ANGULAR_COUNTS: { "num_counters": 1, "num_energy_steps": 128, - "num_spin_sectors": 228, + "num_positions": 19, + "num_spin_sectors": 12, "variable_names": LO_NSW_ANGULAR_NAMES, 
"dataset_name": "imap_codice_l1a_lo_nsw_angular", + "instrument": "lo", }, CODICEAPID.COD_LO_SW_PRIORITY_COUNTS: { "num_counters": 5, "num_energy_steps": 128, + "num_positions": 1, "num_spin_sectors": 12, "variable_names": LO_SW_PRIORITY_NAMES, "dataset_name": "imap_codice_l1a_lo_sw_priority", + "instrument": "lo", }, CODICEAPID.COD_LO_NSW_PRIORITY_COUNTS: { "num_counters": 2, "num_energy_steps": 128, + "num_positions": 1, "num_spin_sectors": 12, "variable_names": LO_NSW_PRIORITY_NAMES, "dataset_name": "imap_codice_l1a_lo_nsw_priority", + "instrument": "lo", }, CODICEAPID.COD_LO_SW_SPECIES_COUNTS: { "num_counters": 16, "num_energy_steps": 128, + "num_positions": 1, "num_spin_sectors": 1, "variable_names": LO_SW_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_lo_sw_species", + "instrument": "lo", }, CODICEAPID.COD_LO_NSW_SPECIES_COUNTS: { "num_counters": 8, "num_energy_steps": 128, + "num_positions": 1, "num_spin_sectors": 1, "variable_names": LO_NSW_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_lo_nsw_species", + "instrument": "lo", }, } @@ -793,5 +817,5 @@ 252: 6815744, 253: 7340032, 254: 7864320, - 255: 999999, + 255: 9999999, } From c4259100d87ec2741778bf7a1e9bfc602abfd1a5 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Wed, 28 Aug 2024 11:53:02 -0600 Subject: [PATCH 12/15] Added attrs for inst_az coordinate --- .../config/imap_codice_l1a_variable_attrs.yaml | 15 +++++++++++++-- .../config/imap_codice_l1b_variable_attrs.yaml | 11 +++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml b/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml index aee0d7acd..9d7a535d9 100644 --- a/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml @@ -36,15 +36,26 @@ energy_attrs: VALIDMAX: 127 VAR_TYPE: support_data +inst_az_attrs: + <<: *default + CATDESC: Azimuth + FIELDNAM: Azimuth + FORMAT: I2 + LABLAXIS: Azimuth + UNITS: ' ' + VALIDMIN: 0 + VALIDMAX: 31 + VAR_TYPE: support_data + spin_sector_attrs: <<: *default CATDESC: Spin sector indicating range of spin angles FIELDNAM: Spin sector - FORMAT: I4 + FORMAT: I2 LABLAXIS: spin sector UNITS: ' ' VALIDMIN: 0 - VALIDMAX: 1152 + VALIDMAX: 11 VAR_TYPE: support_data # <=== Labels ===> diff --git a/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml b/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml index c9de1c451..cbb14205f 100644 --- a/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml @@ -32,6 +32,17 @@ energy_attrs: VALIDMAX: 127 VAR_TYPE: support_data +inst_az_attrs: + <<: *default + CATDESC: Azimuth + FIELDNAM: Azimuth + FORMAT: I2 + LABLAXIS: Azimuth + UNITS: ' ' + VALIDMIN: 0 + VALIDMAX: 31 + VAR_TYPE: support_data + spin_sector_attrs: <<: *default CATDESC: Spin sector indicating range of spin angles From a21727c9274994ae5d4959602fa92220594384cd Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Wed, 28 Aug 2024 11:54:49 -0600 Subject: [PATCH 13/15] Added further unpacking of science data to properly restructure data arrays by positions, spin_sectors, and energies --- imap_processing/codice/codice_l1a.py | 116 +++++++++++++-------------- 1 file changed, 55 insertions(+), 61 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index 1e857c0e3..a37671c7b 100644 --- a/imap_processing/codice/codice_l1a.py +++ 
b/imap_processing/codice/codice_l1a.py @@ -101,8 +101,10 @@ def configure_data_products(self, apid: int) -> None: self.num_counters = config["num_counters"] self.num_energy_steps = config["num_energy_steps"] self.num_spin_sectors = config["num_spin_sectors"] + self.num_positions = config["num_positions"] self.variable_names = config["variable_names"] self.dataset_name = config["dataset_name"] + self.instrument = config["instrument"] def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset: """ @@ -135,6 +137,12 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset dims=["epoch"], attrs=cdf_attrs.get_variable_attributes("epoch"), ) + inst_az = xr.DataArray( + np.arange(self.num_positions), + name="inst_az", + dims=["inst_az"], + attrs=cdf_attrs.get_variable_attributes("inst_az_attrs"), + ) spin_sector = xr.DataArray( np.arange(self.num_spin_sectors), name="spin_sector", @@ -160,6 +168,7 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset dataset = xr.Dataset( coords={ "epoch": epoch, + "inst_az": inst_az, "spin_sector": spin_sector, "energy": energy_steps, "energy_label": energy_label, @@ -169,21 +178,34 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset # Create a data variable for each counter for variable_data, variable_name in zip(self.data, self.variable_names): - variable_data_arr = np.array(variable_data).reshape( - 1, self.num_spin_sectors, self.num_energy_steps - ) + # Data arrays are structured depending on the instrument + if self.instrument == "lo": + variable_data_arr = np.array(variable_data).reshape( + 1, self.num_positions, self.num_spin_sectors, self.num_energy_steps + ) + dims = ["epoch", "inst_az", "spin_sector", "energy"] + elif self.instrument == "hi": + variable_data_arr = np.array(variable_data).reshape( + 1, self.num_energy_steps, self.num_positions, self.num_spin_sectors + ) + dims = ["epoch", "energy", "inst_az", "spin_sector"] + + # Get the CDF attributes cdf_attrs_key = ( f"{self.dataset_name.split('imap_codice_l1a_')[-1]}-{variable_name}" ) + attrs = cdf_attrs.get_variable_attributes(cdf_attrs_key) + + # Create the CDF data variable dataset[variable_name] = xr.DataArray( variable_data_arr, name=variable_name, - dims=["epoch", "spin_sector", "energy"], - attrs=cdf_attrs.get_variable_attributes(cdf_attrs_key), + dims=dims, + attrs=attrs, ) # Add ESA Sweep Values and acquisition times (lo only) - if "_lo_" in self.dataset_name: + if self.instrument == "lo": self.get_esa_sweep_values() self.get_acquisition_times() dataset["esa_sweep_values"] = xr.DataArray( @@ -274,43 +296,15 @@ def get_esa_sweep_values(self) -> None: sweep_table = sweep_data[sweep_data["table_idx"] == sweep_table_id] self.esa_sweep_values = sweep_table["esa_v"].values - def unpack_hi_science_data(self, science_values: str) -> None: + def unpack_science_data(self, science_values: str) -> None: """ - Decompress, unpack, and restructure CoDICE-Hi data arrays. + Decompress, unpack, and restructure science data arrays. The science data within the packet is a compressed, binary string of - values. - - Parameters - ---------- - science_values : str - A string of binary data representing the science values of the data. 
-        """
-        self.compression_algorithm = constants.HI_COMPRESSION_ID_LOOKUP[self.view_id]
-
-        # Decompress the binary string
-        science_values_decompressed = decompress(
-            science_values, self.compression_algorithm
-        )
-
-        # Divide up the data by the number of priorities or species
-        chunk_size = len(science_values_decompressed) // self.num_counters
-        science_values_unpacked = [
-            science_values_decompressed[i : i + chunk_size]
-            for i in range(0, len(science_values_decompressed), chunk_size)
-        ]
+        values. These data need to be divided up by species or priorities
+        (referred to generally as "counters"), and re-arranged into 3D arrays
+        representing spin sectors, positions, and energies (the order of
+        which depends on the instrument).

         Parameters
         ----------
@@ -319,28 +313,31 @@ def unpack_lo_science_data(self, science_values: str) -> None:
         """
         self.compression_algorithm = constants.LO_COMPRESSION_ID_LOOKUP[self.view_id]

-        # Decompress the binary string
+        # Decompress the binary string into a list of integers
         science_values_decompressed = decompress(
             science_values, self.compression_algorithm
         )

-        # Divide up the data by the number of priorities or species
-        chunk_size = len(science_values_decompressed) // self.num_counters
-        science_values_unpacked = [
-            science_values_decompressed[i : i + chunk_size]
-            for i in range(0, len(science_values_decompressed), chunk_size)
-        ]
+        # Re-arrange the counter data
+        # For CoDICE-lo, data are 3D arrays with a shape representing
+        # [<num_positions>, <num_spin_sectors>, <num_energies>]
+        if self.instrument == "lo":
+            self.data = np.array(science_values_decompressed, dtype=np.uint).reshape(
+                self.num_counters,
+                self.num_positions,
+                self.num_spin_sectors,
+                self.num_energy_steps,
+            )

-        # Further divide up the data by energy levels
-        # The result is a [12,128] array representing 12 spin angles and 128
-        # energy levels
-        self.data = []
-        for counter_data in science_values_unpacked:
-            data_array = [
-                counter_data[i : i + self.num_energy_steps]
-                for i in range(0, len(counter_data), self.num_energy_steps)
-            ]
-            self.data.append(data_array)  # type: ignore[arg-type]
+        # For CoDICE-hi, data are 3D arrays with a shape representing
+        # [<num_energies>, <num_positions>, <num_spin_sectors>]
+        elif self.instrument == "hi":
+            self.data = np.array(science_values_decompressed, dtype=np.uint).reshape(
+                self.num_counters,
+                self.num_energy_steps,
+                self.num_positions,
+                self.num_spin_sectors,
+            )


 def create_event_dataset(
@@ -555,10 +552,7 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset:
         # Run the pipeline to create a dataset for the product
         pipeline = CoDICEL1aPipeline(table_id, plan_id, plan_step, view_id)
         pipeline.configure_data_products(apid)
-        if "_lo_" in pipeline.dataset_name:
-            pipeline.unpack_lo_science_data(science_values)
-        elif "_hi_" in pipeline.dataset_name:
-            pipeline.unpack_hi_science_data(science_values)
+        pipeline.unpack_science_data(science_values)
         dataset = pipeline.create_science_dataset(met, data_version)
         logger.info(f"\nFinal data product:\n{dataset}\n")

From 38704daf2690c2db679034c456504d5dae30150c Mon Sep 17 00:00:00 2001
From: Matthew Bourque
Date: Wed, 28 Aug 2024 11:58:21 -0600
Subject: [PATCH 14/15] Fixed doc build error

---
 imap_processing/codice/codice_l1a.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py
index a37671c7b..08c55ff1e 100644
--- a/imap_processing/codice/codice_l1a.py
+++ b/imap_processing/codice/codice_l1a.py
@@ -75,10 +75,8 @@ class CoDICEL1aPipeline:
         Retrieve the acquisition times via the Lo stepping table.
     get_esa_sweep_values()
         Retrieve the ESA sweep values.
-    unpack_hi_science_data()
-        Decompress, unpack, and restructure CoDICE-Hi data arrays.
-    unpack_lo_science_data()
-        Decompress, unpack, and restructure CoDICE-Lo data arrays.
+    unpack_science_data()
+        Decompress, unpack, and restructure science data arrays.
     """

     def __init__(self, table_id: int, plan_id: int, plan_step: int, view_id: int):

From 4d58231d74c9d2f9d4a389a7fb5ae75259f66682 Mon Sep 17 00:00:00 2001
From: Matthew Bourque
Date: Fri, 30 Aug 2024 10:23:52 -0600
Subject: [PATCH 15/15] Addressed review comments

---
 imap_processing/codice/codice_l1a.py | 4 ++--
 imap_processing/codice/decompress.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py
index 08c55ff1e..bc59ace81 100644
--- a/imap_processing/codice/codice_l1a.py
+++ b/imap_processing/codice/codice_l1a.py
@@ -320,7 +320,7 @@ def unpack_science_data(self, science_values: str) -> None:
         # For CoDICE-lo, data are 3D arrays with a shape representing
         # [<num_positions>, <num_spin_sectors>, <num_energies>]
         if self.instrument == "lo":
-            self.data = np.array(science_values_decompressed, dtype=np.uint).reshape(
+            self.data = np.array(science_values_decompressed, dtype=np.uint32).reshape(
                 self.num_counters,
                 self.num_positions,
                 self.num_spin_sectors,
@@ -330,7 +330,7 @@ def unpack_science_data(self, science_values: str) -> None:
         # For CoDICE-hi, data are 3D arrays with a shape representing
         # [<num_energies>, <num_positions>, <num_spin_sectors>]
         elif self.instrument == "hi":
-            self.data = np.array(science_values_decompressed, dtype=np.uint).reshape(
+            self.data = np.array(science_values_decompressed, dtype=np.uint32).reshape(
                 self.num_counters,
                 self.num_energy_steps,
                 self.num_positions,
diff --git a/imap_processing/codice/decompress.py b/imap_processing/codice/decompress.py
index bb92a75c4..9a48b72b2 100644
--- a/imap_processing/codice/decompress.py
+++ b/imap_processing/codice/decompress.py
@@ -56,7 +56,7 @@ def _apply_lossy_b(compressed_bytes: bytes) -> list[int]:
     """
-    Apply 8-bit to 32-bit Lossy A decompression algorithm.
+    Apply 8-bit to 32-bit Lossy B decompression algorithm.

     The Lossy B algorithm uses a lookup table imported into this module.