From 6f81befdb4261a61f7ac826095e527137023b1c1 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 10:57:28 -0600 Subject: [PATCH] Updated code to more accurately unpack science data --- imap_processing/codice/codice_l1a.py | 124 ++++++++++++------ .../tests/codice/test_codice_l1a.py | 29 ++-- 2 files changed, 98 insertions(+), 55 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index 0c925fee7..f9032930a 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -27,16 +27,21 @@ from imap_processing.cdf.utils import met_to_j2000ns from imap_processing.codice import constants from imap_processing.codice.codice_l0 import decom_packets +from imap_processing.codice.decompress import decompress from imap_processing.codice.utils import CODICEAPID, add_metadata_to_array from imap_processing.utils import group_by_apid, sort_by_time logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -# TODO: Decom data arrays need to be decompressed # TODO: In decommutation, how to have a variable length data and then a checksum # after it? (Might be fixed with new XTCE script updates) # TODO: Add support for decomming multiple APIDs from a single file +# TODO: Add these as variables in CDF: SPIN_PERIOD, ST_BIAS_GAIN_MODE, +# SW_BIAS_GAIN_MODE, RGFO_HALF_SPIN, NSO_HALF_SPIN, DATA_QUALITY +# TODO: Use new packet_file_to_dataset() function to simplify things +# TODO: Determine what should go in event data CDF and how it should be +# structured. class CoDICEL1aPipeline: @@ -92,6 +97,7 @@ def configure_data_products(self, apid: int) -> None: config = constants.DATA_PRODUCT_CONFIGURATIONS.get(apid) # type: ignore[call-overload] self.num_counters = config["num_counters"] self.num_energy_steps = config["num_energy_steps"] + self.num_spin_sectors = config["num_spin_sectors"] self.variable_names = config["variable_names"] self.dataset_name = config["dataset_name"] @@ -121,11 +127,17 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset # Define coordinates epoch = xr.DataArray( - met_to_j2000ns(met), # TODO: Fix after SIT-3 (see note below) + [met_to_j2000ns(met)], name="epoch", dims=["epoch"], attrs=cdf_attrs.get_variable_attributes("epoch"), ) + spin_sector = xr.DataArray( + np.arange(self.num_spin_sectors), + name="spin_sector", + dims=["spin_sector"], + attrs=cdf_attrs.get_variable_attributes("spin_sector_attrs"), + ) energy_steps = xr.DataArray( np.arange(self.num_energy_steps), name="energy", @@ -145,6 +157,7 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset dataset = xr.Dataset( coords={ "epoch": epoch, + "spin_sector": spin_sector, "energy": energy_steps, "energy_label": energy_label, }, @@ -153,12 +166,8 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset # Create a data variable for each counter for variable_data, variable_name in zip(self.data, self.variable_names): - # TODO: Currently, cdflib doesn't properly write/read CDF files that - # have a single epoch value. To get around this for now, use - # two epoch values and reshape accordingly. Revisit this after - # SIT-3. 
See https://github.com/MAVENSDC/cdflib/issues/268 - variable_data_arr = np.array(list(variable_data) * 2, dtype=int).reshape( - 2, self.num_energy_steps + variable_data_arr = np.array(variable_data).reshape( + 1, self.num_spin_sectors, self.num_energy_steps ) cdf_attrs_key = ( f"{self.dataset_name.split('imap_codice_l1a_')[-1]}-{variable_name}" @@ -166,7 +175,7 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset dataset[variable_name] = xr.DataArray( variable_data_arr, name=variable_name, - dims=["epoch", "energy"], + dims=["epoch", "spin_sector", "energy"], attrs=cdf_attrs.get_variable_attributes(cdf_attrs_key), ) @@ -262,14 +271,41 @@ def get_esa_sweep_values(self) -> None: sweep_table = sweep_data[sweep_data["table_idx"] == sweep_table_id] self.esa_sweep_values = sweep_table["esa_v"].values - def unpack_science_data(self, science_values: str) -> None: + def unpack_hi_science_data(self, science_values: str) -> None: """ - Unpack the science data from the packet. + Unpack the CoDICE-Hi science data from the packet. - For LO SW Species Counts data, the science data within the packet is a - blob of compressed values of length 2048 bits (16 species * 128 energy - levels). These data need to be divided up by species so that each - species can have their own data variable in the L1A CDF file. + The science data within the packet is a compressed, binary string of + values. + + Parameters + ---------- + science_values : str + A string of binary data representing the science values of the data. + """ + self.compression_algorithm = constants.HI_COMPRESSION_ID_LOOKUP[self.view_id] + + # Decompress the binary string + science_values = decompress(science_values, self.compression_algorithm) + + # Divide up the data by the number of priorities or species + chunk_size = len(science_values) // self.num_counters + science_values_unpacked = [ + science_values[i : i + chunk_size] + for i in range(0, len(science_values), chunk_size) + ] + + # TODO: Determine how to properly divide up hi data. For now, just use + # arrays for each counter + self.data = science_values_unpacked + + def unpack_lo_science_data(self, science_values: str) -> None: + """ + Unpack the CoDICE-Lo science data from the packet. + + The science data within the packet is a compressed, binary string of + values. These data need to be divided up by species or priorities, + and re-arranged into 2D arrays representing energy and spin angle. Parameters ---------- @@ -277,18 +313,27 @@ def unpack_science_data(self, science_values: str) -> None: A string of binary data representing the science values of the data. 
""" self.compression_algorithm = constants.LO_COMPRESSION_ID_LOOKUP[self.view_id] - self.collapse_table_id = constants.LO_COLLAPSE_TABLE_ID_LOOKUP[self.view_id] - # TODO: Turn this back on after SIT-3 - # For SIT-3, just create appropriate length data arrays of all ones + # Decompress the binary string + science_values = decompress(science_values, self.compression_algorithm) + # Divide up the data by the number of priorities or species - # science_values = packets[0].data["DATA"].raw_value - # num_bits = len(science_values) - # chunk_size = len(science_values) // self.num_counters - # self.data = [ - # science_values[i : i + chunk_size] for i in range(0, num_bits, chunk_size) - # ] - self.data = [["1"] * 128] * self.num_counters + chunk_size = len(science_values) // self.num_counters + science_values_unpacked = [ + science_values[i : i + chunk_size] + for i in range(0, len(science_values), chunk_size) + ] + + # Further divide up the data by energy levels + # The result is a [12,128] array representing 12 spin angles and 128 + # energy levels + self.data = [] + for counter_data in science_values_unpacked: + data_array = [ + counter_data[i : i + self.num_energy_steps] + for i in range(0, len(counter_data), self.num_energy_steps) + ] + self.data.append(data_array) def create_event_dataset( @@ -334,9 +379,6 @@ def create_event_dataset( attrs=cdf_attrs.get_global_attributes(dataset_name), ) - # TODO: Determine what should go in event data CDF and how it should be - # structured. - return dataset @@ -385,13 +427,15 @@ def create_hskp_dataset( ) # TODO: Change 'TBD' catdesc and fieldname - # Once housekeeping packet definition file is re-generated with updated - # version of space_packet_parser, can get fieldname and catdesc info via: - # for key, value in (packet.header | packet.data).items(): - # fieldname = value.short_description - # catdesc = value.long_description - # I am holding off making this change until I acquire updated housekeeping - # packets/validation data that match the latest telemetry definitions + # Once housekeeping packet definition file is re-generated with + # updated version of space_packet_parser, can get fieldname and + # catdesc info via: + # for key, value in (packet.header | packet.data).items(): + # fieldname = value.short_description + # catdesc = value.long_description + # I am holding off making this change until I acquire updated + # housekeeping packets/validation data that match the latest telemetry + # definitions for key, value in metadata_arrays.items(): attrs = cdf_attrs.get_variable_attributes("codice_support_attrs") attrs["CATDESC"] = "TBD" @@ -457,8 +501,6 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset: dataset : xarray.Dataset The ``xarray`` dataset containing the science data and supporting metadata. 
""" - # TODO: Use new packet_file_to_dataset() function to simplify things - # Decom the packets, group data by APID, and sort by time packets = decom_packets(file_path) grouped_data = group_by_apid(packets) @@ -496,7 +538,7 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset: # Determine the start time of the packet met = packets[0].data["ACQ_START_SECONDS"].raw_value - met = [met, met + 1] # TODO: Remove after cdflib fix + # Extract the data science_values = packets[0].data["DATA"].raw_value @@ -506,8 +548,12 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset: # Run the pipeline to create a dataset for the product pipeline = CoDICEL1aPipeline(table_id, plan_id, plan_step, view_id) pipeline.configure_data_products(apid) - pipeline.unpack_science_data(science_values) + if "_lo_" in pipeline.dataset_name: + pipeline.unpack_lo_science_data(science_values) + elif "_hi_" in pipeline.dataset_name: + pipeline.unpack_hi_science_data(science_values) dataset = pipeline.create_science_dataset(met, data_version) logger.info(f"\nFinal data product:\n{dataset}\n") + return dataset diff --git a/imap_processing/tests/codice/test_codice_l1a.py b/imap_processing/tests/codice/test_codice_l1a.py index 383a07b9b..1bef0363a 100644 --- a/imap_processing/tests/codice/test_codice_l1a.py +++ b/imap_processing/tests/codice/test_codice_l1a.py @@ -19,19 +19,19 @@ EXPECTED_ARRAY_SHAPES = [ (99,), # hskp - (1, 128), # hi-counters-aggregated - (1, 128), # hi-counters-singles - (1, 128), # hi-omni - (1, 128), # hi-sectored - (1, 128), # hi-pha - (1, 128), # lo-counters-aggregated - (1, 128), # lo-counters-aggregated - (1, 128), # lo-sw-angular - (1, 128), # lo-nsw-angular - (1, 128), # lo-sw-priority - (1, 128), # lo-nsw-priority - (1, 128), # lo-sw-species - (1, 128), # lo-nsw-species + (1, 6, 1), # hi-counters-aggregated + (1, 16, 1), # hi-counters-singles + (1, 60, 1), # hi-omni + (1, 1152, 1), # hi-sectored + (1, 1), # hi-pha + (1, 36, 128), # lo-counters-aggregated + (1, 144, 128), # lo-counters-aggregated + (1, 60, 128), # lo-sw-angular + (1, 228, 128), # lo-nsw-angular + (1, 12, 128), # lo-sw-priority + (1, 12, 128), # lo-nsw-priority + (1, 1, 128), # lo-sw-species + (1, 1, 128), # lo-nsw-species (1, 128), # lo-pha ] EXPECTED_ARRAY_SIZES = [ @@ -110,9 +110,6 @@ def test_l1a_cdf_filenames(test_l1a_data: xr.Dataset, expected_logical_source: s assert dataset.attrs["Logical_source"] == expected_logical_source -@pytest.mark.xfail( - reason="Currently failing due to cdflib/epoch issue. See https://github.com/MAVENSDC/cdflib/issues/268" -) @pytest.mark.parametrize( "test_l1a_data, expected_shape", list(zip(TEST_PACKETS, EXPECTED_ARRAY_SHAPES)),