Updated code to more accurately unpack science data
bourque committed Aug 20, 2024
1 parent bb9cb30 commit 6f81bef
Showing 2 changed files with 98 additions and 55 deletions.
124 changes: 85 additions & 39 deletions imap_processing/codice/codice_l1a.py
@@ -27,16 +27,21 @@
from imap_processing.cdf.utils import met_to_j2000ns
from imap_processing.codice import constants
from imap_processing.codice.codice_l0 import decom_packets
from imap_processing.codice.decompress import decompress
from imap_processing.codice.utils import CODICEAPID, add_metadata_to_array
from imap_processing.utils import group_by_apid, sort_by_time

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# TODO: Decom data arrays need to be decompressed
# TODO: In decommutation, how to have a variable length data and then a checksum
# after it? (Might be fixed with new XTCE script updates)
# TODO: Add support for decomming multiple APIDs from a single file
# TODO: Add these as variables in CDF: SPIN_PERIOD, ST_BIAS_GAIN_MODE,
# SW_BIAS_GAIN_MODE, RGFO_HALF_SPIN, NSO_HALF_SPIN, DATA_QUALITY
# TODO: Use new packet_file_to_dataset() function to simplify things
# TODO: Determine what should go in event data CDF and how it should be
# structured.


class CoDICEL1aPipeline:
@@ -92,6 +97,7 @@ def configure_data_products(self, apid: int) -> None:
config = constants.DATA_PRODUCT_CONFIGURATIONS.get(apid) # type: ignore[call-overload]
self.num_counters = config["num_counters"]
self.num_energy_steps = config["num_energy_steps"]
self.num_spin_sectors = config["num_spin_sectors"]
self.variable_names = config["variable_names"]
self.dataset_name = config["dataset_name"]
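
For reference, here is a minimal sketch of the shape of a constants.DATA_PRODUCT_CONFIGURATIONS entry that configure_data_products() reads. The APID, counter count, and variable names are placeholders (the real table lives in imap_processing/codice/constants.py); the spin-sector and energy-step counts mirror the lo-sw-priority dimensions used in the tests below.

# Hypothetical entry in constants.DATA_PRODUCT_CONFIGURATIONS; the APID,
# counter count, and variable names are placeholders for illustration only.
EXAMPLE_DATA_PRODUCT_CONFIGURATIONS = {
    0x123: {  # placeholder APID
        "num_counters": 5,  # placeholder; one counter per priority/species
        "num_energy_steps": 128,
        "num_spin_sectors": 12,
        "variable_names": ["p0", "p1", "p2", "p3", "p4"],  # placeholder names
        "dataset_name": "imap_codice_l1a_lo-sw-priority",
    },
}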

@@ -121,11 +127,17 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset

# Define coordinates
epoch = xr.DataArray(
met_to_j2000ns(met), # TODO: Fix after SIT-3 (see note below)
[met_to_j2000ns(met)],
name="epoch",
dims=["epoch"],
attrs=cdf_attrs.get_variable_attributes("epoch"),
)
spin_sector = xr.DataArray(
np.arange(self.num_spin_sectors),
name="spin_sector",
dims=["spin_sector"],
attrs=cdf_attrs.get_variable_attributes("spin_sector_attrs"),
)
energy_steps = xr.DataArray(
np.arange(self.num_energy_steps),
name="energy",
@@ -145,6 +157,7 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset
dataset = xr.Dataset(
coords={
"epoch": epoch,
"spin_sector": spin_sector,
"energy": energy_steps,
"energy_label": energy_label,
},
@@ -153,20 +166,16 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset

# Create a data variable for each counter
for variable_data, variable_name in zip(self.data, self.variable_names):
# TODO: Currently, cdflib doesn't properly write/read CDF files that
# have a single epoch value. To get around this for now, use
# two epoch values and reshape accordingly. Revisit this after
# SIT-3. See https://github.com/MAVENSDC/cdflib/issues/268
variable_data_arr = np.array(list(variable_data) * 2, dtype=int).reshape(
2, self.num_energy_steps
variable_data_arr = np.array(variable_data).reshape(
1, self.num_spin_sectors, self.num_energy_steps
)
cdf_attrs_key = (
f"{self.dataset_name.split('imap_codice_l1a_')[-1]}-{variable_name}"
)
dataset[variable_name] = xr.DataArray(
variable_data_arr,
name=variable_name,
dims=["epoch", "energy"],
dims=["epoch", "spin_sector", "energy"],
attrs=cdf_attrs.get_variable_attributes(cdf_attrs_key),
)

@@ -262,33 +271,69 @@ def get_esa_sweep_values(self) -> None:
sweep_table = sweep_data[sweep_data["table_idx"] == sweep_table_id]
self.esa_sweep_values = sweep_table["esa_v"].values

def unpack_science_data(self, science_values: str) -> None:
def unpack_hi_science_data(self, science_values: str) -> None:
"""
Unpack the science data from the packet.
Unpack the CoDICE-Hi science data from the packet.
For LO SW Species Counts data, the science data within the packet is a
blob of compressed values of length 2048 bits (16 species * 128 energy
levels). These data need to be divided up by species so that each
species can have their own data variable in the L1A CDF file.
The science data within the packet is a compressed, binary string of
values.
Parameters
----------
science_values : str
A string of binary data representing the science values of the data.
"""
self.compression_algorithm = constants.HI_COMPRESSION_ID_LOOKUP[self.view_id]

# Decompress the binary string
science_values = decompress(science_values, self.compression_algorithm)

# Divide up the data by the number of priorities or species
chunk_size = len(science_values) // self.num_counters
science_values_unpacked = [
science_values[i : i + chunk_size]
for i in range(0, len(science_values), chunk_size)
]

# TODO: Determine how to properly divide up hi data. For now, just use
# arrays for each counter
self.data = science_values_unpacked

def unpack_lo_science_data(self, science_values: str) -> None:
"""
Unpack the CoDICE-Lo science data from the packet.
The science data within the packet is a compressed, binary string of
values. These data need to be divided up by species or priorities,
and re-arranged into 2D arrays representing energy and spin angle.
Parameters
----------
science_values : str
A string of binary data representing the science values of the data.
"""
self.compression_algorithm = constants.LO_COMPRESSION_ID_LOOKUP[self.view_id]
self.collapse_table_id = constants.LO_COLLAPSE_TABLE_ID_LOOKUP[self.view_id]

# TODO: Turn this back on after SIT-3
# For SIT-3, just create appropriate length data arrays of all ones
# Decompress the binary string
science_values = decompress(science_values, self.compression_algorithm)

# Divide up the data by the number of priorities or species
# science_values = packets[0].data["DATA"].raw_value
# num_bits = len(science_values)
# chunk_size = len(science_values) // self.num_counters
# self.data = [
# science_values[i : i + chunk_size] for i in range(0, num_bits, chunk_size)
# ]
self.data = [["1"] * 128] * self.num_counters
chunk_size = len(science_values) // self.num_counters
science_values_unpacked = [
science_values[i : i + chunk_size]
for i in range(0, len(science_values), chunk_size)
]

# Further divide up the data by energy levels
# The result is a [12,128] array representing 12 spin angles and 128
# energy levels
self.data = []
for counter_data in science_values_unpacked:
data_array = [
counter_data[i : i + self.num_energy_steps]
for i in range(0, len(counter_data), self.num_energy_steps)
]
self.data.append(data_array)
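
For illustration, here is a standalone sketch of the chunk-and-reshape logic above, using dummy counts and the lo-sw-priority dimensions (12 spin sectors by 128 energy steps); the decompressed values and counter count are placeholders.

# Standalone sketch of the per-counter chunking and [spin sector, energy]
# re-arrangement done in unpack_lo_science_data (dummy values only)
num_counters, num_spin_sectors, num_energy_steps = 5, 12, 128
decompressed = list(range(num_counters * num_spin_sectors * num_energy_steps))

# Divide the flat list into one chunk per counter
chunk_size = len(decompressed) // num_counters
per_counter = [
    decompressed[i : i + chunk_size]
    for i in range(0, len(decompressed), chunk_size)
]

# Re-arrange each chunk into rows of energy steps, one row per spin sector
data = [
    [chunk[i : i + num_energy_steps] for i in range(0, len(chunk), num_energy_steps)]
    for chunk in per_counter
]
assert len(data) == num_counters
assert len(data[0]) == num_spin_sectors and len(data[0][0]) == num_energy_steps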


def create_event_dataset(
@@ -334,9 +379,6 @@ def create_event_dataset
attrs=cdf_attrs.get_global_attributes(dataset_name),
)

# TODO: Determine what should go in event data CDF and how it should be
# structured.

return dataset


@@ -385,13 +427,15 @@ def create_hskp_dataset(
)

# TODO: Change 'TBD' catdesc and fieldname
# Once housekeeping packet definition file is re-generated with updated
# version of space_packet_parser, can get fieldname and catdesc info via:
# for key, value in (packet.header | packet.data).items():
# fieldname = value.short_description
# catdesc = value.long_description
# I am holding off making this change until I acquire updated housekeeping
# packets/validation data that match the latest telemetry definitions
# Once housekeeping packet definition file is re-generated with
# updated version of space_packet_parser, can get fieldname and
# catdesc info via:
# for key, value in (packet.header | packet.data).items():
# fieldname = value.short_description
# catdesc = value.long_description
# I am holding off making this change until I acquire updated
# housekeeping packets/validation data that match the latest telemetry
# definitions
for key, value in metadata_arrays.items():
attrs = cdf_attrs.get_variable_attributes("codice_support_attrs")
attrs["CATDESC"] = "TBD"
@@ -457,8 +501,6 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset:
dataset : xarray.Dataset
The ``xarray`` dataset containing the science data and supporting metadata.
"""
# TODO: Use new packet_file_to_dataset() function to simplify things

# Decom the packets, group data by APID, and sort by time
packets = decom_packets(file_path)
grouped_data = group_by_apid(packets)
@@ -496,7 +538,7 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset:

# Determine the start time of the packet
met = packets[0].data["ACQ_START_SECONDS"].raw_value
met = [met, met + 1] # TODO: Remove after cdflib fix

# Extract the data
science_values = packets[0].data["DATA"].raw_value

@@ -506,8 +548,12 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset:
# Run the pipeline to create a dataset for the product
pipeline = CoDICEL1aPipeline(table_id, plan_id, plan_step, view_id)
pipeline.configure_data_products(apid)
pipeline.unpack_science_data(science_values)
if "_lo_" in pipeline.dataset_name:
pipeline.unpack_lo_science_data(science_values)
elif "_hi_" in pipeline.dataset_name:
pipeline.unpack_hi_science_data(science_values)
dataset = pipeline.create_science_dataset(met, data_version)

logger.info(f"\nFinal data product:\n{dataset}\n")

return dataset
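
As a usage note, the L1A pipeline entry point can be exercised roughly as follows; the packet file name and data version below are placeholders, not values taken from this commit.

# Minimal usage sketch (placeholder packet file name and data version)
from pathlib import Path

from imap_processing.codice.codice_l1a import process_codice_l1a

dataset = process_codice_l1a(
    file_path=Path("example_codice_packets.pkts"),  # placeholder packet file
    data_version="001",  # placeholder version string
)
print(dataset)
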
29 changes: 13 additions & 16 deletions imap_processing/tests/codice/test_codice_l1a.py
@@ -19,19 +19,19 @@

EXPECTED_ARRAY_SHAPES = [
(99,), # hskp
(1, 128), # hi-counters-aggregated
(1, 128), # hi-counters-singles
(1, 128), # hi-omni
(1, 128), # hi-sectored
(1, 128), # hi-pha
(1, 128), # lo-counters-aggregated
(1, 128), # lo-counters-aggregated
(1, 128), # lo-sw-angular
(1, 128), # lo-nsw-angular
(1, 128), # lo-sw-priority
(1, 128), # lo-nsw-priority
(1, 128), # lo-sw-species
(1, 128), # lo-nsw-species
(1, 6, 1), # hi-counters-aggregated
(1, 16, 1), # hi-counters-singles
(1, 60, 1), # hi-omni
(1, 1152, 1), # hi-sectored
(1, 1), # hi-pha
(1, 36, 128), # lo-counters-aggregated
(1, 144, 128), # lo-counters-singles
(1, 60, 128), # lo-sw-angular
(1, 228, 128), # lo-nsw-angular
(1, 12, 128), # lo-sw-priority
(1, 12, 128), # lo-nsw-priority
(1, 1, 128), # lo-sw-species
(1, 1, 128), # lo-nsw-species
(1, 128), # lo-pha
]
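
Most of the science-product tuples above follow (epoch, spin sectors, energy steps); for example, lo-sw-priority unpacks into 1 epoch by 12 spin sectors by 128 energy steps. A minimal sanity check of that reading, assuming the list ordering above:

# Illustrative check only: lo-sw-priority is (epoch, spin sectors, energy steps)
assert EXPECTED_ARRAY_SHAPES[10] == (1, 12, 128)
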
EXPECTED_ARRAY_SIZES = [
@@ -110,9 +110,6 @@ def test_l1a_cdf_filenames(test_l1a_data: xr.Dataset, expected_logical_source: s
assert dataset.attrs["Logical_source"] == expected_logical_source


@pytest.mark.xfail(
reason="Currently failing due to cdflib/epoch issue. See https://github.com/MAVENSDC/cdflib/issues/268"
)
@pytest.mark.parametrize(
"test_l1a_data, expected_shape",
list(zip(TEST_PACKETS, EXPECTED_ARRAY_SHAPES)),
