From 0f2247831aa0ce8cb116b440fd19bf7bbf695551 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 10:49:40 -0600 Subject: [PATCH 01/15] Updated decompression algorithm to read in a binary string instead of individual integer values --- imap_processing/codice/decompress.py | 90 ++++++++++--------- .../tests/codice/test_decompress.py | 31 ++++--- 2 files changed, 63 insertions(+), 58 deletions(-) diff --git a/imap_processing/codice/decompress.py b/imap_processing/codice/decompress.py index bbbad4b6c..e4abf83ea 100644 --- a/imap_processing/codice/decompress.py +++ b/imap_processing/codice/decompress.py @@ -26,18 +26,16 @@ This information was provided via email from Greg Dunn on Oct 23, 2023 """ -# TODO: Add support for performing decompression of a list of values instead of -# a single value - import lzma from enum import IntEnum -from typing import Union + +import bitarray from imap_processing.codice.constants import LOSSY_A_TABLE, LOSSY_B_TABLE from imap_processing.codice.utils import CoDICECompression -def _apply_lossy_a(compressed_value: int) -> int: +def _apply_lossy_a(compressed_bytes: bytes) -> list[int]: """ Apply 8-bit to 32-bit Lossy A decompression algorithm. @@ -45,63 +43,62 @@ def _apply_lossy_a(compressed_value: int) -> int: Parameters ---------- - compressed_value : int - The compressed 8-bit value. + compressed_bytes : bytes + The compressed byte stream. Returns ------- - int - The 24- or 32-bit decompressed value. + decompressed_values : list[int] + The 24- or 32-bit decompressed values. """ - return LOSSY_A_TABLE[compressed_value] + compressed_values = list(compressed_bytes) + decompressed_values = [LOSSY_A_TABLE[item] for item in compressed_values] + return decompressed_values -def _apply_lossy_b(compressed_value: int) -> int: +def _apply_lossy_b(compressed_bytes: bytes) -> list[int]: """ - Apply 8-bit to 32-bit Lossy B decompression algorithm. + Apply 8-bit to 32-bit Lossy A decompression algorithm. The Lossy B algorithm uses a lookup table imported into this module. Parameters ---------- - compressed_value : int - The compressed 8-bit value. + compressed_bytes : bytes + The compressed byte stream. Returns ------- - int - The 24- or 32-bit decompressed value. + decompressed_values : list[int] + The 24- or 32-bit decompressed values. """ - return LOSSY_B_TABLE[compressed_value] + compressed_values = list(compressed_bytes) + decompressed_values = [LOSSY_B_TABLE[item] for item in compressed_values] + return decompressed_values -def _apply_lzma_lossless(compressed_value: Union[int, bytes]) -> int: +def _apply_lzma_lossless(compressed_bytes: bytes) -> bytes: """ Apply LZMA lossless decompression algorithm. Parameters ---------- - compressed_value : int or bytes - The compressed 8-bit value. + compressed_bytes : bytes + The compressed byte stream. Returns ------- - decompressed_value : int - The 24- or 32-bit decompressed value. + lzma_decompressed_values : bytes + The 24- or 32-bit lzma decompressed values. 
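+
+    Examples
+    --------
+    A round-trip sketch, mirroring the unit tests (which LZMA-compress the
+    single byte value 234):
+
+    >>> import lzma
+    >>> _apply_lzma_lossless(lzma.compress(bytes([234])))
+    b'\xea'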
""" - if isinstance(compressed_value, int): - bytes_compressed_value = compressed_value.to_bytes(compressed_value, "big") - else: - bytes_compressed_value = compressed_value - decompressed_value = lzma.decompress(bytes_compressed_value) - decompressed_value_int = int.from_bytes(decompressed_value, byteorder="big") + lzma_decompressed_values = lzma.decompress(compressed_bytes) - return decompressed_value_int + return lzma_decompressed_values -def decompress(compressed_value: int, algorithm: IntEnum) -> int: +def decompress(compressed_binary: str, algorithm: IntEnum) -> list[int]: """ - Will decompress the value. + Perform decompression on a binary string into a list of integers. Apply the appropriate decompression algorithm(s) based on the value of the ``algorithm`` attribute. One or more individual algorithms may be @@ -109,32 +106,37 @@ def decompress(compressed_value: int, algorithm: IntEnum) -> int: Parameters ---------- - compressed_value : int - The 8-bit compressed value to decompress. + compressed_binary : str + The compressed binary string. algorithm : int The algorithm to apply. Supported algorithms are provided in the ``codice_utils.CoDICECompression`` class. Returns ------- - decompressed_value : int - The 24- or 32-bit decompressed value. + decompressed_values : list[int] + The 24- or 32-bit decompressed values. """ + # Convert the binary string to a byte stream + compressed_bytes = bitarray.bitarray(compressed_binary).tobytes() + + # Apply the appropriate decompression algorithm if algorithm == CoDICECompression.NO_COMPRESSION: - decompressed_value = compressed_value + decompressed_values = list(compressed_bytes) elif algorithm == CoDICECompression.LOSSY_A: - decompressed_value = _apply_lossy_a(compressed_value) + decompressed_values = _apply_lossy_a(compressed_bytes) elif algorithm == CoDICECompression.LOSSY_B: - decompressed_value = _apply_lossy_b(compressed_value) + decompressed_values = _apply_lossy_b(compressed_bytes) elif algorithm == CoDICECompression.LOSSLESS: - decompressed_value = _apply_lzma_lossless(compressed_value) + decompressed_bytes = _apply_lzma_lossless(compressed_bytes) + decompressed_values = list(decompressed_bytes) elif algorithm == CoDICECompression.LOSSY_A_LOSSLESS: - decompressed_value = _apply_lzma_lossless(compressed_value) - decompressed_value = _apply_lossy_a(decompressed_value) + decompressed_bytes = _apply_lzma_lossless(compressed_bytes) + decompressed_values = _apply_lossy_a(decompressed_bytes) elif algorithm == CoDICECompression.LOSSY_B_LOSSLESS: - decompressed_value = _apply_lzma_lossless(compressed_value) - decompressed_value = _apply_lossy_b(decompressed_value) + decompressed_bytes = _apply_lzma_lossless(compressed_bytes) + decompressed_values = _apply_lossy_b(decompressed_bytes) else: raise ValueError(f"{algorithm} is not supported") - return decompressed_value + return decompressed_values diff --git a/imap_processing/tests/codice/test_decompress.py b/imap_processing/tests/codice/test_decompress.py index e74f60d73..853a94ccc 100644 --- a/imap_processing/tests/codice/test_decompress.py +++ b/imap_processing/tests/codice/test_decompress.py @@ -9,34 +9,37 @@ from imap_processing.codice.utils import CoDICECompression # Test the algorithms using input value of 234 (picked randomly) -LZMA_EXAMPLE = lzma.compress((234).to_bytes(1, byteorder="big")) +lzma_bytes = lzma.compress((234).to_bytes(1, byteorder="big")) +LZMA_EXAMPLE = "".join(format(byte, "08b") for byte in lzma_bytes) TEST_DATA = [ - (234, CoDICECompression.NO_COMPRESSION, 234), - (234, 
CoDICECompression.LOSSY_A, 221184),
-    (234, CoDICECompression.LOSSY_B, 1441792),
-    (LZMA_EXAMPLE, CoDICECompression.LOSSLESS, 234),
-    (LZMA_EXAMPLE, CoDICECompression.LOSSY_A_LOSSLESS, 221184),
-    (LZMA_EXAMPLE, CoDICECompression.LOSSY_B_LOSSLESS, 1441792),
+    ("11101010", CoDICECompression.NO_COMPRESSION, [234]),
+    ("11101010", CoDICECompression.LOSSY_A, [221184]),
+    ("11101010", CoDICECompression.LOSSY_B, [1441792]),
+    (LZMA_EXAMPLE, CoDICECompression.LOSSLESS, [234]),
+    (LZMA_EXAMPLE, CoDICECompression.LOSSY_A_LOSSLESS, [221184]),
+    (LZMA_EXAMPLE, CoDICECompression.LOSSY_B_LOSSLESS, [1441792]),
 ]


 @pytest.mark.parametrize(
-    ("compressed_value", "algorithm", "expected_result"), TEST_DATA
+    ("compressed_binary", "algorithm", "expected_result"), TEST_DATA
 )
-def test_decompress(compressed_value: int, algorithm: IntEnum, expected_result: int):
+def test_decompress(
+    compressed_binary: str, algorithm: IntEnum, expected_result: list[int]
+):
     """Tests the ``decompress`` function

     Parameters
     ----------
-    compressed_value : int
-        The compressed value to test decompression on
+    compressed_binary : str
+        The compressed binary string to test decompression on
     algorithm : IntEnum
         The algorithm to use in decompression
-    expected_result : int
+    expected_result : list[int]
         The expected decompressed values
     """

-    decompressed_value = decompress(compressed_value, algorithm)
+    decompressed_value = decompress(compressed_binary, algorithm)
     assert decompressed_value == expected_result


@@ -44,4 +47,4 @@ def test_decompress_raises():
     """Tests that the ``decompress`` function raises with an unknown algorithm"""

     with pytest.raises(ValueError, match="some_unsupported_algorithm"):
-        decompress(234, "some_unsupported_algorithm")
+        decompress("11101010", "some_unsupported_algorithm")

From caf55ec3427fa3c13abea74292ebb7bbd1154a3f Mon Sep 17 00:00:00 2001
From: Matthew Bourque
Date: Tue, 20 Aug 2024 10:51:20 -0600
Subject: [PATCH 02/15] Removed collapse table lookups, as the info they
 provided is instead hard-coded into the configuration dictionary; Added spin
 sector config variable

---
 imap_processing/codice/constants.py | 44 ++++++++++++++---------------
 1 file changed, 21 insertions(+), 23 deletions(-)

diff --git a/imap_processing/codice/constants.py b/imap_processing/codice/constants.py
index a8bcf3503..9054431f4 100644
--- a/imap_processing/codice/constants.py
+++ b/imap_processing/codice/constants.py
@@ -12,6 +12,9 @@
 ESA = ElectroStatic Analyzer
 """

+# TODO: What to do in the case of a value of 255 in LOSSY_A and LOSSY_B
+# compression?
+ from imap_processing.codice.utils import CODICEAPID, CoDICECompression APIDS_FOR_SCIENCE_PROCESSING = [ @@ -76,73 +79,85 @@ DATA_PRODUCT_CONFIGURATIONS = { CODICEAPID.COD_HI_INST_COUNTS_AGGREGATED: { "num_counters": 1, - "num_energy_steps": 128, + "num_energy_steps": 1, + "num_spin_sectors": 6, "variable_names": HI_INST_COUNTS_AGGREGATED_NAMES, "dataset_name": "imap_codice_l1a_hi_counters_aggregated", }, CODICEAPID.COD_HI_INST_COUNTS_SINGLES: { "num_counters": 3, - "num_energy_steps": 128, + "num_energy_steps": 1, + "num_spin_sectors": 16, "variable_names": HI_INST_COUNTS_SINGLES_NAMES, "dataset_name": "imap_codice_l1a_hi_counters_singles", }, CODICEAPID.COD_HI_OMNI_SPECIES_COUNTS: { "num_counters": 8, - "num_energy_steps": 128, + "num_energy_steps": 1, + "num_spin_sectors": 60, # TODO: Double-check this "variable_names": HI_OMNI_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_hi_omni", }, CODICEAPID.COD_HI_SECT_SPECIES_COUNTS: { "num_counters": 4, - "num_energy_steps": 128, + "num_energy_steps": 1, + "num_spin_sectors": 1152, # TODO: Double-check this "variable_names": HI_SECT_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_hi_sectored", }, CODICEAPID.COD_LO_INST_COUNTS_AGGREGATED: { "num_counters": 1, "num_energy_steps": 128, + "num_spin_sectors": 36, "variable_names": LO_INST_COUNTS_AGGREGATED_NAMES, "dataset_name": "imap_codice_l1a_lo_counters_aggregated", }, CODICEAPID.COD_LO_INST_COUNTS_SINGLES: { "num_counters": 1, "num_energy_steps": 128, + "num_spin_sectors": 144, "variable_names": LO_INST_COUNTS_SINGLES_NAMES, "dataset_name": "imap_codice_l1a_lo_counters_singles", }, CODICEAPID.COD_LO_SW_ANGULAR_COUNTS: { "num_counters": 4, "num_energy_steps": 128, + "num_spin_sectors": 60, "variable_names": LO_SW_ANGULAR_NAMES, "dataset_name": "imap_codice_l1a_lo_sw_angular", }, CODICEAPID.COD_LO_NSW_ANGULAR_COUNTS: { "num_counters": 1, "num_energy_steps": 128, + "num_spin_sectors": 228, "variable_names": LO_NSW_ANGULAR_NAMES, "dataset_name": "imap_codice_l1a_lo_nsw_angular", }, CODICEAPID.COD_LO_SW_PRIORITY_COUNTS: { "num_counters": 5, "num_energy_steps": 128, + "num_spin_sectors": 12, "variable_names": LO_SW_PRIORITY_NAMES, "dataset_name": "imap_codice_l1a_lo_sw_priority", }, CODICEAPID.COD_LO_NSW_PRIORITY_COUNTS: { "num_counters": 2, "num_energy_steps": 128, + "num_spin_sectors": 12, "variable_names": LO_NSW_PRIORITY_NAMES, "dataset_name": "imap_codice_l1a_lo_nsw_priority", }, CODICEAPID.COD_LO_SW_SPECIES_COUNTS: { "num_counters": 16, "num_energy_steps": 128, + "num_spin_sectors": 1, "variable_names": LO_SW_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_lo_sw_species", }, CODICEAPID.COD_LO_NSW_SPECIES_COUNTS: { "num_counters": 8, "num_energy_steps": 128, + "num_spin_sectors": 1, "variable_names": LO_NSW_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_lo_nsw_species", }, @@ -179,25 +194,6 @@ 9: CoDICECompression.LOSSY_A_LOSSLESS, } -# Collapse table ID lookup table for Lo data products -# The key is the view_id and the value is the ID for the collapse table -LO_COLLAPSE_TABLE_ID_LOOKUP = {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8} - -# Collapse table ID lookup table for Hi data products -# The key is the view_id and the value is the ID for the collapse table -Hi_COLLAPSE_TABLE_ID_LOOKUP = { - 0: 8, - 1: 9, - 2: 10, - 3: 0, - 4: 1, - 5: 2, - 6: 4, - 7: 5, - 8: 6, - 9: 7, -} - # ESA Sweep table ID lookup table # The combination of plan_id and plan_step determine the ESA sweep Table to use # Currently, ESA sweep table 0 is used for every plan_id/plan_step combination, @@ -538,6 
+534,7 @@ 252: 475136, 253: 491520, 254: 507904, + 255: 999999, } LOSSY_B_TABLE = { @@ -796,4 +793,5 @@ 252: 6815744, 253: 7340032, 254: 7864320, + 255: 999999, } From bb9cb3060d085ff46ed5d5e32c762747b4ddf86b Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 10:53:24 -0600 Subject: [PATCH 03/15] Added spin sector attribute definition --- .../cdf/config/imap_codice_l1a_variable_attrs.yaml | 11 +++++++++++ .../cdf/config/imap_codice_l1b_variable_attrs.yaml | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml b/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml index 54701c71c..aee0d7acd 100644 --- a/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml @@ -36,6 +36,17 @@ energy_attrs: VALIDMAX: 127 VAR_TYPE: support_data +spin_sector_attrs: + <<: *default + CATDESC: Spin sector indicating range of spin angles + FIELDNAM: Spin sector + FORMAT: I4 + LABLAXIS: spin sector + UNITS: ' ' + VALIDMIN: 0 + VALIDMAX: 1152 + VAR_TYPE: support_data + # <=== Labels ===> energy_label: CATDESC: Energy per charge (E/q) sweeping step diff --git a/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml b/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml index 1d5d44eb5..c9de1c451 100644 --- a/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml @@ -32,6 +32,17 @@ energy_attrs: VALIDMAX: 127 VAR_TYPE: support_data +spin_sector_attrs: + <<: *default + CATDESC: Spin sector indicating range of spin angles + FIELDNAM: Spin sector + FORMAT: I4 + LABLAXIS: spin sector + UNITS: ' ' + VALIDMIN: 0 + VALIDMAX: 1152 + VAR_TYPE: support_data + # <=== Labels ===> energy_label: CATDESC: Energy per charge (E/q) sweeping step From 6f81befdb4261a61f7ac826095e527137023b1c1 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 10:57:28 -0600 Subject: [PATCH 04/15] Updated code to more accurately unpack science data --- imap_processing/codice/codice_l1a.py | 124 ++++++++++++------ .../tests/codice/test_codice_l1a.py | 29 ++-- 2 files changed, 98 insertions(+), 55 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index 0c925fee7..f9032930a 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -27,16 +27,21 @@ from imap_processing.cdf.utils import met_to_j2000ns from imap_processing.codice import constants from imap_processing.codice.codice_l0 import decom_packets +from imap_processing.codice.decompress import decompress from imap_processing.codice.utils import CODICEAPID, add_metadata_to_array from imap_processing.utils import group_by_apid, sort_by_time logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -# TODO: Decom data arrays need to be decompressed # TODO: In decommutation, how to have a variable length data and then a checksum # after it? (Might be fixed with new XTCE script updates) # TODO: Add support for decomming multiple APIDs from a single file +# TODO: Add these as variables in CDF: SPIN_PERIOD, ST_BIAS_GAIN_MODE, +# SW_BIAS_GAIN_MODE, RGFO_HALF_SPIN, NSO_HALF_SPIN, DATA_QUALITY +# TODO: Use new packet_file_to_dataset() function to simplify things +# TODO: Determine what should go in event data CDF and how it should be +# structured. 
class CoDICEL1aPipeline: @@ -92,6 +97,7 @@ def configure_data_products(self, apid: int) -> None: config = constants.DATA_PRODUCT_CONFIGURATIONS.get(apid) # type: ignore[call-overload] self.num_counters = config["num_counters"] self.num_energy_steps = config["num_energy_steps"] + self.num_spin_sectors = config["num_spin_sectors"] self.variable_names = config["variable_names"] self.dataset_name = config["dataset_name"] @@ -121,11 +127,17 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset # Define coordinates epoch = xr.DataArray( - met_to_j2000ns(met), # TODO: Fix after SIT-3 (see note below) + [met_to_j2000ns(met)], name="epoch", dims=["epoch"], attrs=cdf_attrs.get_variable_attributes("epoch"), ) + spin_sector = xr.DataArray( + np.arange(self.num_spin_sectors), + name="spin_sector", + dims=["spin_sector"], + attrs=cdf_attrs.get_variable_attributes("spin_sector_attrs"), + ) energy_steps = xr.DataArray( np.arange(self.num_energy_steps), name="energy", @@ -145,6 +157,7 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset dataset = xr.Dataset( coords={ "epoch": epoch, + "spin_sector": spin_sector, "energy": energy_steps, "energy_label": energy_label, }, @@ -153,12 +166,8 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset # Create a data variable for each counter for variable_data, variable_name in zip(self.data, self.variable_names): - # TODO: Currently, cdflib doesn't properly write/read CDF files that - # have a single epoch value. To get around this for now, use - # two epoch values and reshape accordingly. Revisit this after - # SIT-3. See https://github.com/MAVENSDC/cdflib/issues/268 - variable_data_arr = np.array(list(variable_data) * 2, dtype=int).reshape( - 2, self.num_energy_steps + variable_data_arr = np.array(variable_data).reshape( + 1, self.num_spin_sectors, self.num_energy_steps ) cdf_attrs_key = ( f"{self.dataset_name.split('imap_codice_l1a_')[-1]}-{variable_name}" @@ -166,7 +175,7 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset dataset[variable_name] = xr.DataArray( variable_data_arr, name=variable_name, - dims=["epoch", "energy"], + dims=["epoch", "spin_sector", "energy"], attrs=cdf_attrs.get_variable_attributes(cdf_attrs_key), ) @@ -262,14 +271,41 @@ def get_esa_sweep_values(self) -> None: sweep_table = sweep_data[sweep_data["table_idx"] == sweep_table_id] self.esa_sweep_values = sweep_table["esa_v"].values - def unpack_science_data(self, science_values: str) -> None: + def unpack_hi_science_data(self, science_values: str) -> None: """ - Unpack the science data from the packet. + Unpack the CoDICE-Hi science data from the packet. - For LO SW Species Counts data, the science data within the packet is a - blob of compressed values of length 2048 bits (16 species * 128 energy - levels). These data need to be divided up by species so that each - species can have their own data variable in the L1A CDF file. + The science data within the packet is a compressed, binary string of + values. + + Parameters + ---------- + science_values : str + A string of binary data representing the science values of the data. 
+ """ + self.compression_algorithm = constants.HI_COMPRESSION_ID_LOOKUP[self.view_id] + + # Decompress the binary string + science_values = decompress(science_values, self.compression_algorithm) + + # Divide up the data by the number of priorities or species + chunk_size = len(science_values) // self.num_counters + science_values_unpacked = [ + science_values[i : i + chunk_size] + for i in range(0, len(science_values), chunk_size) + ] + + # TODO: Determine how to properly divide up hi data. For now, just use + # arrays for each counter + self.data = science_values_unpacked + + def unpack_lo_science_data(self, science_values: str) -> None: + """ + Unpack the CoDICE-Lo science data from the packet. + + The science data within the packet is a compressed, binary string of + values. These data need to be divided up by species or priorities, + and re-arranged into 2D arrays representing energy and spin angle. Parameters ---------- @@ -277,18 +313,27 @@ def unpack_science_data(self, science_values: str) -> None: A string of binary data representing the science values of the data. """ self.compression_algorithm = constants.LO_COMPRESSION_ID_LOOKUP[self.view_id] - self.collapse_table_id = constants.LO_COLLAPSE_TABLE_ID_LOOKUP[self.view_id] - # TODO: Turn this back on after SIT-3 - # For SIT-3, just create appropriate length data arrays of all ones + # Decompress the binary string + science_values = decompress(science_values, self.compression_algorithm) + # Divide up the data by the number of priorities or species - # science_values = packets[0].data["DATA"].raw_value - # num_bits = len(science_values) - # chunk_size = len(science_values) // self.num_counters - # self.data = [ - # science_values[i : i + chunk_size] for i in range(0, num_bits, chunk_size) - # ] - self.data = [["1"] * 128] * self.num_counters + chunk_size = len(science_values) // self.num_counters + science_values_unpacked = [ + science_values[i : i + chunk_size] + for i in range(0, len(science_values), chunk_size) + ] + + # Further divide up the data by energy levels + # The result is a [12,128] array representing 12 spin angles and 128 + # energy levels + self.data = [] + for counter_data in science_values_unpacked: + data_array = [ + counter_data[i : i + self.num_energy_steps] + for i in range(0, len(counter_data), self.num_energy_steps) + ] + self.data.append(data_array) def create_event_dataset( @@ -334,9 +379,6 @@ def create_event_dataset( attrs=cdf_attrs.get_global_attributes(dataset_name), ) - # TODO: Determine what should go in event data CDF and how it should be - # structured. 
- return dataset @@ -385,13 +427,15 @@ def create_hskp_dataset( ) # TODO: Change 'TBD' catdesc and fieldname - # Once housekeeping packet definition file is re-generated with updated - # version of space_packet_parser, can get fieldname and catdesc info via: - # for key, value in (packet.header | packet.data).items(): - # fieldname = value.short_description - # catdesc = value.long_description - # I am holding off making this change until I acquire updated housekeeping - # packets/validation data that match the latest telemetry definitions + # Once housekeeping packet definition file is re-generated with + # updated version of space_packet_parser, can get fieldname and + # catdesc info via: + # for key, value in (packet.header | packet.data).items(): + # fieldname = value.short_description + # catdesc = value.long_description + # I am holding off making this change until I acquire updated + # housekeeping packets/validation data that match the latest telemetry + # definitions for key, value in metadata_arrays.items(): attrs = cdf_attrs.get_variable_attributes("codice_support_attrs") attrs["CATDESC"] = "TBD" @@ -457,8 +501,6 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset: dataset : xarray.Dataset The ``xarray`` dataset containing the science data and supporting metadata. """ - # TODO: Use new packet_file_to_dataset() function to simplify things - # Decom the packets, group data by APID, and sort by time packets = decom_packets(file_path) grouped_data = group_by_apid(packets) @@ -496,7 +538,7 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset: # Determine the start time of the packet met = packets[0].data["ACQ_START_SECONDS"].raw_value - met = [met, met + 1] # TODO: Remove after cdflib fix + # Extract the data science_values = packets[0].data["DATA"].raw_value @@ -506,8 +548,12 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset: # Run the pipeline to create a dataset for the product pipeline = CoDICEL1aPipeline(table_id, plan_id, plan_step, view_id) pipeline.configure_data_products(apid) - pipeline.unpack_science_data(science_values) + if "_lo_" in pipeline.dataset_name: + pipeline.unpack_lo_science_data(science_values) + elif "_hi_" in pipeline.dataset_name: + pipeline.unpack_hi_science_data(science_values) dataset = pipeline.create_science_dataset(met, data_version) logger.info(f"\nFinal data product:\n{dataset}\n") + return dataset diff --git a/imap_processing/tests/codice/test_codice_l1a.py b/imap_processing/tests/codice/test_codice_l1a.py index 383a07b9b..1bef0363a 100644 --- a/imap_processing/tests/codice/test_codice_l1a.py +++ b/imap_processing/tests/codice/test_codice_l1a.py @@ -19,19 +19,19 @@ EXPECTED_ARRAY_SHAPES = [ (99,), # hskp - (1, 128), # hi-counters-aggregated - (1, 128), # hi-counters-singles - (1, 128), # hi-omni - (1, 128), # hi-sectored - (1, 128), # hi-pha - (1, 128), # lo-counters-aggregated - (1, 128), # lo-counters-aggregated - (1, 128), # lo-sw-angular - (1, 128), # lo-nsw-angular - (1, 128), # lo-sw-priority - (1, 128), # lo-nsw-priority - (1, 128), # lo-sw-species - (1, 128), # lo-nsw-species + (1, 6, 1), # hi-counters-aggregated + (1, 16, 1), # hi-counters-singles + (1, 60, 1), # hi-omni + (1, 1152, 1), # hi-sectored + (1, 1), # hi-pha + (1, 36, 128), # lo-counters-aggregated + (1, 144, 128), # lo-counters-aggregated + (1, 60, 128), # lo-sw-angular + (1, 228, 128), # lo-nsw-angular + (1, 12, 128), # lo-sw-priority + (1, 12, 128), # lo-nsw-priority + (1, 1, 128), # lo-sw-species + (1, 
1, 128), # lo-nsw-species (1, 128), # lo-pha ] EXPECTED_ARRAY_SIZES = [ @@ -110,9 +110,6 @@ def test_l1a_cdf_filenames(test_l1a_data: xr.Dataset, expected_logical_source: s assert dataset.attrs["Logical_source"] == expected_logical_source -@pytest.mark.xfail( - reason="Currently failing due to cdflib/epoch issue. See https://github.com/MAVENSDC/cdflib/issues/268" -) @pytest.mark.parametrize( "test_l1a_data, expected_shape", list(zip(TEST_PACKETS, EXPECTED_ARRAY_SHAPES)), From 7ae84a46a55ceffcb72891e242448d4b8baf78ef Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 15:01:13 -0600 Subject: [PATCH 05/15] Fixed mypy errors --- imap_processing/codice/codice_l1a.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index f9032930a..c2efd1ee7 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -42,6 +42,7 @@ # TODO: Use new packet_file_to_dataset() function to simplify things # TODO: Determine what should go in event data CDF and how it should be # structured. +# TODO: Make sure CDF attributes match expected nomenclature class CoDICEL1aPipeline: @@ -286,13 +287,15 @@ def unpack_hi_science_data(self, science_values: str) -> None: self.compression_algorithm = constants.HI_COMPRESSION_ID_LOOKUP[self.view_id] # Decompress the binary string - science_values = decompress(science_values, self.compression_algorithm) + science_values_decompressed = decompress( + science_values, self.compression_algorithm + ) # Divide up the data by the number of priorities or species - chunk_size = len(science_values) // self.num_counters + chunk_size = len(science_values_decompressed) // self.num_counters science_values_unpacked = [ - science_values[i : i + chunk_size] - for i in range(0, len(science_values), chunk_size) + science_values_decompressed[i : i + chunk_size] + for i in range(0, len(science_values_decompressed), chunk_size) ] # TODO: Determine how to properly divide up hi data. 
For now, just use @@ -315,13 +318,15 @@ def unpack_lo_science_data(self, science_values: str) -> None: self.compression_algorithm = constants.LO_COMPRESSION_ID_LOOKUP[self.view_id] # Decompress the binary string - science_values = decompress(science_values, self.compression_algorithm) + science_values_decompressed = decompress( + science_values, self.compression_algorithm + ) # Divide up the data by the number of priorities or species - chunk_size = len(science_values) // self.num_counters + chunk_size = len(science_values_decompressed) // self.num_counters science_values_unpacked = [ - science_values[i : i + chunk_size] - for i in range(0, len(science_values), chunk_size) + science_values_decompressed[i : i + chunk_size] + for i in range(0, len(science_values_decompressed), chunk_size) ] # Further divide up the data by energy levels From dc2602c157ce6a70c5e4086871fc039593285987 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 15:06:01 -0600 Subject: [PATCH 06/15] Fixed mypy errors --- imap_processing/codice/codice_l1a.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index c2efd1ee7..7f036c06c 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -334,7 +334,7 @@ def unpack_lo_science_data(self, science_values: str) -> None: # energy levels self.data = [] for counter_data in science_values_unpacked: - data_array = [ + data_array: list[list[int]] = [ counter_data[i : i + self.num_energy_steps] for i in range(0, len(counter_data), self.num_energy_steps) ] From 953c10c274833baa031fa6a2cf525b49d695290c Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 15:13:55 -0600 Subject: [PATCH 07/15] Fixed mypy errors --- imap_processing/codice/codice_l1a.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index 7f036c06c..f1a0888d5 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -334,11 +334,11 @@ def unpack_lo_science_data(self, science_values: str) -> None: # energy levels self.data = [] for counter_data in science_values_unpacked: - data_array: list[list[int]] = [ + data_array = [ counter_data[i : i + self.num_energy_steps] for i in range(0, len(counter_data), self.num_energy_steps) ] - self.data.append(data_array) + self.data.append(data_array) # type: ignore[arg-type] def create_event_dataset( From 69814dafecae3b5d76bc1c2354d72bbd174a5007 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 15:19:58 -0600 Subject: [PATCH 08/15] Fixed doc build errors --- imap_processing/codice/codice_l1a.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index f1a0888d5..1e857c0e3 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -75,8 +75,10 @@ class CoDICEL1aPipeline: Retrieve the acquisition times via the Lo stepping table. get_esa_sweep_values() Retrieve the ESA sweep values. - unpack_science_data() - Make 4D L1a data product from the decompressed science data. + unpack_hi_science_data() + Decompress, unpack, and restructure CoDICE-Hi data arrays. + unpack_lo_science_data() + Decompress, unpack, and restructure CoDICE-Lo data arrays. 
""" def __init__(self, table_id: int, plan_id: int, plan_step: int, view_id: int): @@ -274,7 +276,7 @@ def get_esa_sweep_values(self) -> None: def unpack_hi_science_data(self, science_values: str) -> None: """ - Unpack the CoDICE-Hi science data from the packet. + Decompress, unpack, and restructure CoDICE-Hi data arrays. The science data within the packet is a compressed, binary string of values. @@ -304,7 +306,7 @@ def unpack_hi_science_data(self, science_values: str) -> None: def unpack_lo_science_data(self, science_values: str) -> None: """ - Unpack the CoDICE-Lo science data from the packet. + Decompress, unpack, and restructure CoDICE-Lo data arrays. The science data within the packet is a compressed, binary string of values. These data need to be divided up by species or priorities, From 4e9252f2edd01cd892586eab658a8d7656f6fe67 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Wed, 28 Aug 2024 11:50:14 -0600 Subject: [PATCH 09/15] Updated expected array shapes --- .../tests/codice/test_codice_l1a.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/imap_processing/tests/codice/test_codice_l1a.py b/imap_processing/tests/codice/test_codice_l1a.py index 1bef0363a..d9fd7207c 100644 --- a/imap_processing/tests/codice/test_codice_l1a.py +++ b/imap_processing/tests/codice/test_codice_l1a.py @@ -19,19 +19,19 @@ EXPECTED_ARRAY_SHAPES = [ (99,), # hskp - (1, 6, 1), # hi-counters-aggregated - (1, 16, 1), # hi-counters-singles - (1, 60, 1), # hi-omni - (1, 1152, 1), # hi-sectored + (1, 1, 6, 1), # hi-counters-aggregated # TODO: Double check with Joey + (1, 1, 16, 1), # hi-counters-singles # TODO: Double check with Joey + (1, 15, 4, 1), # hi-omni # TODO: Double check with Joey + (1, 8, 12, 12), # hi-sectored (1, 1), # hi-pha - (1, 36, 128), # lo-counters-aggregated - (1, 144, 128), # lo-counters-aggregated - (1, 60, 128), # lo-sw-angular - (1, 228, 128), # lo-nsw-angular - (1, 12, 128), # lo-sw-priority - (1, 12, 128), # lo-nsw-priority - (1, 1, 128), # lo-sw-species - (1, 1, 128), # lo-nsw-species + (1, 6, 6, 128), # lo-counters-aggregated + (1, 24, 6, 128), # lo-counters-singles + (1, 5, 12, 128), # lo-sw-angular + (1, 19, 12, 128), # lo-nsw-angular + (1, 1, 12, 128), # lo-sw-priority + (1, 1, 12, 128), # lo-nsw-priority + (1, 1, 1, 128), # lo-sw-species + (1, 1, 1, 128), # lo-nsw-species (1, 128), # lo-pha ] EXPECTED_ARRAY_SIZES = [ From c7a8d09bc97b7571c9f422d2e629521f969e553e Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Wed, 28 Aug 2024 11:51:03 -0600 Subject: [PATCH 10/15] Avoiding bitarray dependency --- imap_processing/codice/decompress.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/imap_processing/codice/decompress.py b/imap_processing/codice/decompress.py index e4abf83ea..bb92a75c4 100644 --- a/imap_processing/codice/decompress.py +++ b/imap_processing/codice/decompress.py @@ -29,8 +29,6 @@ import lzma from enum import IntEnum -import bitarray - from imap_processing.codice.constants import LOSSY_A_TABLE, LOSSY_B_TABLE from imap_processing.codice.utils import CoDICECompression @@ -118,7 +116,9 @@ def decompress(compressed_binary: str, algorithm: IntEnum) -> list[int]: The 24- or 32-bit decompressed values. 
""" # Convert the binary string to a byte stream - compressed_bytes = bitarray.bitarray(compressed_binary).tobytes() + compressed_bytes = int(compressed_binary, 2).to_bytes( + (len(compressed_binary) + 7) // 8, byteorder="big" + ) # Apply the appropriate decompression algorithm if algorithm == CoDICECompression.NO_COMPRESSION: From 9bfb8169b22bd911b7b3677375719224275dad67 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Wed, 28 Aug 2024 11:52:28 -0600 Subject: [PATCH 11/15] Added instrument config key to make some conditional areas of the processing pipeline a bit more readable; added proper numbers for positions/energies/spin_sectors --- imap_processing/codice/constants.py | 50 +++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/imap_processing/codice/constants.py b/imap_processing/codice/constants.py index 9054431f4..f063087f2 100644 --- a/imap_processing/codice/constants.py +++ b/imap_processing/codice/constants.py @@ -79,87 +79,111 @@ DATA_PRODUCT_CONFIGURATIONS = { CODICEAPID.COD_HI_INST_COUNTS_AGGREGATED: { "num_counters": 1, - "num_energy_steps": 1, - "num_spin_sectors": 6, + "num_energy_steps": 1, # TODO: Double check with Joey + "num_positions": 6, # TODO: Double check with Joey + "num_spin_sectors": 1, "variable_names": HI_INST_COUNTS_AGGREGATED_NAMES, "dataset_name": "imap_codice_l1a_hi_counters_aggregated", + "instrument": "hi", }, CODICEAPID.COD_HI_INST_COUNTS_SINGLES: { "num_counters": 3, - "num_energy_steps": 1, - "num_spin_sectors": 16, + "num_energy_steps": 1, # TODO: Double check with Joey + "num_positions": 16, # TODO: Double check with Joey + "num_spin_sectors": 1, "variable_names": HI_INST_COUNTS_SINGLES_NAMES, "dataset_name": "imap_codice_l1a_hi_counters_singles", + "instrument": "hi", }, CODICEAPID.COD_HI_OMNI_SPECIES_COUNTS: { "num_counters": 8, - "num_energy_steps": 1, - "num_spin_sectors": 60, # TODO: Double-check this + "num_energy_steps": 15, # TODO: Double check with Joey + "num_positions": 4, # TODO: Double check with Joey + "num_spin_sectors": 1, "variable_names": HI_OMNI_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_hi_omni", + "instrument": "hi", }, CODICEAPID.COD_HI_SECT_SPECIES_COUNTS: { "num_counters": 4, - "num_energy_steps": 1, - "num_spin_sectors": 1152, # TODO: Double-check this + "num_energy_steps": 8, + "num_positions": 12, + "num_spin_sectors": 12, "variable_names": HI_SECT_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_hi_sectored", + "instrument": "hi", }, CODICEAPID.COD_LO_INST_COUNTS_AGGREGATED: { "num_counters": 1, "num_energy_steps": 128, - "num_spin_sectors": 36, + "num_positions": 6, + "num_spin_sectors": 6, "variable_names": LO_INST_COUNTS_AGGREGATED_NAMES, "dataset_name": "imap_codice_l1a_lo_counters_aggregated", + "instrument": "lo", }, CODICEAPID.COD_LO_INST_COUNTS_SINGLES: { "num_counters": 1, "num_energy_steps": 128, - "num_spin_sectors": 144, + "num_positions": 24, + "num_spin_sectors": 6, "variable_names": LO_INST_COUNTS_SINGLES_NAMES, "dataset_name": "imap_codice_l1a_lo_counters_singles", + "instrument": "lo", }, CODICEAPID.COD_LO_SW_ANGULAR_COUNTS: { "num_counters": 4, "num_energy_steps": 128, - "num_spin_sectors": 60, + "num_positions": 5, + "num_spin_sectors": 12, "variable_names": LO_SW_ANGULAR_NAMES, "dataset_name": "imap_codice_l1a_lo_sw_angular", + "instrument": "lo", }, CODICEAPID.COD_LO_NSW_ANGULAR_COUNTS: { "num_counters": 1, "num_energy_steps": 128, - "num_spin_sectors": 228, + "num_positions": 19, + "num_spin_sectors": 12, "variable_names": LO_NSW_ANGULAR_NAMES, 
"dataset_name": "imap_codice_l1a_lo_nsw_angular", + "instrument": "lo", }, CODICEAPID.COD_LO_SW_PRIORITY_COUNTS: { "num_counters": 5, "num_energy_steps": 128, + "num_positions": 1, "num_spin_sectors": 12, "variable_names": LO_SW_PRIORITY_NAMES, "dataset_name": "imap_codice_l1a_lo_sw_priority", + "instrument": "lo", }, CODICEAPID.COD_LO_NSW_PRIORITY_COUNTS: { "num_counters": 2, "num_energy_steps": 128, + "num_positions": 1, "num_spin_sectors": 12, "variable_names": LO_NSW_PRIORITY_NAMES, "dataset_name": "imap_codice_l1a_lo_nsw_priority", + "instrument": "lo", }, CODICEAPID.COD_LO_SW_SPECIES_COUNTS: { "num_counters": 16, "num_energy_steps": 128, + "num_positions": 1, "num_spin_sectors": 1, "variable_names": LO_SW_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_lo_sw_species", + "instrument": "lo", }, CODICEAPID.COD_LO_NSW_SPECIES_COUNTS: { "num_counters": 8, "num_energy_steps": 128, + "num_positions": 1, "num_spin_sectors": 1, "variable_names": LO_NSW_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_lo_nsw_species", + "instrument": "lo", }, } @@ -793,5 +817,5 @@ 252: 6815744, 253: 7340032, 254: 7864320, - 255: 999999, + 255: 9999999, } From c4259100d87ec2741778bf7a1e9bfc602abfd1a5 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Wed, 28 Aug 2024 11:53:02 -0600 Subject: [PATCH 12/15] Added attrs for inst_az coordinate --- .../config/imap_codice_l1a_variable_attrs.yaml | 15 +++++++++++++-- .../config/imap_codice_l1b_variable_attrs.yaml | 11 +++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml b/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml index aee0d7acd..9d7a535d9 100644 --- a/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml @@ -36,15 +36,26 @@ energy_attrs: VALIDMAX: 127 VAR_TYPE: support_data +inst_az_attrs: + <<: *default + CATDESC: Azimuth + FIELDNAM: Azimuth + FORMAT: I2 + LABLAXIS: Azimuth + UNITS: ' ' + VALIDMIN: 0 + VALIDMAX: 31 + VAR_TYPE: support_data + spin_sector_attrs: <<: *default CATDESC: Spin sector indicating range of spin angles FIELDNAM: Spin sector - FORMAT: I4 + FORMAT: I2 LABLAXIS: spin sector UNITS: ' ' VALIDMIN: 0 - VALIDMAX: 1152 + VALIDMAX: 11 VAR_TYPE: support_data # <=== Labels ===> diff --git a/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml b/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml index c9de1c451..cbb14205f 100644 --- a/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml @@ -32,6 +32,17 @@ energy_attrs: VALIDMAX: 127 VAR_TYPE: support_data +inst_az_attrs: + <<: *default + CATDESC: Azimuth + FIELDNAM: Azimuth + FORMAT: I2 + LABLAXIS: Azimuth + UNITS: ' ' + VALIDMIN: 0 + VALIDMAX: 31 + VAR_TYPE: support_data + spin_sector_attrs: <<: *default CATDESC: Spin sector indicating range of spin angles From a21727c9274994ae5d4959602fa92220594384cd Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Wed, 28 Aug 2024 11:54:49 -0600 Subject: [PATCH 13/15] Added further unpacking of science data to properly restructure data arrays by positions, spin_sectors, and energies --- imap_processing/codice/codice_l1a.py | 116 +++++++++++++-------------- 1 file changed, 55 insertions(+), 61 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index 1e857c0e3..a37671c7b 100644 --- a/imap_processing/codice/codice_l1a.py +++ 
b/imap_processing/codice/codice_l1a.py @@ -101,8 +101,10 @@ def configure_data_products(self, apid: int) -> None: self.num_counters = config["num_counters"] self.num_energy_steps = config["num_energy_steps"] self.num_spin_sectors = config["num_spin_sectors"] + self.num_positions = config["num_positions"] self.variable_names = config["variable_names"] self.dataset_name = config["dataset_name"] + self.instrument = config["instrument"] def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset: """ @@ -135,6 +137,12 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset dims=["epoch"], attrs=cdf_attrs.get_variable_attributes("epoch"), ) + inst_az = xr.DataArray( + np.arange(self.num_positions), + name="inst_az", + dims=["inst_az"], + attrs=cdf_attrs.get_variable_attributes("inst_az_attrs"), + ) spin_sector = xr.DataArray( np.arange(self.num_spin_sectors), name="spin_sector", @@ -160,6 +168,7 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset dataset = xr.Dataset( coords={ "epoch": epoch, + "inst_az": inst_az, "spin_sector": spin_sector, "energy": energy_steps, "energy_label": energy_label, @@ -169,21 +178,34 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset # Create a data variable for each counter for variable_data, variable_name in zip(self.data, self.variable_names): - variable_data_arr = np.array(variable_data).reshape( - 1, self.num_spin_sectors, self.num_energy_steps - ) + # Data arrays are structured depending on the instrument + if self.instrument == "lo": + variable_data_arr = np.array(variable_data).reshape( + 1, self.num_positions, self.num_spin_sectors, self.num_energy_steps + ) + dims = ["epoch", "inst_az", "spin_sector", "energy"] + elif self.instrument == "hi": + variable_data_arr = np.array(variable_data).reshape( + 1, self.num_energy_steps, self.num_positions, self.num_spin_sectors + ) + dims = ["epoch", "energy", "inst_az", "spin_sector"] + + # Get the CDF attributes cdf_attrs_key = ( f"{self.dataset_name.split('imap_codice_l1a_')[-1]}-{variable_name}" ) + attrs = cdf_attrs.get_variable_attributes(cdf_attrs_key) + + # Create the CDF data variable dataset[variable_name] = xr.DataArray( variable_data_arr, name=variable_name, - dims=["epoch", "spin_sector", "energy"], - attrs=cdf_attrs.get_variable_attributes(cdf_attrs_key), + dims=dims, + attrs=attrs, ) # Add ESA Sweep Values and acquisition times (lo only) - if "_lo_" in self.dataset_name: + if self.instrument == "lo": self.get_esa_sweep_values() self.get_acquisition_times() dataset["esa_sweep_values"] = xr.DataArray( @@ -274,43 +296,15 @@ def get_esa_sweep_values(self) -> None: sweep_table = sweep_data[sweep_data["table_idx"] == sweep_table_id] self.esa_sweep_values = sweep_table["esa_v"].values - def unpack_hi_science_data(self, science_values: str) -> None: + def unpack_science_data(self, science_values: str) -> None: """ - Decompress, unpack, and restructure CoDICE-Hi data arrays. + Decompress, unpack, and restructure science data arrays. The science data within the packet is a compressed, binary string of - values. - - Parameters - ---------- - science_values : str - A string of binary data representing the science values of the data. 
-        """
-        self.compression_algorithm = constants.HI_COMPRESSION_ID_LOOKUP[self.view_id]
-
-        # Decompress the binary string
-        science_values_decompressed = decompress(
-            science_values, self.compression_algorithm
-        )
-
-        # Divide up the data by the number of priorities or species
-        chunk_size = len(science_values_decompressed) // self.num_counters
-        science_values_unpacked = [
-            science_values_decompressed[i : i + chunk_size]
-            for i in range(0, len(science_values_decompressed), chunk_size)
-        ]
+        values. These data need to be divided up by species or priorities
+        (referred to generally as "counters"), and re-arranged into 3D arrays
+        representing spin sectors, positions, and energies (the order of
+        which depends on the instrument).

         Parameters
         ----------
@@ -319,28 +313,31 @@ def unpack_lo_science_data(self, science_values: str) -> None:
         """
         self.compression_algorithm = constants.LO_COMPRESSION_ID_LOOKUP[self.view_id]

-        # Decompress the binary string
+        # Decompress the binary string into a list of integers
         science_values_decompressed = decompress(
             science_values, self.compression_algorithm
         )

-        # Divide up the data by the number of priorities or species
-        chunk_size = len(science_values_decompressed) // self.num_counters
-        science_values_unpacked = [
-            science_values_decompressed[i : i + chunk_size]
-            for i in range(0, len(science_values_decompressed), chunk_size)
-        ]
+        # Re-arrange the counter data
+        # For CoDICE-lo, data are 3D arrays with a shape representing
+        # [<num_positions>, <num_spin_sectors>, <num_energies>]
+        if self.instrument == "lo":
+            self.data = np.array(science_values_decompressed, dtype=np.uint).reshape(
+                self.num_counters,
+                self.num_positions,
+                self.num_spin_sectors,
+                self.num_energy_steps,
+            )

-        # Further divide up the data by energy levels
-        # The result is a [12,128] array representing 12 spin angles and 128
-        # energy levels
-        self.data = []
-        for counter_data in science_values_unpacked:
-            data_array = [
-                counter_data[i : i + self.num_energy_steps]
-                for i in range(0, len(counter_data), self.num_energy_steps)
-            ]
-            self.data.append(data_array)  # type: ignore[arg-type]
+        # For CoDICE-hi, data are 3D arrays with a shape representing
+        # [<num_energies>, <num_positions>, <num_spin_sectors>]
+        elif self.instrument == "hi":
+            self.data = np.array(science_values_decompressed, dtype=np.uint).reshape(
+                self.num_counters,
+                self.num_energy_steps,
+                self.num_positions,
+                self.num_spin_sectors,
+            )


 def create_event_dataset(
@@ -555,10 +552,7 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset:
         # Run the pipeline to create a dataset for the product
         pipeline = CoDICEL1aPipeline(table_id, plan_id, plan_step, view_id)
         pipeline.configure_data_products(apid)
-        if "_lo_" in pipeline.dataset_name:
-            pipeline.unpack_lo_science_data(science_values)
-        elif "_hi_" in pipeline.dataset_name:
-            pipeline.unpack_hi_science_data(science_values)
+        pipeline.unpack_science_data(science_values)
         dataset = pipeline.create_science_dataset(met, data_version)
         logger.info(f"\nFinal data product:\n{dataset}\n")

From 38704daf2690c2db679034c456504d5dae30150c Mon Sep 17 00:00:00 2001
From: Matthew Bourque
Date: Wed, 28 Aug 2024 11:58:21 -0600
Subject: [PATCH 14/15] Fixed doc build error

---
 imap_processing/codice/codice_l1a.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py
index a37671c7b..08c55ff1e 100644
--- a/imap_processing/codice/codice_l1a.py
+++ b/imap_processing/codice/codice_l1a.py
@@ -75,10 +75,8 @@ class CoDICEL1aPipeline:
         Retrieve the acquisition times via the Lo stepping table.
     get_esa_sweep_values()
         Retrieve the ESA sweep values.
-    unpack_hi_science_data()
-        Decompress, unpack, and restructure CoDICE-Hi data arrays.
-    unpack_lo_science_data()
-        Decompress, unpack, and restructure CoDICE-Lo data arrays.
+    unpack_science_data()
+        Decompress, unpack, and restructure science data arrays.
     """

     def __init__(self, table_id: int, plan_id: int, plan_step: int, view_id: int):

From 4d58231d74c9d2f9d4a389a7fb5ae75259f66682 Mon Sep 17 00:00:00 2001
From: Matthew Bourque
Date: Fri, 30 Aug 2024 10:23:52 -0600
Subject: [PATCH 15/15] Addressed review comments

---
 imap_processing/codice/codice_l1a.py | 4 ++--
 imap_processing/codice/decompress.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py
index 08c55ff1e..bc59ace81 100644
--- a/imap_processing/codice/codice_l1a.py
+++ b/imap_processing/codice/codice_l1a.py
@@ -320,7 +320,7 @@ def unpack_science_data(self, science_values: str) -> None:
         # For CoDICE-lo, data are 3D arrays with a shape representing
         # [<num_positions>, <num_spin_sectors>, <num_energies>]
         if self.instrument == "lo":
-            self.data = np.array(science_values_decompressed, dtype=np.uint).reshape(
+            self.data = np.array(science_values_decompressed, dtype=np.uint32).reshape(
                 self.num_counters,
                 self.num_positions,
                 self.num_spin_sectors,
@@ -330,7 +330,7 @@ def unpack_science_data(self, science_values: str) -> None:
         # For CoDICE-hi, data are 3D arrays with a shape representing
         # [<num_energies>, <num_positions>, <num_spin_sectors>]
         elif self.instrument == "hi":
-            self.data = np.array(science_values_decompressed, dtype=np.uint).reshape(
+            self.data = np.array(science_values_decompressed, dtype=np.uint32).reshape(
                 self.num_counters,
                 self.num_energy_steps,
                 self.num_positions,
diff --git a/imap_processing/codice/decompress.py b/imap_processing/codice/decompress.py
index bb92a75c4..9a48b72b2 100644
--- a/imap_processing/codice/decompress.py
+++ b/imap_processing/codice/decompress.py
@@ -56,7 +56,7 @@ def _apply_lossy_b(compressed_bytes: bytes) -> list[int]:
     """
-    Apply 8-bit to 32-bit Lossy A decompression algorithm.
+    Apply 8-bit to 32-bit Lossy B decompression algorithm.

     The Lossy B algorithm uses a lookup table imported into this module.