From 0f2247831aa0ce8cb116b440fd19bf7bbf695551 Mon Sep 17 00:00:00 2001
From: Matthew Bourque
Date: Tue, 20 Aug 2024 10:49:40 -0600
Subject: [PATCH 01/22] Updated decompression algorithm to read in a binary
 string instead of individual integer values

---
 imap_processing/codice/decompress.py | 90 ++++++++++---------
 .../tests/codice/test_decompress.py  | 31 ++++---
 2 files changed, 63 insertions(+), 58 deletions(-)

diff --git a/imap_processing/codice/decompress.py b/imap_processing/codice/decompress.py
index bbbad4b6c..e4abf83ea 100644
--- a/imap_processing/codice/decompress.py
+++ b/imap_processing/codice/decompress.py
@@ -26,18 +26,16 @@
 This information was provided via email from Greg Dunn on Oct 23, 2023
 """
 
-# TODO: Add support for performing decompression of a list of values instead of
-#       a single value
-
 import lzma
 from enum import IntEnum
-from typing import Union
+
+import bitarray
 
 from imap_processing.codice.constants import LOSSY_A_TABLE, LOSSY_B_TABLE
 from imap_processing.codice.utils import CoDICECompression
 
 
-def _apply_lossy_a(compressed_value: int) -> int:
+def _apply_lossy_a(compressed_bytes: bytes) -> list[int]:
     """
     Apply 8-bit to 32-bit Lossy A decompression algorithm.
 
@@ -45,63 +43,62 @@
 
     Parameters
     ----------
-    compressed_value : int
-        The compressed 8-bit value.
+    compressed_bytes : bytes
+        The compressed byte stream.
 
     Returns
     -------
-    int
-        The 24- or 32-bit decompressed value.
+    decompressed_values : list[int]
+        The 24- or 32-bit decompressed values.
     """
-    return LOSSY_A_TABLE[compressed_value]
+    compressed_values = list(compressed_bytes)
+    decompressed_values = [LOSSY_A_TABLE[item] for item in compressed_values]
+    return decompressed_values
 
 
-def _apply_lossy_b(compressed_value: int) -> int:
+def _apply_lossy_b(compressed_bytes: bytes) -> list[int]:
     """
     Apply 8-bit to 32-bit Lossy B decompression algorithm.
 
     The Lossy B algorithm uses a lookup table imported into this module.
 
     Parameters
     ----------
-    compressed_value : int
-        The compressed 8-bit value.
+    compressed_bytes : bytes
+        The compressed byte stream.
 
     Returns
     -------
-    int
-        The 24- or 32-bit decompressed value.
+    decompressed_values : list[int]
+        The 24- or 32-bit decompressed values.
     """
-    return LOSSY_B_TABLE[compressed_value]
+    compressed_values = list(compressed_bytes)
+    decompressed_values = [LOSSY_B_TABLE[item] for item in compressed_values]
+    return decompressed_values
 
 
-def _apply_lzma_lossless(compressed_value: Union[int, bytes]) -> int:
+def _apply_lzma_lossless(compressed_bytes: bytes) -> bytes:
     """
     Apply LZMA lossless decompression algorithm.
 
     Parameters
     ----------
-    compressed_value : int or bytes
-        The compressed 8-bit value.
+    compressed_bytes : bytes
+        The compressed byte stream.
 
     Returns
     -------
-    decompressed_value : int
-        The 24- or 32-bit decompressed value.
+    lzma_decompressed_values : bytes
+        The LZMA-decompressed byte stream.
""" - if isinstance(compressed_value, int): - bytes_compressed_value = compressed_value.to_bytes(compressed_value, "big") - else: - bytes_compressed_value = compressed_value - decompressed_value = lzma.decompress(bytes_compressed_value) - decompressed_value_int = int.from_bytes(decompressed_value, byteorder="big") + lzma_decompressed_values = lzma.decompress(compressed_bytes) - return decompressed_value_int + return lzma_decompressed_values -def decompress(compressed_value: int, algorithm: IntEnum) -> int: +def decompress(compressed_binary: str, algorithm: IntEnum) -> list[int]: """ - Will decompress the value. + Perform decompression on a binary string into a list of integers. Apply the appropriate decompression algorithm(s) based on the value of the ``algorithm`` attribute. One or more individual algorithms may be @@ -109,32 +106,37 @@ def decompress(compressed_value: int, algorithm: IntEnum) -> int: Parameters ---------- - compressed_value : int - The 8-bit compressed value to decompress. + compressed_binary : str + The compressed binary string. algorithm : int The algorithm to apply. Supported algorithms are provided in the ``codice_utils.CoDICECompression`` class. Returns ------- - decompressed_value : int - The 24- or 32-bit decompressed value. + decompressed_values : list[int] + The 24- or 32-bit decompressed values. """ + # Convert the binary string to a byte stream + compressed_bytes = bitarray.bitarray(compressed_binary).tobytes() + + # Apply the appropriate decompression algorithm if algorithm == CoDICECompression.NO_COMPRESSION: - decompressed_value = compressed_value + decompressed_values = list(compressed_bytes) elif algorithm == CoDICECompression.LOSSY_A: - decompressed_value = _apply_lossy_a(compressed_value) + decompressed_values = _apply_lossy_a(compressed_bytes) elif algorithm == CoDICECompression.LOSSY_B: - decompressed_value = _apply_lossy_b(compressed_value) + decompressed_values = _apply_lossy_b(compressed_bytes) elif algorithm == CoDICECompression.LOSSLESS: - decompressed_value = _apply_lzma_lossless(compressed_value) + decompressed_bytes = _apply_lzma_lossless(compressed_bytes) + decompressed_values = list(decompressed_bytes) elif algorithm == CoDICECompression.LOSSY_A_LOSSLESS: - decompressed_value = _apply_lzma_lossless(compressed_value) - decompressed_value = _apply_lossy_a(decompressed_value) + decompressed_bytes = _apply_lzma_lossless(compressed_bytes) + decompressed_values = _apply_lossy_a(decompressed_bytes) elif algorithm == CoDICECompression.LOSSY_B_LOSSLESS: - decompressed_value = _apply_lzma_lossless(compressed_value) - decompressed_value = _apply_lossy_b(decompressed_value) + decompressed_bytes = _apply_lzma_lossless(compressed_bytes) + decompressed_values = _apply_lossy_b(decompressed_bytes) else: raise ValueError(f"{algorithm} is not supported") - return decompressed_value + return decompressed_values diff --git a/imap_processing/tests/codice/test_decompress.py b/imap_processing/tests/codice/test_decompress.py index e74f60d73..853a94ccc 100644 --- a/imap_processing/tests/codice/test_decompress.py +++ b/imap_processing/tests/codice/test_decompress.py @@ -9,34 +9,37 @@ from imap_processing.codice.utils import CoDICECompression # Test the algorithms using input value of 234 (picked randomly) -LZMA_EXAMPLE = lzma.compress((234).to_bytes(1, byteorder="big")) +lzma_bytes = lzma.compress((234).to_bytes(1, byteorder="big")) +LZMA_EXAMPLE = "".join(format(byte, "08b") for byte in lzma_bytes) TEST_DATA = [ - (234, CoDICECompression.NO_COMPRESSION, 234), - (234, 
CoDICECompression.LOSSY_A, 221184), - (234, CoDICECompression.LOSSY_B, 1441792), - (LZMA_EXAMPLE, CoDICECompression.LOSSLESS, 234), - (LZMA_EXAMPLE, CoDICECompression.LOSSY_A_LOSSLESS, 221184), - (LZMA_EXAMPLE, CoDICECompression.LOSSY_B_LOSSLESS, 1441792), + ("11101010", CoDICECompression.NO_COMPRESSION, [234]), + ("11101010", CoDICECompression.LOSSY_A, [221184]), + ("11101010", CoDICECompression.LOSSY_B, [1441792]), + (LZMA_EXAMPLE, CoDICECompression.LOSSLESS, [234]), + (LZMA_EXAMPLE, CoDICECompression.LOSSY_A_LOSSLESS, [221184]), + (LZMA_EXAMPLE, CoDICECompression.LOSSY_B_LOSSLESS, [1441792]), ] @pytest.mark.parametrize( - ("compressed_value", "algorithm", "expected_result"), TEST_DATA + ("compressed_binary", "algorithm", "expected_result"), TEST_DATA ) -def test_decompress(compressed_value: int, algorithm: IntEnum, expected_result: int): +def test_decompress( + compressed_binary: str, algorithm: IntEnum, expected_result: list[int] +): """Tests the ``decompress`` function Parameters ---------- - compressed_value : int - The compressed value to test decompression on + compressed_binary : str + The compressed binary string to test decompression on algorithm : IntEnum The algorithm to use in decompression - expected_result : int + expected_result : list[int] The expected, decompressed value """ - decompressed_value = decompress(compressed_value, algorithm) + decompressed_value = decompress(compressed_binary, algorithm) assert decompressed_value == expected_result @@ -44,4 +47,4 @@ def test_decompress_raises(): """Tests that the ``decompress`` function raises with an unknown algorithm""" with pytest.raises(ValueError, match="some_unsupported_algorithm"): - decompress(234, "some_unsupported_algorithm") + decompress("11101010", "some_unsupported_algorithm") From caf55ec3427fa3c13abea74292ebb7bbd1154a3f Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 10:51:20 -0600 Subject: [PATCH 02/22] Removed collapse table lookup, as the info needed for these is instead "hard coded" into the configuration dictionary; Added spin sector config variable --- imap_processing/codice/constants.py | 44 ++++++++++++++--------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/imap_processing/codice/constants.py b/imap_processing/codice/constants.py index a8bcf3503..9054431f4 100644 --- a/imap_processing/codice/constants.py +++ b/imap_processing/codice/constants.py @@ -12,6 +12,9 @@ ESA = ElectroStatic Analyzer """ +# TODO: What to do in the case of a value of 255 in LOSSY_A and LOSSY_B +# compression? 
+ from imap_processing.codice.utils import CODICEAPID, CoDICECompression APIDS_FOR_SCIENCE_PROCESSING = [ @@ -76,73 +79,85 @@ DATA_PRODUCT_CONFIGURATIONS = { CODICEAPID.COD_HI_INST_COUNTS_AGGREGATED: { "num_counters": 1, - "num_energy_steps": 128, + "num_energy_steps": 1, + "num_spin_sectors": 6, "variable_names": HI_INST_COUNTS_AGGREGATED_NAMES, "dataset_name": "imap_codice_l1a_hi_counters_aggregated", }, CODICEAPID.COD_HI_INST_COUNTS_SINGLES: { "num_counters": 3, - "num_energy_steps": 128, + "num_energy_steps": 1, + "num_spin_sectors": 16, "variable_names": HI_INST_COUNTS_SINGLES_NAMES, "dataset_name": "imap_codice_l1a_hi_counters_singles", }, CODICEAPID.COD_HI_OMNI_SPECIES_COUNTS: { "num_counters": 8, - "num_energy_steps": 128, + "num_energy_steps": 1, + "num_spin_sectors": 60, # TODO: Double-check this "variable_names": HI_OMNI_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_hi_omni", }, CODICEAPID.COD_HI_SECT_SPECIES_COUNTS: { "num_counters": 4, - "num_energy_steps": 128, + "num_energy_steps": 1, + "num_spin_sectors": 1152, # TODO: Double-check this "variable_names": HI_SECT_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_hi_sectored", }, CODICEAPID.COD_LO_INST_COUNTS_AGGREGATED: { "num_counters": 1, "num_energy_steps": 128, + "num_spin_sectors": 36, "variable_names": LO_INST_COUNTS_AGGREGATED_NAMES, "dataset_name": "imap_codice_l1a_lo_counters_aggregated", }, CODICEAPID.COD_LO_INST_COUNTS_SINGLES: { "num_counters": 1, "num_energy_steps": 128, + "num_spin_sectors": 144, "variable_names": LO_INST_COUNTS_SINGLES_NAMES, "dataset_name": "imap_codice_l1a_lo_counters_singles", }, CODICEAPID.COD_LO_SW_ANGULAR_COUNTS: { "num_counters": 4, "num_energy_steps": 128, + "num_spin_sectors": 60, "variable_names": LO_SW_ANGULAR_NAMES, "dataset_name": "imap_codice_l1a_lo_sw_angular", }, CODICEAPID.COD_LO_NSW_ANGULAR_COUNTS: { "num_counters": 1, "num_energy_steps": 128, + "num_spin_sectors": 228, "variable_names": LO_NSW_ANGULAR_NAMES, "dataset_name": "imap_codice_l1a_lo_nsw_angular", }, CODICEAPID.COD_LO_SW_PRIORITY_COUNTS: { "num_counters": 5, "num_energy_steps": 128, + "num_spin_sectors": 12, "variable_names": LO_SW_PRIORITY_NAMES, "dataset_name": "imap_codice_l1a_lo_sw_priority", }, CODICEAPID.COD_LO_NSW_PRIORITY_COUNTS: { "num_counters": 2, "num_energy_steps": 128, + "num_spin_sectors": 12, "variable_names": LO_NSW_PRIORITY_NAMES, "dataset_name": "imap_codice_l1a_lo_nsw_priority", }, CODICEAPID.COD_LO_SW_SPECIES_COUNTS: { "num_counters": 16, "num_energy_steps": 128, + "num_spin_sectors": 1, "variable_names": LO_SW_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_lo_sw_species", }, CODICEAPID.COD_LO_NSW_SPECIES_COUNTS: { "num_counters": 8, "num_energy_steps": 128, + "num_spin_sectors": 1, "variable_names": LO_NSW_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_lo_nsw_species", }, @@ -179,25 +194,6 @@ 9: CoDICECompression.LOSSY_A_LOSSLESS, } -# Collapse table ID lookup table for Lo data products -# The key is the view_id and the value is the ID for the collapse table -LO_COLLAPSE_TABLE_ID_LOOKUP = {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8} - -# Collapse table ID lookup table for Hi data products -# The key is the view_id and the value is the ID for the collapse table -Hi_COLLAPSE_TABLE_ID_LOOKUP = { - 0: 8, - 1: 9, - 2: 10, - 3: 0, - 4: 1, - 5: 2, - 6: 4, - 7: 5, - 8: 6, - 9: 7, -} - # ESA Sweep table ID lookup table # The combination of plan_id and plan_step determine the ESA sweep Table to use # Currently, ESA sweep table 0 is used for every plan_id/plan_step combination, @@ -538,6 
+534,7 @@ 252: 475136, 253: 491520, 254: 507904, + 255: 999999, } LOSSY_B_TABLE = { @@ -796,4 +793,5 @@ 252: 6815744, 253: 7340032, 254: 7864320, + 255: 999999, } From bb9cb3060d085ff46ed5d5e32c762747b4ddf86b Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 10:53:24 -0600 Subject: [PATCH 03/22] Added spin sector attribute definition --- .../cdf/config/imap_codice_l1a_variable_attrs.yaml | 11 +++++++++++ .../cdf/config/imap_codice_l1b_variable_attrs.yaml | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml b/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml index 54701c71c..aee0d7acd 100644 --- a/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml @@ -36,6 +36,17 @@ energy_attrs: VALIDMAX: 127 VAR_TYPE: support_data +spin_sector_attrs: + <<: *default + CATDESC: Spin sector indicating range of spin angles + FIELDNAM: Spin sector + FORMAT: I4 + LABLAXIS: spin sector + UNITS: ' ' + VALIDMIN: 0 + VALIDMAX: 1152 + VAR_TYPE: support_data + # <=== Labels ===> energy_label: CATDESC: Energy per charge (E/q) sweeping step diff --git a/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml b/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml index 1d5d44eb5..c9de1c451 100644 --- a/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml @@ -32,6 +32,17 @@ energy_attrs: VALIDMAX: 127 VAR_TYPE: support_data +spin_sector_attrs: + <<: *default + CATDESC: Spin sector indicating range of spin angles + FIELDNAM: Spin sector + FORMAT: I4 + LABLAXIS: spin sector + UNITS: ' ' + VALIDMIN: 0 + VALIDMAX: 1152 + VAR_TYPE: support_data + # <=== Labels ===> energy_label: CATDESC: Energy per charge (E/q) sweeping step From 6f81befdb4261a61f7ac826095e527137023b1c1 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 10:57:28 -0600 Subject: [PATCH 04/22] Updated code to more accurately unpack science data --- imap_processing/codice/codice_l1a.py | 124 ++++++++++++------ .../tests/codice/test_codice_l1a.py | 29 ++-- 2 files changed, 98 insertions(+), 55 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index 0c925fee7..f9032930a 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -27,16 +27,21 @@ from imap_processing.cdf.utils import met_to_j2000ns from imap_processing.codice import constants from imap_processing.codice.codice_l0 import decom_packets +from imap_processing.codice.decompress import decompress from imap_processing.codice.utils import CODICEAPID, add_metadata_to_array from imap_processing.utils import group_by_apid, sort_by_time logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -# TODO: Decom data arrays need to be decompressed # TODO: In decommutation, how to have a variable length data and then a checksum # after it? (Might be fixed with new XTCE script updates) # TODO: Add support for decomming multiple APIDs from a single file +# TODO: Add these as variables in CDF: SPIN_PERIOD, ST_BIAS_GAIN_MODE, +# SW_BIAS_GAIN_MODE, RGFO_HALF_SPIN, NSO_HALF_SPIN, DATA_QUALITY +# TODO: Use new packet_file_to_dataset() function to simplify things +# TODO: Determine what should go in event data CDF and how it should be +# structured. 
class CoDICEL1aPipeline: @@ -92,6 +97,7 @@ def configure_data_products(self, apid: int) -> None: config = constants.DATA_PRODUCT_CONFIGURATIONS.get(apid) # type: ignore[call-overload] self.num_counters = config["num_counters"] self.num_energy_steps = config["num_energy_steps"] + self.num_spin_sectors = config["num_spin_sectors"] self.variable_names = config["variable_names"] self.dataset_name = config["dataset_name"] @@ -121,11 +127,17 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset # Define coordinates epoch = xr.DataArray( - met_to_j2000ns(met), # TODO: Fix after SIT-3 (see note below) + [met_to_j2000ns(met)], name="epoch", dims=["epoch"], attrs=cdf_attrs.get_variable_attributes("epoch"), ) + spin_sector = xr.DataArray( + np.arange(self.num_spin_sectors), + name="spin_sector", + dims=["spin_sector"], + attrs=cdf_attrs.get_variable_attributes("spin_sector_attrs"), + ) energy_steps = xr.DataArray( np.arange(self.num_energy_steps), name="energy", @@ -145,6 +157,7 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset dataset = xr.Dataset( coords={ "epoch": epoch, + "spin_sector": spin_sector, "energy": energy_steps, "energy_label": energy_label, }, @@ -153,12 +166,8 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset # Create a data variable for each counter for variable_data, variable_name in zip(self.data, self.variable_names): - # TODO: Currently, cdflib doesn't properly write/read CDF files that - # have a single epoch value. To get around this for now, use - # two epoch values and reshape accordingly. Revisit this after - # SIT-3. See https://github.com/MAVENSDC/cdflib/issues/268 - variable_data_arr = np.array(list(variable_data) * 2, dtype=int).reshape( - 2, self.num_energy_steps + variable_data_arr = np.array(variable_data).reshape( + 1, self.num_spin_sectors, self.num_energy_steps ) cdf_attrs_key = ( f"{self.dataset_name.split('imap_codice_l1a_')[-1]}-{variable_name}" @@ -166,7 +175,7 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset dataset[variable_name] = xr.DataArray( variable_data_arr, name=variable_name, - dims=["epoch", "energy"], + dims=["epoch", "spin_sector", "energy"], attrs=cdf_attrs.get_variable_attributes(cdf_attrs_key), ) @@ -262,14 +271,41 @@ def get_esa_sweep_values(self) -> None: sweep_table = sweep_data[sweep_data["table_idx"] == sweep_table_id] self.esa_sweep_values = sweep_table["esa_v"].values - def unpack_science_data(self, science_values: str) -> None: + def unpack_hi_science_data(self, science_values: str) -> None: """ - Unpack the science data from the packet. + Unpack the CoDICE-Hi science data from the packet. - For LO SW Species Counts data, the science data within the packet is a - blob of compressed values of length 2048 bits (16 species * 128 energy - levels). These data need to be divided up by species so that each - species can have their own data variable in the L1A CDF file. + The science data within the packet is a compressed, binary string of + values. + + Parameters + ---------- + science_values : str + A string of binary data representing the science values of the data. 
+ """ + self.compression_algorithm = constants.HI_COMPRESSION_ID_LOOKUP[self.view_id] + + # Decompress the binary string + science_values = decompress(science_values, self.compression_algorithm) + + # Divide up the data by the number of priorities or species + chunk_size = len(science_values) // self.num_counters + science_values_unpacked = [ + science_values[i : i + chunk_size] + for i in range(0, len(science_values), chunk_size) + ] + + # TODO: Determine how to properly divide up hi data. For now, just use + # arrays for each counter + self.data = science_values_unpacked + + def unpack_lo_science_data(self, science_values: str) -> None: + """ + Unpack the CoDICE-Lo science data from the packet. + + The science data within the packet is a compressed, binary string of + values. These data need to be divided up by species or priorities, + and re-arranged into 2D arrays representing energy and spin angle. Parameters ---------- @@ -277,18 +313,27 @@ def unpack_science_data(self, science_values: str) -> None: A string of binary data representing the science values of the data. """ self.compression_algorithm = constants.LO_COMPRESSION_ID_LOOKUP[self.view_id] - self.collapse_table_id = constants.LO_COLLAPSE_TABLE_ID_LOOKUP[self.view_id] - # TODO: Turn this back on after SIT-3 - # For SIT-3, just create appropriate length data arrays of all ones + # Decompress the binary string + science_values = decompress(science_values, self.compression_algorithm) + # Divide up the data by the number of priorities or species - # science_values = packets[0].data["DATA"].raw_value - # num_bits = len(science_values) - # chunk_size = len(science_values) // self.num_counters - # self.data = [ - # science_values[i : i + chunk_size] for i in range(0, num_bits, chunk_size) - # ] - self.data = [["1"] * 128] * self.num_counters + chunk_size = len(science_values) // self.num_counters + science_values_unpacked = [ + science_values[i : i + chunk_size] + for i in range(0, len(science_values), chunk_size) + ] + + # Further divide up the data by energy levels + # The result is a [12,128] array representing 12 spin angles and 128 + # energy levels + self.data = [] + for counter_data in science_values_unpacked: + data_array = [ + counter_data[i : i + self.num_energy_steps] + for i in range(0, len(counter_data), self.num_energy_steps) + ] + self.data.append(data_array) def create_event_dataset( @@ -334,9 +379,6 @@ def create_event_dataset( attrs=cdf_attrs.get_global_attributes(dataset_name), ) - # TODO: Determine what should go in event data CDF and how it should be - # structured. 
-
 
     return dataset
 
 
@@ -385,13 +427,15 @@ def create_hskp_dataset(
     )
 
     # TODO: Change 'TBD' catdesc and fieldname
-    # Once housekeeping packet definition file is re-generated with updated
-    # version of space_packet_parser, can get fieldname and catdesc info via:
-    # for key, value in (packet.header | packet.data).items():
-    #     fieldname = value.short_description
-    #     catdesc = value.long_description
-    # I am holding off making this change until I acquire updated housekeeping
-    # packets/validation data that match the latest telemetry definitions
+    # Once housekeeping packet definition file is re-generated with
+    # updated version of space_packet_parser, can get fieldname and
+    # catdesc info via:
+    # for key, value in (packet.header | packet.data).items():
+    #     fieldname = value.short_description
+    #     catdesc = value.long_description
+    # I am holding off making this change until I acquire updated
+    # housekeeping packets/validation data that match the latest telemetry
+    # definitions
     for key, value in metadata_arrays.items():
         attrs = cdf_attrs.get_variable_attributes("codice_support_attrs")
         attrs["CATDESC"] = "TBD"
@@ -457,8 +501,6 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset:
     dataset : xarray.Dataset
        The ``xarray`` dataset containing the science data and supporting metadata.
     """
-    # TODO: Use new packet_file_to_dataset() function to simplify things
-
    # Decom the packets, group data by APID, and sort by time
    packets = decom_packets(file_path)
    grouped_data = group_by_apid(packets)
@@ -496,7 +538,7 @@
         # Determine the start time of the packet
         met = packets[0].data["ACQ_START_SECONDS"].raw_value
-        met = [met, met + 1]  # TODO: Remove after cdflib fix
+
         # Extract the data
         science_values = packets[0].data["DATA"].raw_value
@@ -506,8 +548,12 @@
         # Run the pipeline to create a dataset for the product
         pipeline = CoDICEL1aPipeline(table_id, plan_id, plan_step, view_id)
         pipeline.configure_data_products(apid)
-        pipeline.unpack_science_data(science_values)
+        if "_lo_" in pipeline.dataset_name:
+            pipeline.unpack_lo_science_data(science_values)
+        elif "_hi_" in pipeline.dataset_name:
+            pipeline.unpack_hi_science_data(science_values)
         dataset = pipeline.create_science_dataset(met, data_version)
 
     logger.info(f"\nFinal data product:\n{dataset}\n")
+
     return dataset
diff --git a/imap_processing/tests/codice/test_codice_l1a.py b/imap_processing/tests/codice/test_codice_l1a.py
index 383a07b9b..1bef0363a 100644
--- a/imap_processing/tests/codice/test_codice_l1a.py
+++ b/imap_processing/tests/codice/test_codice_l1a.py
@@ -19,19 +19,19 @@
 
 EXPECTED_ARRAY_SHAPES = [
     (99,),  # hskp
-    (1, 128),  # hi-counters-aggregated
-    (1, 128),  # hi-counters-singles
-    (1, 128),  # hi-omni
-    (1, 128),  # hi-sectored
-    (1, 128),  # hi-pha
-    (1, 128),  # lo-counters-aggregated
-    (1, 128),  # lo-counters-aggregated
-    (1, 128),  # lo-sw-angular
-    (1, 128),  # lo-nsw-angular
-    (1, 128),  # lo-sw-priority
-    (1, 128),  # lo-nsw-priority
-    (1, 128),  # lo-sw-species
-    (1, 128),  # lo-nsw-species
+    (1, 6, 1),  # hi-counters-aggregated
+    (1, 16, 1),  # hi-counters-singles
+    (1, 60, 1),  # hi-omni
+    (1, 1152, 1),  # hi-sectored
+    (1, 1),  # hi-pha
+    (1, 36, 128),  # lo-counters-aggregated
+    (1, 144, 128),  # lo-counters-singles
+    (1, 60, 128),  # lo-sw-angular
+    (1, 228, 128),  # lo-nsw-angular
+    (1, 12, 128),  # lo-sw-priority
+    (1, 12, 128),  # lo-nsw-priority
+    (1, 1, 128),  # lo-sw-species
+    (1, 
1, 128), # lo-nsw-species (1, 128), # lo-pha ] EXPECTED_ARRAY_SIZES = [ @@ -110,9 +110,6 @@ def test_l1a_cdf_filenames(test_l1a_data: xr.Dataset, expected_logical_source: s assert dataset.attrs["Logical_source"] == expected_logical_source -@pytest.mark.xfail( - reason="Currently failing due to cdflib/epoch issue. See https://github.com/MAVENSDC/cdflib/issues/268" -) @pytest.mark.parametrize( "test_l1a_data, expected_shape", list(zip(TEST_PACKETS, EXPECTED_ARRAY_SHAPES)), From 7ae84a46a55ceffcb72891e242448d4b8baf78ef Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 15:01:13 -0600 Subject: [PATCH 05/22] Fixed mypy errors --- imap_processing/codice/codice_l1a.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index f9032930a..c2efd1ee7 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -42,6 +42,7 @@ # TODO: Use new packet_file_to_dataset() function to simplify things # TODO: Determine what should go in event data CDF and how it should be # structured. +# TODO: Make sure CDF attributes match expected nomenclature class CoDICEL1aPipeline: @@ -286,13 +287,15 @@ def unpack_hi_science_data(self, science_values: str) -> None: self.compression_algorithm = constants.HI_COMPRESSION_ID_LOOKUP[self.view_id] # Decompress the binary string - science_values = decompress(science_values, self.compression_algorithm) + science_values_decompressed = decompress( + science_values, self.compression_algorithm + ) # Divide up the data by the number of priorities or species - chunk_size = len(science_values) // self.num_counters + chunk_size = len(science_values_decompressed) // self.num_counters science_values_unpacked = [ - science_values[i : i + chunk_size] - for i in range(0, len(science_values), chunk_size) + science_values_decompressed[i : i + chunk_size] + for i in range(0, len(science_values_decompressed), chunk_size) ] # TODO: Determine how to properly divide up hi data. 
For now, just use @@ -315,13 +318,15 @@ def unpack_lo_science_data(self, science_values: str) -> None: self.compression_algorithm = constants.LO_COMPRESSION_ID_LOOKUP[self.view_id] # Decompress the binary string - science_values = decompress(science_values, self.compression_algorithm) + science_values_decompressed = decompress( + science_values, self.compression_algorithm + ) # Divide up the data by the number of priorities or species - chunk_size = len(science_values) // self.num_counters + chunk_size = len(science_values_decompressed) // self.num_counters science_values_unpacked = [ - science_values[i : i + chunk_size] - for i in range(0, len(science_values), chunk_size) + science_values_decompressed[i : i + chunk_size] + for i in range(0, len(science_values_decompressed), chunk_size) ] # Further divide up the data by energy levels From dc2602c157ce6a70c5e4086871fc039593285987 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 15:06:01 -0600 Subject: [PATCH 06/22] Fixed mypy errors --- imap_processing/codice/codice_l1a.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index c2efd1ee7..7f036c06c 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -334,7 +334,7 @@ def unpack_lo_science_data(self, science_values: str) -> None: # energy levels self.data = [] for counter_data in science_values_unpacked: - data_array = [ + data_array: list[list[int]] = [ counter_data[i : i + self.num_energy_steps] for i in range(0, len(counter_data), self.num_energy_steps) ] From 953c10c274833baa031fa6a2cf525b49d695290c Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 15:13:55 -0600 Subject: [PATCH 07/22] Fixed mypy errors --- imap_processing/codice/codice_l1a.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index 7f036c06c..f1a0888d5 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -334,11 +334,11 @@ def unpack_lo_science_data(self, science_values: str) -> None: # energy levels self.data = [] for counter_data in science_values_unpacked: - data_array: list[list[int]] = [ + data_array = [ counter_data[i : i + self.num_energy_steps] for i in range(0, len(counter_data), self.num_energy_steps) ] - self.data.append(data_array) + self.data.append(data_array) # type: ignore[arg-type] def create_event_dataset( From 69814dafecae3b5d76bc1c2354d72bbd174a5007 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 20 Aug 2024 15:19:58 -0600 Subject: [PATCH 08/22] Fixed doc build errors --- imap_processing/codice/codice_l1a.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index f1a0888d5..1e857c0e3 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -75,8 +75,10 @@ class CoDICEL1aPipeline: Retrieve the acquisition times via the Lo stepping table. get_esa_sweep_values() Retrieve the ESA sweep values. - unpack_science_data() - Make 4D L1a data product from the decompressed science data. + unpack_hi_science_data() + Decompress, unpack, and restructure CoDICE-Hi data arrays. + unpack_lo_science_data() + Decompress, unpack, and restructure CoDICE-Lo data arrays. 
""" def __init__(self, table_id: int, plan_id: int, plan_step: int, view_id: int): @@ -274,7 +276,7 @@ def get_esa_sweep_values(self) -> None: def unpack_hi_science_data(self, science_values: str) -> None: """ - Unpack the CoDICE-Hi science data from the packet. + Decompress, unpack, and restructure CoDICE-Hi data arrays. The science data within the packet is a compressed, binary string of values. @@ -304,7 +306,7 @@ def unpack_hi_science_data(self, science_values: str) -> None: def unpack_lo_science_data(self, science_values: str) -> None: """ - Unpack the CoDICE-Lo science data from the packet. + Decompress, unpack, and restructure CoDICE-Lo data arrays. The science data within the packet is a compressed, binary string of values. These data need to be divided up by species or priorities, From 64467beeb0d02a524ff4cb9745cbca7f16146187 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Wed, 21 Aug 2024 09:15:17 -0600 Subject: [PATCH 09/22] Implemented packet_file_to_datasets utility function --- imap_processing/codice/codice_l0.py | 21 +-- imap_processing/codice/codice_l1a.py | 132 ++++++++++-------- imap_processing/codice/constants.py | 19 +++ .../tests/codice/test_codice_l1a.py | 2 +- 4 files changed, 93 insertions(+), 81 deletions(-) diff --git a/imap_processing/codice/codice_l0.py b/imap_processing/codice/codice_l0.py index 4515af8fb..a5cc62c6f 100644 --- a/imap_processing/codice/codice_l0.py +++ b/imap_processing/codice/codice_l0.py @@ -17,6 +17,7 @@ from pathlib import Path from imap_processing import decom, imap_module_directory +from imap_processing.codice import constants def decom_packets(packet_file: Path) -> list: @@ -33,26 +34,8 @@ def decom_packets(packet_file: Path) -> list: list : list All the unpacked data. """ - packet_to_xtce_mapping = { - "imap_codice_l0_hi-counters-aggregated_20240429_v001.pkts": "P_COD_HI_INST_COUNTS_AGGREGATED.xml", # noqa - "imap_codice_l0_hi-counters-singles_20240429_v001.pkts": "P_COD_HI_INST_COUNTS_SINGLES.xml", # noqa - "imap_codice_l0_hi-omni_20240429_v001.pkts": "P_COD_HI_OMNI_SPECIES_COUNTS.xml", - "imap_codice_l0_hi-sectored_20240429_v001.pkts": "P_COD_HI_SECT_SPECIES_COUNTS.xml", # noqa - "imap_codice_l0_hi-pha_20240429_v001.pkts": "P_COD_HI_PHA.xml", - "imap_codice_l0_hskp_20100101_v001.pkts": "P_COD_NHK.xml", - "imap_codice_l0_lo-counters-aggregated_20240429_v001.pkts": "P_COD_LO_INST_COUNTS_AGGREGATED.xml", # noqa - "imap_codice_l0_lo-counters-singles_20240429_v001.pkts": "P_COD_LO_INST_COUNTS_SINGLES.xml", # noqa - "imap_codice_l0_lo-sw-angular_20240429_v001.pkts": "P_COD_LO_SW_ANGULAR_COUNTS.xml", # noqa - "imap_codice_l0_lo-nsw-angular_20240429_v001.pkts": "P_COD_LO_NSW_ANGULAR_COUNTS.xml", # noqa - "imap_codice_l0_lo-sw-priority_20240429_v001.pkts": "P_COD_LO_SW_PRIORITY_COUNTS.xml", # noqa - "imap_codice_l0_lo-nsw-priority_20240429_v001.pkts": "P_COD_LO_NSW_PRIORITY_COUNTS.xml", # noqa - "imap_codice_l0_lo-sw-species_20240429_v001.pkts": "P_COD_LO_SW_SPECIES_COUNTS.xml", # noqa - "imap_codice_l0_lo-nsw-species_20240429_v001.pkts": "P_COD_LO_NSW_SPECIES_COUNTS.xml", # noqa - "imap_codice_l0_lo-pha_20240429_v001.pkts": "P_COD_LO_PHA.xml", - } - xtce_document = Path( - f"{imap_module_directory}/codice/packet_definitions/{packet_to_xtce_mapping[packet_file.name]}" + f"{imap_module_directory}/codice/packet_definitions/{constants.PACKET_TO_XTCE_MAPPING[packet_file.name]}" ) decom_packet_list: list = decom.decom_packets(packet_file, xtce_document) return decom_packet_list diff --git a/imap_processing/codice/codice_l1a.py 
b/imap_processing/codice/codice_l1a.py index 1e857c0e3..602a229da 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -13,23 +13,20 @@ from __future__ import annotations -import collections import logging from pathlib import Path import numpy as np import pandas as pd -import space_packet_parser import xarray as xr from imap_processing import imap_module_directory from imap_processing.cdf.imap_cdf_manager import ImapCdfAttributes from imap_processing.cdf.utils import met_to_j2000ns from imap_processing.codice import constants -from imap_processing.codice.codice_l0 import decom_packets from imap_processing.codice.decompress import decompress -from imap_processing.codice.utils import CODICEAPID, add_metadata_to_array -from imap_processing.utils import group_by_apid, sort_by_time +from imap_processing.codice.utils import CODICEAPID +from imap_processing.utils import packet_file_to_datasets logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -39,7 +36,6 @@ # TODO: Add support for decomming multiple APIDs from a single file # TODO: Add these as variables in CDF: SPIN_PERIOD, ST_BIAS_GAIN_MODE, # SW_BIAS_GAIN_MODE, RGFO_HALF_SPIN, NSO_HALF_SPIN, DATA_QUALITY -# TODO: Use new packet_file_to_dataset() function to simplify things # TODO: Determine what should go in event data CDF and how it should be # structured. # TODO: Make sure CDF attributes match expected nomenclature @@ -344,19 +340,17 @@ def unpack_lo_science_data(self, science_values: str) -> None: def create_event_dataset( - met: list[int], event_data: str, dataset_name: str, data_version: str + apid: int, packet: xr.Dataset, data_version: str ) -> xr.Dataset: """ Create dataset for event data. Parameters ---------- - met : list[int] - The Mission Elapsed Time of the data. - event_data : str - A string of binary numbers representing the event data. - dataset_name : str - The name for the dataset. + apid : int + The APID of the packet. + packet : xarray.Dataset + The packet to process. data_version : str Version of the data product being created. @@ -365,6 +359,17 @@ def create_event_dataset( dataset : xarray.Dataset Xarray dataset containing the event data. """ + if apid == CODICEAPID.COD_LO_PHA: + dataset_name = "imap_codice_l1a_lo_pha" + elif apid == CODICEAPID.COD_HI_PHA: + dataset_name = "imap_codice_l1a_hi_pha" + + # Determine the start time of the packet + met = packet.acq_start_seconds.data[0] + + # Extract the data + # event_data = packet.event_data.data (Currently turned off, see TODO) + cdf_attrs = ImapCdfAttributes() cdf_attrs.add_instrument_global_attrs("codice") cdf_attrs.add_instrument_variable_attrs("codice", "l1a") @@ -372,7 +377,7 @@ def create_event_dataset( # Define coordinates epoch = xr.DataArray( - met_to_j2000ns(met), # TODO: Fix after SIT-3 (see note below) + met_to_j2000ns([met]), name="epoch", dims=["epoch"], attrs=cdf_attrs.get_variable_attributes("epoch"), @@ -390,7 +395,7 @@ def create_event_dataset( def create_hskp_dataset( - packets: list[space_packet_parser.parser.Packet], + packet: xr.Dataset, data_version: str, ) -> xr.Dataset: """ @@ -398,8 +403,8 @@ def create_hskp_dataset( Parameters ---------- - packets : list[space_packet_parser.parser.Packet] - The list of packets to process. + packet : xarray.Dataset + The packet to process. data_version : str Version of the data product being created. 
@@ -413,14 +418,9 @@ def create_hskp_dataset( cdf_attrs.add_instrument_variable_attrs("codice", "l1a") cdf_attrs.add_global_attribute("Data_version", data_version) - metadata_arrays: dict = collections.defaultdict(list) - - for packet in packets: - add_metadata_to_array(packet, metadata_arrays) - epoch = xr.DataArray( met_to_j2000ns( - metadata_arrays["SHCOARSE"], + packet.shcoarse.data, reference_epoch=np.datetime64("2010-01-01T00:01:06.184", "ns"), ), name="epoch", @@ -443,19 +443,21 @@ def create_hskp_dataset( # I am holding off making this change until I acquire updated # housekeeping packets/validation data that match the latest telemetry # definitions - for key, value in metadata_arrays.items(): + for variable in packet: attrs = cdf_attrs.get_variable_attributes("codice_support_attrs") attrs["CATDESC"] = "TBD" attrs["DEPEND_0"] = "epoch" attrs["FIELDNAM"] = "TBD" - attrs["LABLAXIS"] = key + attrs["LABLAXIS"] = variable - dataset[key] = xr.DataArray(value, dims=["epoch"], attrs=attrs) + dataset[variable] = xr.DataArray( + packet[variable].data, dims=["epoch"], attrs=attrs + ) return dataset -def get_params(packet: space_packet_parser.parser.Packet) -> tuple[int, int, int, int]: +def get_params(packet: xr.Dataset) -> tuple[int, int, int, int]: """ Return the four 'main' parameters used for l1a processing. @@ -465,7 +467,7 @@ def get_params(packet: space_packet_parser.parser.Packet) -> tuple[int, int, int Parameters ---------- - packet : space_packet_parser.parser.Packet + packet : xarray.Dataset A packet for the APID of interest. Returns @@ -484,10 +486,10 @@ def get_params(packet: space_packet_parser.parser.Packet) -> tuple[int, int, int view_id : int Provides information about how data was collapsed and/or compressed. """ - table_id = packet.data["TABLE_ID"].raw_value - plan_id = packet.data["PLAN_ID"].raw_value - plan_step = packet.data["PLAN_STEP"].raw_value - view_id = packet.data["VIEW_ID"].raw_value + table_id = packet.table_id.data[0] + plan_id = packet.plan_id.data[0] + plan_step = packet.plan_step.data[0] + view_id = packet.view_id.data[0] return table_id, plan_id, plan_step, view_id @@ -509,48 +511,30 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset: The ``xarray`` dataset containing the science data and supporting metadata. 
""" # Decom the packets, group data by APID, and sort by time - packets = decom_packets(file_path) - grouped_data = group_by_apid(packets) + xtce_packet_definition = Path( + f"{imap_module_directory}/codice/packet_definitions/{constants.PACKET_TO_XTCE_MAPPING[file_path.name]}" + ) + packets = packet_file_to_datasets(file_path, xtce_packet_definition) - for apid in grouped_data: + for apid in packets: + packet = packets[apid] logger.info(f"\nProcessing {CODICEAPID(apid).name} packet") if apid == CODICEAPID.COD_NHK: - packets = grouped_data[apid] - sorted_packets = sort_by_time(packets, "SHCOARSE") - dataset = create_hskp_dataset(sorted_packets, data_version) + dataset = create_hskp_dataset(packet, data_version) elif apid in [CODICEAPID.COD_LO_PHA, CODICEAPID.COD_HI_PHA]: - if apid == CODICEAPID.COD_LO_PHA: - dataset_name = "imap_codice_l1a_lo_pha" - elif apid == CODICEAPID.COD_HI_PHA: - dataset_name = "imap_codice_l1a_hi_pha" - - # Sort the packets by time - packets = sort_by_time(grouped_data[apid], "SHCOARSE") - - # Determine the start time of the packet - met = packets[0].data["ACQ_START_SECONDS"].raw_value - met = [met, met + 1] # TODO: Remove after cdflib fix - - # Extract the data - event_data = packets[0].data["EVENT_DATA"].raw_value - - # Create the dataset - dataset = create_event_dataset(met, event_data, dataset_name, data_version) + dataset = create_event_dataset(apid, packet, data_version) elif apid in constants.APIDS_FOR_SCIENCE_PROCESSING: - # Sort the packets by time - packets = sort_by_time(grouped_data[apid], "SHCOARSE") - # Determine the start time of the packet - met = packets[0].data["ACQ_START_SECONDS"].raw_value + met = packet.acq_start_seconds.data[0] # Extract the data - science_values = packets[0].data["DATA"].raw_value + science_values = packet.data.data[0] # Get the four "main" parameters for processing - table_id, plan_id, plan_step, view_id = get_params(packets[0]) + table_id, plan_id, plan_step, view_id = get_params(packet) # Run the pipeline to create a dataset for the product pipeline = CoDICEL1aPipeline(table_id, plan_id, plan_step, view_id) @@ -564,3 +548,29 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset: logger.info(f"\nFinal data product:\n{dataset}\n") return dataset + + +if __name__ == "__main__": + TEST_DATA_PATH = imap_module_directory / "tests" / "codice" / "data" + + TEST_PACKETS = [ + TEST_DATA_PATH / "imap_codice_l0_hskp_20100101_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_hi-counters-aggregated_20240429_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_hi-counters-singles_20240429_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_hi-omni_20240429_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_hi-sectored_20240429_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_hi-pha_20240429_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_lo-counters-aggregated_20240429_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_lo-counters-singles_20240429_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_lo-sw-angular_20240429_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_lo-nsw-angular_20240429_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_lo-sw-priority_20240429_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_lo-nsw-priority_20240429_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_lo-sw-species_20240429_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_lo-nsw-species_20240429_v001.pkts", + TEST_DATA_PATH / "imap_codice_l0_lo-pha_20240429_v001.pkts", + ] + + for file_path in TEST_PACKETS: + dataset = process_codice_l1a(file_path, "001") + print(dataset) 
diff --git a/imap_processing/codice/constants.py b/imap_processing/codice/constants.py index 9054431f4..f6aaa41ce 100644 --- a/imap_processing/codice/constants.py +++ b/imap_processing/codice/constants.py @@ -14,9 +14,28 @@ # TODO: What to do in the case of a value of 255 in LOSSY_A and LOSSY_B # compression? +# TODO: Improve PACKET_TO_XTCE_MAPPING to not have hard-coded dates/versions from imap_processing.codice.utils import CODICEAPID, CoDICECompression +PACKET_TO_XTCE_MAPPING = { + "imap_codice_l0_hi-counters-aggregated_20240429_v001.pkts": "P_COD_HI_INST_COUNTS_AGGREGATED.xml", # noqa + "imap_codice_l0_hi-counters-singles_20240429_v001.pkts": "P_COD_HI_INST_COUNTS_SINGLES.xml", # noqa + "imap_codice_l0_hi-omni_20240429_v001.pkts": "P_COD_HI_OMNI_SPECIES_COUNTS.xml", + "imap_codice_l0_hi-sectored_20240429_v001.pkts": "P_COD_HI_SECT_SPECIES_COUNTS.xml", + "imap_codice_l0_hi-pha_20240429_v001.pkts": "P_COD_HI_PHA.xml", + "imap_codice_l0_hskp_20100101_v001.pkts": "P_COD_NHK.xml", + "imap_codice_l0_lo-counters-aggregated_20240429_v001.pkts": "P_COD_LO_INST_COUNTS_AGGREGATED.xml", # noqa + "imap_codice_l0_lo-counters-singles_20240429_v001.pkts": "P_COD_LO_INST_COUNTS_SINGLES.xml", # noqa + "imap_codice_l0_lo-sw-angular_20240429_v001.pkts": "P_COD_LO_SW_ANGULAR_COUNTS.xml", + "imap_codice_l0_lo-nsw-angular_20240429_v001.pkts": "P_COD_LO_NSW_ANGULAR_COUNTS.xml", # noqa + "imap_codice_l0_lo-sw-priority_20240429_v001.pkts": "P_COD_LO_SW_PRIORITY_COUNTS.xml", # noqa + "imap_codice_l0_lo-nsw-priority_20240429_v001.pkts": "P_COD_LO_NSW_PRIORITY_COUNTS.xml", # noqa + "imap_codice_l0_lo-sw-species_20240429_v001.pkts": "P_COD_LO_SW_SPECIES_COUNTS.xml", + "imap_codice_l0_lo-nsw-species_20240429_v001.pkts": "P_COD_LO_NSW_SPECIES_COUNTS.xml", # noqa + "imap_codice_l0_lo-pha_20240429_v001.pkts": "P_COD_LO_PHA.xml", +} + APIDS_FOR_SCIENCE_PROCESSING = [ CODICEAPID.COD_HI_INST_COUNTS_AGGREGATED, CODICEAPID.COD_HI_INST_COUNTS_SINGLES, diff --git a/imap_processing/tests/codice/test_codice_l1a.py b/imap_processing/tests/codice/test_codice_l1a.py index 1bef0363a..1883e6fa1 100644 --- a/imap_processing/tests/codice/test_codice_l1a.py +++ b/imap_processing/tests/codice/test_codice_l1a.py @@ -35,7 +35,7 @@ (1, 128), # lo-pha ] EXPECTED_ARRAY_SIZES = [ - 123, # hskp + 129, # hskp 1, # hi-counters-aggregated 3, # hi-counters-singles 8, # hi-omni From 4e9252f2edd01cd892586eab658a8d7656f6fe67 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Wed, 28 Aug 2024 11:50:14 -0600 Subject: [PATCH 10/22] Updated expected array shapes --- .../tests/codice/test_codice_l1a.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/imap_processing/tests/codice/test_codice_l1a.py b/imap_processing/tests/codice/test_codice_l1a.py index 1bef0363a..d9fd7207c 100644 --- a/imap_processing/tests/codice/test_codice_l1a.py +++ b/imap_processing/tests/codice/test_codice_l1a.py @@ -19,19 +19,19 @@ EXPECTED_ARRAY_SHAPES = [ (99,), # hskp - (1, 6, 1), # hi-counters-aggregated - (1, 16, 1), # hi-counters-singles - (1, 60, 1), # hi-omni - (1, 1152, 1), # hi-sectored + (1, 1, 6, 1), # hi-counters-aggregated # TODO: Double check with Joey + (1, 1, 16, 1), # hi-counters-singles # TODO: Double check with Joey + (1, 15, 4, 1), # hi-omni # TODO: Double check with Joey + (1, 8, 12, 12), # hi-sectored (1, 1), # hi-pha - (1, 36, 128), # lo-counters-aggregated - (1, 144, 128), # lo-counters-aggregated - (1, 60, 128), # lo-sw-angular - (1, 228, 128), # lo-nsw-angular - (1, 12, 128), # lo-sw-priority - (1, 12, 128), # 
lo-nsw-priority - (1, 1, 128), # lo-sw-species - (1, 1, 128), # lo-nsw-species + (1, 6, 6, 128), # lo-counters-aggregated + (1, 24, 6, 128), # lo-counters-singles + (1, 5, 12, 128), # lo-sw-angular + (1, 19, 12, 128), # lo-nsw-angular + (1, 1, 12, 128), # lo-sw-priority + (1, 1, 12, 128), # lo-nsw-priority + (1, 1, 1, 128), # lo-sw-species + (1, 1, 1, 128), # lo-nsw-species (1, 128), # lo-pha ] EXPECTED_ARRAY_SIZES = [ From c7a8d09bc97b7571c9f422d2e629521f969e553e Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Wed, 28 Aug 2024 11:51:03 -0600 Subject: [PATCH 11/22] Avoiding bitarray dependency --- imap_processing/codice/decompress.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/imap_processing/codice/decompress.py b/imap_processing/codice/decompress.py index e4abf83ea..bb92a75c4 100644 --- a/imap_processing/codice/decompress.py +++ b/imap_processing/codice/decompress.py @@ -29,8 +29,6 @@ import lzma from enum import IntEnum -import bitarray - from imap_processing.codice.constants import LOSSY_A_TABLE, LOSSY_B_TABLE from imap_processing.codice.utils import CoDICECompression @@ -118,7 +116,9 @@ def decompress(compressed_binary: str, algorithm: IntEnum) -> list[int]: The 24- or 32-bit decompressed values. """ # Convert the binary string to a byte stream - compressed_bytes = bitarray.bitarray(compressed_binary).tobytes() + compressed_bytes = int(compressed_binary, 2).to_bytes( + (len(compressed_binary) + 7) // 8, byteorder="big" + ) # Apply the appropriate decompression algorithm if algorithm == CoDICECompression.NO_COMPRESSION: From 9bfb8169b22bd911b7b3677375719224275dad67 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Wed, 28 Aug 2024 11:52:28 -0600 Subject: [PATCH 12/22] Added instrument config key to make some conditional areas of the processing pipeline a bit more readable; added proper numbers for positions/energies/spin_sectors --- imap_processing/codice/constants.py | 50 +++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/imap_processing/codice/constants.py b/imap_processing/codice/constants.py index 9054431f4..f063087f2 100644 --- a/imap_processing/codice/constants.py +++ b/imap_processing/codice/constants.py @@ -79,87 +79,111 @@ DATA_PRODUCT_CONFIGURATIONS = { CODICEAPID.COD_HI_INST_COUNTS_AGGREGATED: { "num_counters": 1, - "num_energy_steps": 1, - "num_spin_sectors": 6, + "num_energy_steps": 1, # TODO: Double check with Joey + "num_positions": 6, # TODO: Double check with Joey + "num_spin_sectors": 1, "variable_names": HI_INST_COUNTS_AGGREGATED_NAMES, "dataset_name": "imap_codice_l1a_hi_counters_aggregated", + "instrument": "hi", }, CODICEAPID.COD_HI_INST_COUNTS_SINGLES: { "num_counters": 3, - "num_energy_steps": 1, - "num_spin_sectors": 16, + "num_energy_steps": 1, # TODO: Double check with Joey + "num_positions": 16, # TODO: Double check with Joey + "num_spin_sectors": 1, "variable_names": HI_INST_COUNTS_SINGLES_NAMES, "dataset_name": "imap_codice_l1a_hi_counters_singles", + "instrument": "hi", }, CODICEAPID.COD_HI_OMNI_SPECIES_COUNTS: { "num_counters": 8, - "num_energy_steps": 1, - "num_spin_sectors": 60, # TODO: Double-check this + "num_energy_steps": 15, # TODO: Double check with Joey + "num_positions": 4, # TODO: Double check with Joey + "num_spin_sectors": 1, "variable_names": HI_OMNI_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_hi_omni", + "instrument": "hi", }, CODICEAPID.COD_HI_SECT_SPECIES_COUNTS: { "num_counters": 4, - "num_energy_steps": 1, - "num_spin_sectors": 1152, # TODO: Double-check 
this + "num_energy_steps": 8, + "num_positions": 12, + "num_spin_sectors": 12, "variable_names": HI_SECT_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_hi_sectored", + "instrument": "hi", }, CODICEAPID.COD_LO_INST_COUNTS_AGGREGATED: { "num_counters": 1, "num_energy_steps": 128, - "num_spin_sectors": 36, + "num_positions": 6, + "num_spin_sectors": 6, "variable_names": LO_INST_COUNTS_AGGREGATED_NAMES, "dataset_name": "imap_codice_l1a_lo_counters_aggregated", + "instrument": "lo", }, CODICEAPID.COD_LO_INST_COUNTS_SINGLES: { "num_counters": 1, "num_energy_steps": 128, - "num_spin_sectors": 144, + "num_positions": 24, + "num_spin_sectors": 6, "variable_names": LO_INST_COUNTS_SINGLES_NAMES, "dataset_name": "imap_codice_l1a_lo_counters_singles", + "instrument": "lo", }, CODICEAPID.COD_LO_SW_ANGULAR_COUNTS: { "num_counters": 4, "num_energy_steps": 128, - "num_spin_sectors": 60, + "num_positions": 5, + "num_spin_sectors": 12, "variable_names": LO_SW_ANGULAR_NAMES, "dataset_name": "imap_codice_l1a_lo_sw_angular", + "instrument": "lo", }, CODICEAPID.COD_LO_NSW_ANGULAR_COUNTS: { "num_counters": 1, "num_energy_steps": 128, - "num_spin_sectors": 228, + "num_positions": 19, + "num_spin_sectors": 12, "variable_names": LO_NSW_ANGULAR_NAMES, "dataset_name": "imap_codice_l1a_lo_nsw_angular", + "instrument": "lo", }, CODICEAPID.COD_LO_SW_PRIORITY_COUNTS: { "num_counters": 5, "num_energy_steps": 128, + "num_positions": 1, "num_spin_sectors": 12, "variable_names": LO_SW_PRIORITY_NAMES, "dataset_name": "imap_codice_l1a_lo_sw_priority", + "instrument": "lo", }, CODICEAPID.COD_LO_NSW_PRIORITY_COUNTS: { "num_counters": 2, "num_energy_steps": 128, + "num_positions": 1, "num_spin_sectors": 12, "variable_names": LO_NSW_PRIORITY_NAMES, "dataset_name": "imap_codice_l1a_lo_nsw_priority", + "instrument": "lo", }, CODICEAPID.COD_LO_SW_SPECIES_COUNTS: { "num_counters": 16, "num_energy_steps": 128, + "num_positions": 1, "num_spin_sectors": 1, "variable_names": LO_SW_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_lo_sw_species", + "instrument": "lo", }, CODICEAPID.COD_LO_NSW_SPECIES_COUNTS: { "num_counters": 8, "num_energy_steps": 128, + "num_positions": 1, "num_spin_sectors": 1, "variable_names": LO_NSW_SPECIES_NAMES, "dataset_name": "imap_codice_l1a_lo_nsw_species", + "instrument": "lo", }, } @@ -793,5 +817,5 @@ 252: 6815744, 253: 7340032, 254: 7864320, - 255: 999999, + 255: 9999999, } From c4259100d87ec2741778bf7a1e9bfc602abfd1a5 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Wed, 28 Aug 2024 11:53:02 -0600 Subject: [PATCH 13/22] Added attrs for inst_az coordinate --- .../config/imap_codice_l1a_variable_attrs.yaml | 15 +++++++++++++-- .../config/imap_codice_l1b_variable_attrs.yaml | 11 +++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml b/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml index aee0d7acd..9d7a535d9 100644 --- a/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml @@ -36,15 +36,26 @@ energy_attrs: VALIDMAX: 127 VAR_TYPE: support_data +inst_az_attrs: + <<: *default + CATDESC: Azimuth + FIELDNAM: Azimuth + FORMAT: I2 + LABLAXIS: Azimuth + UNITS: ' ' + VALIDMIN: 0 + VALIDMAX: 31 + VAR_TYPE: support_data + spin_sector_attrs: <<: *default CATDESC: Spin sector indicating range of spin angles FIELDNAM: Spin sector - FORMAT: I4 + FORMAT: I2 LABLAXIS: spin sector UNITS: ' ' VALIDMIN: 0 - VALIDMAX: 1152 + VALIDMAX: 11 VAR_TYPE: 
support_data # <=== Labels ===> diff --git a/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml b/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml index c9de1c451..cbb14205f 100644 --- a/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml @@ -32,6 +32,17 @@ energy_attrs: VALIDMAX: 127 VAR_TYPE: support_data +inst_az_attrs: + <<: *default + CATDESC: Azimuth + FIELDNAM: Azimuth + FORMAT: I2 + LABLAXIS: Azimuth + UNITS: ' ' + VALIDMIN: 0 + VALIDMAX: 31 + VAR_TYPE: support_data + spin_sector_attrs: <<: *default CATDESC: Spin sector indicating range of spin angles From a21727c9274994ae5d4959602fa92220594384cd Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Wed, 28 Aug 2024 11:54:49 -0600 Subject: [PATCH 14/22] Added further unpacking of science data to properly restructure data arrays by positions, spin_sectors, and energies --- imap_processing/codice/codice_l1a.py | 116 +++++++++++++-------------- 1 file changed, 55 insertions(+), 61 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index 1e857c0e3..a37671c7b 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -101,8 +101,10 @@ def configure_data_products(self, apid: int) -> None: self.num_counters = config["num_counters"] self.num_energy_steps = config["num_energy_steps"] self.num_spin_sectors = config["num_spin_sectors"] + self.num_positions = config["num_positions"] self.variable_names = config["variable_names"] self.dataset_name = config["dataset_name"] + self.instrument = config["instrument"] def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset: """ @@ -135,6 +137,12 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset dims=["epoch"], attrs=cdf_attrs.get_variable_attributes("epoch"), ) + inst_az = xr.DataArray( + np.arange(self.num_positions), + name="inst_az", + dims=["inst_az"], + attrs=cdf_attrs.get_variable_attributes("inst_az_attrs"), + ) spin_sector = xr.DataArray( np.arange(self.num_spin_sectors), name="spin_sector", @@ -160,6 +168,7 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset dataset = xr.Dataset( coords={ "epoch": epoch, + "inst_az": inst_az, "spin_sector": spin_sector, "energy": energy_steps, "energy_label": energy_label, @@ -169,21 +178,34 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset # Create a data variable for each counter for variable_data, variable_name in zip(self.data, self.variable_names): - variable_data_arr = np.array(variable_data).reshape( - 1, self.num_spin_sectors, self.num_energy_steps - ) + # Data arrays are structured depending on the instrument + if self.instrument == "lo": + variable_data_arr = np.array(variable_data).reshape( + 1, self.num_positions, self.num_spin_sectors, self.num_energy_steps + ) + dims = ["epoch", "inst_az", "spin_sector", "energy"] + elif self.instrument == "hi": + variable_data_arr = np.array(variable_data).reshape( + 1, self.num_energy_steps, self.num_positions, self.num_spin_sectors + ) + dims = ["epoch", "energy", "inst_az", "spin_sector"] + + # Get the CDF attributes cdf_attrs_key = ( f"{self.dataset_name.split('imap_codice_l1a_')[-1]}-{variable_name}" ) + attrs = cdf_attrs.get_variable_attributes(cdf_attrs_key) + + # Create the CDF data variable dataset[variable_name] = xr.DataArray( variable_data_arr, name=variable_name, - dims=["epoch", 
"spin_sector", "energy"], - attrs=cdf_attrs.get_variable_attributes(cdf_attrs_key), + dims=dims, + attrs=attrs, ) # Add ESA Sweep Values and acquisition times (lo only) - if "_lo_" in self.dataset_name: + if self.instrument == "lo": self.get_esa_sweep_values() self.get_acquisition_times() dataset["esa_sweep_values"] = xr.DataArray( @@ -274,43 +296,15 @@ def get_esa_sweep_values(self) -> None: sweep_table = sweep_data[sweep_data["table_idx"] == sweep_table_id] self.esa_sweep_values = sweep_table["esa_v"].values - def unpack_hi_science_data(self, science_values: str) -> None: + def unpack_science_data(self, science_values: str) -> None: """ - Decompress, unpack, and restructure CoDICE-Hi data arrays. + Decompress, unpack, and restructure science data arrays. The science data within the packet is a compressed, binary string of - values. - - Parameters - ---------- - science_values : str - A string of binary data representing the science values of the data. - """ - self.compression_algorithm = constants.HI_COMPRESSION_ID_LOOKUP[self.view_id] - - # Decompress the binary string - science_values_decompressed = decompress( - science_values, self.compression_algorithm - ) - - # Divide up the data by the number of priorities or species - chunk_size = len(science_values_decompressed) // self.num_counters - science_values_unpacked = [ - science_values_decompressed[i : i + chunk_size] - for i in range(0, len(science_values_decompressed), chunk_size) - ] - - # TODO: Determine how to properly divide up hi data. For now, just use - # arrays for each counter - self.data = science_values_unpacked - - def unpack_lo_science_data(self, science_values: str) -> None: - """ - Decompress, unpack, and restructure CoDICE-Lo data arrays. - - The science data within the packet is a compressed, binary string of - values. These data need to be divided up by species or priorities, - and re-arranged into 2D arrays representing energy and spin angle. + values. These data need to be divided up by species or priorities (or + what I am calling "counters" as a general term), and re-arranged into + 3D arrays representing spin sectors, positions, and energies (the order + of which depends on the instrument). 
 Parameters
         ----------
         science_values : str
             A string of binary data representing the science values of the data.
         """
         self.compression_algorithm = constants.LO_COMPRESSION_ID_LOOKUP[self.view_id]
 
-        # Decompress the binary string
+        # Decompress the binary string into a list of integers
         science_values_decompressed = decompress(
             science_values, self.compression_algorithm
         )
 
-        # Divide up the data by the number of priorities or species
-        chunk_size = len(science_values_decompressed) // self.num_counters
-        science_values_unpacked = [
-            science_values_decompressed[i : i + chunk_size]
-            for i in range(0, len(science_values_decompressed), chunk_size)
-        ]
+        # Re-arrange the counter data
+        # For CoDICE-lo, data are 3D arrays with a shape representing
+        # [<num_positions>,<num_spin_sectors>,<num_energy_steps>]
+        if self.instrument == "lo":
+            self.data = np.array(science_values_decompressed, dtype=np.uint32).reshape(
+                self.num_counters,
+                self.num_positions,
+                self.num_spin_sectors,
+                self.num_energy_steps,
+            )
 
-        # Further divide up the data by energy levels
-        # The result is a [12,128] array representing 12 spin angles and 128
-        # energy levels
-        self.data = []
-        for counter_data in science_values_unpacked:
-            data_array = [
-                counter_data[i : i + self.num_energy_steps]
-                for i in range(0, len(counter_data), self.num_energy_steps)
-            ]
-            self.data.append(data_array)  # type: ignore[arg-type]
+        # For CoDICE-hi, data are 3D arrays with a shape representing
+        # [<num_energy_steps>,<num_positions>,<num_spin_sectors>]
+        elif self.instrument == "hi":
+            self.data = np.array(science_values_decompressed, dtype=np.uint32).reshape(
+                self.num_counters,
+                self.num_energy_steps,
+                self.num_positions,
+                self.num_spin_sectors,
+            )
 
 
 def create_event_dataset(
@@ -555,10 +552,7 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset:
         # Run the pipeline to create a dataset for the product
         pipeline = CoDICEL1aPipeline(table_id, plan_id, plan_step, view_id)
         pipeline.configure_data_products(apid)
-        if "_lo_" in pipeline.dataset_name:
-            pipeline.unpack_lo_science_data(science_values)
-        elif "_hi_" in pipeline.dataset_name:
-            pipeline.unpack_hi_science_data(science_values)
+        pipeline.unpack_science_data(science_values)
         dataset = pipeline.create_science_dataset(met, data_version)
 
         logger.info(f"\nFinal data product:\n{dataset}\n")

From 38704daf2690c2db679034c456504d5dae30150c Mon Sep 17 00:00:00 2001
From: Matthew Bourque
Date: Wed, 28 Aug 2024 11:58:21 -0600
Subject: [PATCH 15/22] Fixed doc build error

---
 imap_processing/codice/codice_l1a.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py
index a37671c7b..08c55ff1e 100644
--- a/imap_processing/codice/codice_l1a.py
+++ b/imap_processing/codice/codice_l1a.py
@@ -75,10 +75,8 @@ class CoDICEL1aPipeline:
         Retrieve the acquisition times via the Lo stepping table.
     get_esa_sweep_values()
         Retrieve the ESA sweep values.
-    unpack_hi_science_data()
-        Decompress, unpack, and restructure CoDICE-Hi data arrays.
-    unpack_lo_science_data()
-        Decompress, unpack, and restructure CoDICE-Lo data arrays.
+    unpack_science_data()
+        Decompress, unpack, and restructure science data arrays.
""" def __init__(self, table_id: int, plan_id: int, plan_step: int, view_id: int): From 6e46d1869e6649033e7200ea86e4182543f47b81 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Fri, 30 Aug 2024 10:44:38 -0600 Subject: [PATCH 16/22] Fixed reshape methods to avoid unexpected argument warning --- imap_processing/codice/codice_l1a.py | 34 ++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index b3411ee49..84758f631 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -175,12 +175,22 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset # Data arrays are structured depending on the instrument if self.instrument == "lo": variable_data_arr = np.array(variable_data).reshape( - 1, self.num_positions, self.num_spin_sectors, self.num_energy_steps + ( + 1, + self.num_positions, + self.num_spin_sectors, + self.num_energy_steps, + ) ) dims = ["epoch", "inst_az", "spin_sector", "energy"] elif self.instrument == "hi": variable_data_arr = np.array(variable_data).reshape( - 1, self.num_energy_steps, self.num_positions, self.num_spin_sectors + ( + 1, + self.num_energy_steps, + self.num_positions, + self.num_spin_sectors, + ) ) dims = ["epoch", "energy", "inst_az", "spin_sector"] @@ -317,20 +327,24 @@ def unpack_science_data(self, science_values: str) -> None: # [,,] if self.instrument == "lo": self.data = np.array(science_values_decompressed, dtype=np.uint32).reshape( - self.num_counters, - self.num_positions, - self.num_spin_sectors, - self.num_energy_steps, + ( + self.num_counters, + self.num_positions, + self.num_spin_sectors, + self.num_energy_steps, + ) ) # For CoDICE-hi, data are a 3D array with a shape representing # [,,] elif self.instrument == "hi": self.data = np.array(science_values_decompressed, dtype=np.uint32).reshape( - self.num_counters, - self.num_energy_steps, - self.num_positions, - self.num_spin_sectors, + ( + self.num_counters, + self.num_energy_steps, + self.num_positions, + self.num_spin_sectors, + ) ) From 907439a33edacf3fc7feb88f679b43b19b0aab42 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Fri, 30 Aug 2024 12:13:37 -0600 Subject: [PATCH 17/22] compression_algorithm variable need not be a class attribute --- imap_processing/codice/codice_l1a.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index 84758f631..d2760f851 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -315,12 +315,10 @@ def unpack_science_data(self, science_values: str) -> None: science_values : str A string of binary data representing the science values of the data. 
""" - self.compression_algorithm = constants.LO_COMPRESSION_ID_LOOKUP[self.view_id] + compression_algorithm = constants.LO_COMPRESSION_ID_LOOKUP[self.view_id] # Decompress the binary string into a list of integers - science_values_decompressed = decompress( - science_values, self.compression_algorithm - ) + science_values_decompressed = decompress(science_values, compression_algorithm) # Re-arrange the counter data # For CoDICE-lo, data are a 3D arrays with a shape representing From 10ba2fa351e075edb0475c2dd8512fec092e431b Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Fri, 30 Aug 2024 14:58:37 -0600 Subject: [PATCH 18/22] Updated L0 code to use new packet_file_to_datasets function --- imap_processing/codice/codice_l0.py | 18 +-- .../tests/codice/test_codice_l0.py | 103 ++++++++---------- 2 files changed, 54 insertions(+), 67 deletions(-) diff --git a/imap_processing/codice/codice_l0.py b/imap_processing/codice/codice_l0.py index a5cc62c6f..8b8c29968 100644 --- a/imap_processing/codice/codice_l0.py +++ b/imap_processing/codice/codice_l0.py @@ -16,11 +16,14 @@ from pathlib import Path -from imap_processing import decom, imap_module_directory +import xarray as xr + +from imap_processing import imap_module_directory from imap_processing.codice import constants +from imap_processing.utils import packet_file_to_datasets -def decom_packets(packet_file: Path) -> list: +def decom_packets(packet_file: Path) -> dict[int, xr.Dataset]: """ Decom CoDICE data packets using CoDICE packet definition. @@ -31,11 +34,12 @@ def decom_packets(packet_file: Path) -> list: Returns ------- - list : list - All the unpacked data. + packets : dict[int, xr.Dataset] + Mapping from apid to ``xarray`` dataset, one dataset per apid. """ - xtce_document = Path( + xtce_packet_definition = Path( f"{imap_module_directory}/codice/packet_definitions/{constants.PACKET_TO_XTCE_MAPPING[packet_file.name]}" ) - decom_packet_list: list = decom.decom_packets(packet_file, xtce_document) - return decom_packet_list + packets = packet_file_to_datasets(packet_file, xtce_packet_definition) + + return packets diff --git a/imap_processing/tests/codice/test_codice_l0.py b/imap_processing/tests/codice/test_codice_l0.py index 6920fdd74..9f99d4f53 100644 --- a/imap_processing/tests/codice/test_codice_l0.py +++ b/imap_processing/tests/codice/test_codice_l0.py @@ -5,22 +5,34 @@ import pandas as pd import pytest -import space_packet_parser +import xarray as xr from imap_processing import imap_module_directory from imap_processing.codice import codice_l0 from imap_processing.codice.codice_l1a import create_hskp_dataset from imap_processing.utils import convert_raw_to_eu +# Define the CCSDS header fields (which will be ignored in these tests)) +CCSDS_HEADER_FIELDS = [ + "shcoarse", + "version", + "type", + "sec_hdr_flg", + "pkt_apid", + "seq_flgs", + "src_seq_ctr", + "pkt_len", +] + @pytest.fixture(scope="session") -def decom_test_data() -> list: - """Read test data from file +def decom_test_data() -> xr.Dataset: + """Read test data from file and return a decommutated housekeeping packet. 
Returns ------- - data_packet_list : list[space_packet_parser.parser.Packet] - The list of decommutated packets + packet : xr.Dataset + A decommutated housekeeping packet """ packet_file = Path( @@ -28,14 +40,9 @@ def decom_test_data() -> list: f"imap_codice_l0_hskp_20100101_v001.pkts" ) - data_packet_list = codice_l0.decom_packets(packet_file) - data_packet_list = [ - packet - for packet in data_packet_list - if packet.header["PKT_APID"].raw_value == 1136 - ] + packet = codice_l0.decom_packets(packet_file)[1136] - return data_packet_list + return packet @pytest.fixture(scope="session") @@ -63,15 +70,15 @@ def validation_data() -> pd.core.frame.DataFrame: def test_eu_hskp_data( - decom_test_data: list[space_packet_parser.parser.Packet], + decom_test_data: xr.Dataset, validation_data: pd.core.frame.DataFrame, ): """Compare the engineering unit (EU) housekeeping data to the validation data. Parameters ---------- - decom_test_data : list[space_packet_parser.parser.Packet] - The decommutated housekeeping packet data + decom_test_data : xr.Dataset + The decommutated housekeeping packet validation_data : pandas.core.frame.DataFrame The validation data to compare against """ @@ -82,80 +89,56 @@ def test_eu_hskp_data( imap_module_directory / "tests/codice/data/eu_unit_lookup_table.csv", "P_COD_NHK", ) - first_data = decom_test_data[0] - validation_row = validation_data.loc[first_data.data["SHCOARSE"].raw_value] - # Determine the number of CCSDS header fields (7 is standard) - num_ccsds_header_fields = 7 + validation_row = validation_data.loc[decom_test_data.shcoarse] # Compare EU values of housekeeping data, skipping CCSDS header fields - for idx, field in enumerate(eu_hk_data): - # Skip the first num_ccsds_header_fields fields - if idx < num_ccsds_header_fields: - continue - # Skip SHCOARSE - if field == "SHCOARSE": + for field in eu_hk_data: + # Skip header values + if field in CCSDS_HEADER_FIELDS: continue - eu_values = eu_hk_data[field].data - validation_values = validation_row[field] + eu_values = getattr(eu_hk_data, field).data + validation_values = validation_row[field.upper()] # Compare each individual element - for eu_val, validation_val in zip(eu_values, [validation_values]): + for eu_val, validation_val in zip(eu_values, validation_values): assert round(eu_val, 5) == round(validation_val, 5) def test_raw_hskp_data( - decom_test_data: list[space_packet_parser.parser.Packet], + decom_test_data: xr.Dataset, validation_data: pd.core.frame.DataFrame, ): """Compare the raw housekeeping data to the validation data. 
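# (A note on the naming convention assumed by the comparisons in these tests:
# the datasets produced by packet_file_to_datasets() expose lowercase field
# names, as the lowercase accesses above suggest, while the validation CSV
# keeps the uppercase telemetry mnemonics, hence the field.upper() lookups.
# A trivial illustration:)
field = "shcoarse"
assert field.upper() == "SHCOARSE"  # dataset field name -> validation column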
Parameters ---------- - decom_test_data : list[space_packet_parser.parser.Packet] - The decommutated housekeeping packet data + decom_test_data : xr.Dataset + The decommutated housekeeping packet validation_data : pandas.core.frame.DataFrame The validation data to compare against """ - first_data = decom_test_data[0] - validation_row = validation_data.loc[first_data.data["SHCOARSE"].raw_value] + validation_row = validation_data.loc[decom_test_data.shcoarse] # Compare raw values of housekeeping data - for key, value in first_data.data.items(): - if key == "SHCOARSE": - assert value.raw_value == validation_row.name - continue - assert value.raw_value == validation_row[key] + for field in decom_test_data: + if field not in CCSDS_HEADER_FIELDS: + raw_values = getattr(decom_test_data, field).data + validation_values = validation_row[field.upper()] + for raw_value, validation_value in zip(raw_values, validation_values): + assert raw_value == validation_value -def test_total_packets_in_data_file( - decom_test_data: list[space_packet_parser.parser.Packet], -): +def test_total_packets_in_data_file(decom_test_data: xr.Dataset): """Test if total packets in data file is correct Parameters ---------- - decom_test_data : list[space_packet_parser.parser.Packet] - The decommutated housekeeping packet data + decom_test_data : xr.Dataset + The decommutated housekeeping packet """ total_packets = 99 - assert len(decom_test_data) == total_packets - - -def test_ways_to_get_data(decom_test_data: list[space_packet_parser.parser.Packet]): - """Test if data can be retrieved using different ways - - Parameters - ---------- - decom_test_data : list[space_packet_parser.parser.Packet] - The decommutated housekeeping packet data - """ - - data_value_using_key = decom_test_data[0].data - data_value_using_list = decom_test_data[0][1] - - # Check if data is same - assert data_value_using_key == data_value_using_list + assert len(decom_test_data.epoch) == total_packets From c55c8e03289a42c30c12442fdf1a2a7b0f9c8661 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Fri, 30 Aug 2024 15:04:50 -0600 Subject: [PATCH 19/22] Removed temporary if __name__ == __main__ code --- imap_processing/codice/codice_l1a.py | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index d2760f851..78a5a626f 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -552,29 +552,3 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset: logger.info(f"\nFinal data product:\n{dataset}\n") return dataset - - -if __name__ == "__main__": - TEST_DATA_PATH = imap_module_directory / "tests" / "codice" / "data" - - TEST_PACKETS = [ - TEST_DATA_PATH / "imap_codice_l0_hskp_20100101_v001.pkts", - TEST_DATA_PATH / "imap_codice_l0_hi-counters-aggregated_20240429_v001.pkts", - TEST_DATA_PATH / "imap_codice_l0_hi-counters-singles_20240429_v001.pkts", - TEST_DATA_PATH / "imap_codice_l0_hi-omni_20240429_v001.pkts", - TEST_DATA_PATH / "imap_codice_l0_hi-sectored_20240429_v001.pkts", - TEST_DATA_PATH / "imap_codice_l0_hi-pha_20240429_v001.pkts", - TEST_DATA_PATH / "imap_codice_l0_lo-counters-aggregated_20240429_v001.pkts", - TEST_DATA_PATH / "imap_codice_l0_lo-counters-singles_20240429_v001.pkts", - TEST_DATA_PATH / "imap_codice_l0_lo-sw-angular_20240429_v001.pkts", - TEST_DATA_PATH / "imap_codice_l0_lo-nsw-angular_20240429_v001.pkts", - TEST_DATA_PATH / "imap_codice_l0_lo-sw-priority_20240429_v001.pkts", - 
TEST_DATA_PATH / "imap_codice_l0_lo-nsw-priority_20240429_v001.pkts", - TEST_DATA_PATH / "imap_codice_l0_lo-sw-species_20240429_v001.pkts", - TEST_DATA_PATH / "imap_codice_l0_lo-nsw-species_20240429_v001.pkts", - TEST_DATA_PATH / "imap_codice_l0_lo-pha_20240429_v001.pkts", - ] - - for file_path in TEST_PACKETS: - dataset = process_codice_l1a(file_path, "001") - print(dataset) From 0a19ca288fb165d3f22495308588bac8d54268b5 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Fri, 30 Aug 2024 15:09:27 -0600 Subject: [PATCH 20/22] Fixed typo --- imap_processing/codice/decompress.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imap_processing/codice/decompress.py b/imap_processing/codice/decompress.py index bb92a75c4..9a48b72b2 100644 --- a/imap_processing/codice/decompress.py +++ b/imap_processing/codice/decompress.py @@ -56,7 +56,7 @@ def _apply_lossy_a(compressed_bytes: bytes) -> list[int]: def _apply_lossy_b(compressed_bytes: bytes) -> list[int]: """ - Apply 8-bit to 32-bit Lossy A decompression algorithm. + Apply 8-bit to 32-bit Lossy B decompression algorithm. The Lossy B algorithm uses a lookup table imported into this module. From ba75ec4cb5b6216db99e31da2585abf1c35c6208 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Fri, 30 Aug 2024 15:14:39 -0600 Subject: [PATCH 21/22] Fixed doc build error --- imap_processing/codice/codice_l0.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imap_processing/codice/codice_l0.py b/imap_processing/codice/codice_l0.py index 8b8c29968..6f132d580 100644 --- a/imap_processing/codice/codice_l0.py +++ b/imap_processing/codice/codice_l0.py @@ -34,7 +34,7 @@ def decom_packets(packet_file: Path) -> dict[int, xr.Dataset]: Returns ------- - packets : dict[int, xr.Dataset] + packets : dict[int, xarray.Dataset] Mapping from apid to ``xarray`` dataset, one dataset per apid. """ xtce_packet_definition = Path( From 940348a0dc8a5ad382e8fe385ea0d28fba2b7dd7 Mon Sep 17 00:00:00 2001 From: Matthew Bourque Date: Tue, 3 Sep 2024 11:46:16 -0600 Subject: [PATCH 22/22] Addressed review comments --- imap_processing/codice/codice_l1a.py | 34 ++++----- .../tests/codice/test_codice_l1a.py | 74 +++++++++---------- 2 files changed, 50 insertions(+), 58 deletions(-) diff --git a/imap_processing/codice/codice_l1a.py b/imap_processing/codice/codice_l1a.py index 78a5a626f..f1776a4fa 100644 --- a/imap_processing/codice/codice_l1a.py +++ b/imap_processing/codice/codice_l1a.py @@ -22,7 +22,6 @@ from imap_processing import imap_module_directory from imap_processing.cdf.imap_cdf_manager import ImapCdfAttributes -from imap_processing.cdf.utils import met_to_j2000ns from imap_processing.codice import constants from imap_processing.codice.decompress import decompress from imap_processing.codice.utils import CODICEAPID @@ -100,7 +99,9 @@ def configure_data_products(self, apid: int) -> None: self.dataset_name = config["dataset_name"] self.instrument = config["instrument"] - def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset: + def create_science_dataset( + self, packet: xr.Dataset, data_version: str + ) -> xr.Dataset: """ Create an ``xarray`` dataset for the unpacked science data. @@ -108,8 +109,8 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset Parameters ---------- - met : numpy.int64 - The mission elapsed time of the packet, used to determine epoch data. + packet : xarray.Dataset + The packet to process. data_version : str Version of the data product being created. 
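# (Context sketch for the change below, assuming the behavior of the
# packet_file_to_datasets() function adopted in PATCH 18: each per-APID
# dataset already carries an "epoch" coordinate, so the packet can supply
# epoch directly and a separate MET conversion is no longer needed here.
# A toy, self-contained illustration with made-up values:)
import numpy as np
import xarray as xr

packet = xr.Dataset(coords={"epoch": ("epoch", np.array([0, 1], dtype=np.int64))})
epoch = xr.DataArray(packet.epoch, name="epoch", dims=["epoch"])
assert epoch.shape == (2,)  # the packet's epoch values are reused as-is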
@@ -126,7 +127,7 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset # Define coordinates epoch = xr.DataArray( - [met_to_j2000ns(met)], + packet.epoch, name="epoch", dims=["epoch"], attrs=cdf_attrs.get_variable_attributes("epoch"), @@ -371,9 +372,6 @@ def create_event_dataset( elif apid == CODICEAPID.COD_HI_PHA: dataset_name = "imap_codice_l1a_hi_pha" - # Determine the start time of the packet - met = packet.acq_start_seconds.data[0] - # Extract the data # event_data = packet.event_data.data (Currently turned off, see TODO) @@ -384,7 +382,7 @@ def create_event_dataset( # Define coordinates epoch = xr.DataArray( - met_to_j2000ns([met]), + packet.epoch, name="epoch", dims=["epoch"], attrs=cdf_attrs.get_variable_attributes("epoch"), @@ -426,10 +424,7 @@ def create_hskp_dataset( cdf_attrs.add_global_attribute("Data_version", data_version) epoch = xr.DataArray( - met_to_j2000ns( - packet.shcoarse.data, - reference_epoch=np.datetime64("2010-01-01T00:01:06.184", "ns"), - ), + packet.epoch, name="epoch", dims=["epoch"], attrs=cdf_attrs.get_variable_attributes("epoch"), @@ -493,10 +488,10 @@ def get_params(packet: xr.Dataset) -> tuple[int, int, int, int]: view_id : int Provides information about how data was collapsed and/or compressed. """ - table_id = packet.table_id.data[0] - plan_id = packet.plan_id.data[0] - plan_step = packet.plan_step.data[0] - view_id = packet.view_id.data[0] + table_id = int(packet.table_id.data) + plan_id = int(packet.plan_id.data) + plan_step = int(packet.plan_step.data) + view_id = int(packet.view_id.data) return table_id, plan_id, plan_step, view_id @@ -534,9 +529,6 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset: dataset = create_event_dataset(apid, packet, data_version) elif apid in constants.APIDS_FOR_SCIENCE_PROCESSING: - # Determine the start time of the packet - met = packet.acq_start_seconds.data[0] - # Extract the data science_values = packet.data.data[0] @@ -547,7 +539,7 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset: pipeline = CoDICEL1aPipeline(table_id, plan_id, plan_step, view_id) pipeline.configure_data_products(apid) pipeline.unpack_science_data(science_values) - dataset = pipeline.create_science_dataset(met, data_version) + dataset = pipeline.create_science_dataset(packet, data_version) logger.info(f"\nFinal data product:\n{dataset}\n") diff --git a/imap_processing/tests/codice/test_codice_l1a.py b/imap_processing/tests/codice/test_codice_l1a.py index ff5302b48..1d897d3ec 100644 --- a/imap_processing/tests/codice/test_codice_l1a.py +++ b/imap_processing/tests/codice/test_codice_l1a.py @@ -34,23 +34,6 @@ (1, 1, 1, 128), # lo-nsw-species (1, 128), # lo-pha ] -EXPECTED_ARRAY_SIZES = [ - 129, # hskp - 1, # hi-counters-aggregated - 3, # hi-counters-singles - 8, # hi-omni - 4, # hi-sectored - 0, # hi-pha - 3, # lo-counters-aggregated - 3, # lo-counters-singles - 6, # lo-sw-angular - 3, # lo-nsw-angular - 7, # lo-sw-priority - 4, # lo-nsw-priority - 18, # lo-sw-species - 10, # lo-nsw-species - 0, # lo-pha -] EXPECTED_LOGICAL_SOURCE = [ "imap_codice_l1a_hskp", "imap_codice_l1a_hi-counters-aggregated", @@ -68,6 +51,23 @@ "imap_codice_l1a_lo-nsw-species", "imap_codice_l1a_lo-pha", ] +EXPECTED_NUM_VARIABLES = [ + 129, # hskp + 1, # hi-counters-aggregated + 3, # hi-counters-singles + 8, # hi-omni + 4, # hi-sectored + 0, # hi-pha + 3, # lo-counters-aggregated + 3, # lo-counters-singles + 6, # lo-sw-angular + 3, # lo-nsw-angular + 7, # lo-sw-priority + 4, # lo-nsw-priority + 18, 
# lo-sw-species
+    10,  # lo-nsw-species
+    0,  # lo-pha
+]
 
 
 @pytest.fixture(params=TEST_PACKETS)
@@ -134,26 +134,6 @@ def test_l1a_data_array_shape(test_l1a_data: xr.Dataset, expected_shape: tuple):
     assert dataset[variable].data.shape == expected_shape
 
 
-@pytest.mark.parametrize(
-    "test_l1a_data, expected_size",
-    list(zip(TEST_PACKETS, EXPECTED_ARRAY_SIZES)),
-    indirect=["test_l1a_data"],
-)
-def test_l1a_data_array_size(test_l1a_data: xr.Dataset, expected_size: int):
-    """Tests that the data arrays in the generated CDFs have the expected size.
-
-    Parameters
-    ----------
-    test_l1a_data : xarray.Dataset
-        A ``xarray`` dataset containing the test data
-    expected_size : int
-        The expected size of the data array
-    """
-
-    dataset = test_l1a_data
-    assert len(dataset) == expected_size
-
-
 @pytest.mark.skip("Awaiting validation data")
 @pytest.mark.parametrize(
     "test_l1a_data, validation_data",
@@ -185,3 +165,23 @@ def test_l1a_data_array_values(test_l1a_data: xr.Dataset, validation_data: Path):
     np.testing.assert_array_equal(
         validation_data[variable].data, generated_dataset[variable].data[0]
     )
+
+
+@pytest.mark.parametrize(
+    "test_l1a_data, expected_num_variables",
+    list(zip(TEST_PACKETS, EXPECTED_NUM_VARIABLES)),
+    indirect=["test_l1a_data"],
+)
+def test_l1a_num_variables(test_l1a_data: xr.Dataset, expected_num_variables: int):
+    """Tests that the generated CDFs contain the expected number of variables.
+
+    Parameters
+    ----------
+    test_l1a_data : xarray.Dataset
+        A ``xarray`` dataset containing the test data
+    expected_num_variables : int
+        The expected number of data variables in the CDF
+    """
+
+    dataset = test_l1a_data
+    assert len(dataset) == expected_num_variables
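A minimal, standalone illustration of the convention the new test relies on (a toy dataset, not project code): len() of an xarray.Dataset counts its data variables, not its coordinates, which is what makes len(dataset) a usable proxy for the number of CDF variables.

import numpy as np
import xarray as xr

ds = xr.Dataset(
    data_vars={"a": ("epoch", np.zeros(3)), "b": ("epoch", np.ones(3))},
    coords={"epoch": np.arange(3)},
)
assert len(ds) == 2        # two data variables; the coordinate is not counted
assert len(ds.epoch) == 3  # length of the epoch coordinate itself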