Updated code to more accurately unpack science data
bourque committed Aug 20, 2024
1 parent bb9cb30 commit 6f81bef
Showing 2 changed files with 98 additions and 55 deletions.
124 changes: 85 additions & 39 deletions imap_processing/codice/codice_l1a.py
@@ -27,16 +27,21 @@
from imap_processing.cdf.utils import met_to_j2000ns
from imap_processing.codice import constants
from imap_processing.codice.codice_l0 import decom_packets
from imap_processing.codice.decompress import decompress
from imap_processing.codice.utils import CODICEAPID, add_metadata_to_array
from imap_processing.utils import group_by_apid, sort_by_time

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# TODO: Decom data arrays need to be decompressed
# TODO: In decommutation, how to have a variable length data and then a checksum
# after it? (Might be fixed with new XTCE script updates)
# TODO: Add support for decomming multiple APIDs from a single file
# TODO: Add these as variables in CDF: SPIN_PERIOD, ST_BIAS_GAIN_MODE,
# SW_BIAS_GAIN_MODE, RGFO_HALF_SPIN, NSO_HALF_SPIN, DATA_QUALITY
# TODO: Use new packet_file_to_dataset() function to simplify things
# TODO: Determine what should go in event data CDF and how it should be
# structured.


class CoDICEL1aPipeline:
@@ -92,6 +97,7 @@ def configure_data_products(self, apid: int) -> None:
config = constants.DATA_PRODUCT_CONFIGURATIONS.get(apid) # type: ignore[call-overload]
self.num_counters = config["num_counters"]
self.num_energy_steps = config["num_energy_steps"]
self.num_spin_sectors = config["num_spin_sectors"]
self.variable_names = config["variable_names"]
self.dataset_name = config["dataset_name"]
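
For reference, here is a minimal sketch of the shape of a constants.DATA_PRODUCT_CONFIGURATIONS entry that configure_data_products() reads. The APID, counter count, and variable names are placeholders (the real table lives in imap_processing/codice/constants.py); the spin-sector and energy-step counts mirror the lo-sw-priority dimensions used in the tests below.

# Hypothetical entry in constants.DATA_PRODUCT_CONFIGURATIONS; the APID,
# counter count, and variable names are placeholders for illustration only.
EXAMPLE_DATA_PRODUCT_CONFIGURATIONS = {
    0x123: {  # placeholder APID
        "num_counters": 5,  # placeholder; one counter per priority/species
        "num_energy_steps": 128,
        "num_spin_sectors": 12,
        "variable_names": ["p0", "p1", "p2", "p3", "p4"],  # placeholder names
        "dataset_name": "imap_codice_l1a_lo-sw-priority",
    },
}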

@@ -121,11 +127,17 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset

# Define coordinates
epoch = xr.DataArray(
met_to_j2000ns(met), # TODO: Fix after SIT-3 (see note below)
[met_to_j2000ns(met)],
name="epoch",
dims=["epoch"],
attrs=cdf_attrs.get_variable_attributes("epoch"),
)
spin_sector = xr.DataArray(
np.arange(self.num_spin_sectors),
name="spin_sector",
dims=["spin_sector"],
attrs=cdf_attrs.get_variable_attributes("spin_sector_attrs"),
)
energy_steps = xr.DataArray(
np.arange(self.num_energy_steps),
name="energy",
@@ -145,6 +157,7 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset
dataset = xr.Dataset(
coords={
"epoch": epoch,
"spin_sector": spin_sector,
"energy": energy_steps,
"energy_label": energy_label,
},
@@ -153,20 +166,16 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset

# Create a data variable for each counter
for variable_data, variable_name in zip(self.data, self.variable_names):
# TODO: Currently, cdflib doesn't properly write/read CDF files that
# have a single epoch value. To get around this for now, use
# two epoch values and reshape accordingly. Revisit this after
# SIT-3. See https://github.com/MAVENSDC/cdflib/issues/268
variable_data_arr = np.array(list(variable_data) * 2, dtype=int).reshape(
2, self.num_energy_steps
variable_data_arr = np.array(variable_data).reshape(
1, self.num_spin_sectors, self.num_energy_steps
)
cdf_attrs_key = (
f"{self.dataset_name.split('imap_codice_l1a_')[-1]}-{variable_name}"
)
dataset[variable_name] = xr.DataArray(
variable_data_arr,
name=variable_name,
dims=["epoch", "energy"],
dims=["epoch", "spin_sector", "energy"],
attrs=cdf_attrs.get_variable_attributes(cdf_attrs_key),
)

@@ -262,33 +271,69 @@ def get_esa_sweep_values(self) -> None:
sweep_table = sweep_data[sweep_data["table_idx"] == sweep_table_id]
self.esa_sweep_values = sweep_table["esa_v"].values

def unpack_science_data(self, science_values: str) -> None:
def unpack_hi_science_data(self, science_values: str) -> None:
"""
Unpack the science data from the packet.
Unpack the CoDICE-Hi science data from the packet.
For LO SW Species Counts data, the science data within the packet is a
blob of compressed values of length 2048 bits (16 species * 128 energy
levels). These data need to be divided up by species so that each
species can have their own data variable in the L1A CDF file.
The science data within the packet is a compressed, binary string of
values.
Parameters
----------
science_values : str
A string of binary data representing the science values of the data.
"""
self.compression_algorithm = constants.HI_COMPRESSION_ID_LOOKUP[self.view_id]

# Decompress the binary string
science_values = decompress(science_values, self.compression_algorithm)

# Divide up the data by the number of priorities or species
chunk_size = len(science_values) // self.num_counters
science_values_unpacked = [
science_values[i : i + chunk_size]
for i in range(0, len(science_values), chunk_size)
]

# TODO: Determine how to properly divide up hi data. For now, just use
# arrays for each counter
self.data = science_values_unpacked

def unpack_lo_science_data(self, science_values: str) -> None:
"""
Unpack the CoDICE-Lo science data from the packet.
The science data within the packet is a compressed, binary string of
values. These data need to be divided up by species or priorities,
and re-arranged into 2D arrays representing energy and spin angle.
Parameters
----------
science_values : str
A string of binary data representing the science values of the data.
"""
self.compression_algorithm = constants.LO_COMPRESSION_ID_LOOKUP[self.view_id]
self.collapse_table_id = constants.LO_COLLAPSE_TABLE_ID_LOOKUP[self.view_id]

# TODO: Turn this back on after SIT-3
# For SIT-3, just create appropriate length data arrays of all ones
# Decompress the binary string
science_values = decompress(science_values, self.compression_algorithm)

# Divide up the data by the number of priorities or species
# science_values = packets[0].data["DATA"].raw_value
# num_bits = len(science_values)
# chunk_size = len(science_values) // self.num_counters
# self.data = [
# science_values[i : i + chunk_size] for i in range(0, num_bits, chunk_size)
# ]
self.data = [["1"] * 128] * self.num_counters
chunk_size = len(science_values) // self.num_counters
science_values_unpacked = [
science_values[i : i + chunk_size]
for i in range(0, len(science_values), chunk_size)
]

# Further divide up the data by energy levels
# The result is a [12,128] array representing 12 spin angles and 128
# energy levels
self.data = []
for counter_data in science_values_unpacked:
data_array = [
counter_data[i : i + self.num_energy_steps]
for i in range(0, len(counter_data), self.num_energy_steps)
]
self.data.append(data_array)
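
For illustration, here is a standalone sketch of the chunk-and-reshape logic above, using dummy counts and the lo-sw-priority dimensions (12 spin sectors by 128 energy steps); the decompressed values and counter count are placeholders.

# Standalone sketch of the per-counter chunking and [spin sector, energy]
# re-arrangement done in unpack_lo_science_data (dummy values only)
num_counters, num_spin_sectors, num_energy_steps = 5, 12, 128
decompressed = list(range(num_counters * num_spin_sectors * num_energy_steps))

# Divide the flat list into one chunk per counter
chunk_size = len(decompressed) // num_counters
per_counter = [
    decompressed[i : i + chunk_size]
    for i in range(0, len(decompressed), chunk_size)
]

# Re-arrange each chunk into rows of energy steps, one row per spin sector
data = [
    [chunk[i : i + num_energy_steps] for i in range(0, len(chunk), num_energy_steps)]
    for chunk in per_counter
]
assert len(data) == num_counters
assert len(data[0]) == num_spin_sectors and len(data[0][0]) == num_energy_steps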


def create_event_dataset(
@@ -334,9 +379,6 @@ def create_event_dataset
attrs=cdf_attrs.get_global_attributes(dataset_name),
)

# TODO: Determine what should go in event data CDF and how it should be
# structured.

return dataset


@@ -385,13 +427,15 @@ def create_hskp_dataset(
)

# TODO: Change 'TBD' catdesc and fieldname
# Once housekeeping packet definition file is re-generated with updated
# version of space_packet_parser, can get fieldname and catdesc info via:
# for key, value in (packet.header | packet.data).items():
# fieldname = value.short_description
# catdesc = value.long_description
# I am holding off making this change until I acquire updated housekeeping
# packets/validation data that match the latest telemetry definitions
# Once housekeeping packet definition file is re-generated with
# updated version of space_packet_parser, can get fieldname and
# catdesc info via:
# for key, value in (packet.header | packet.data).items():
# fieldname = value.short_description
# catdesc = value.long_description
# I am holding off making this change until I acquire updated
# housekeeping packets/validation data that match the latest telemetry
# definitions
for key, value in metadata_arrays.items():
attrs = cdf_attrs.get_variable_attributes("codice_support_attrs")
attrs["CATDESC"] = "TBD"
@@ -457,8 +501,6 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset:
dataset : xarray.Dataset
The ``xarray`` dataset containing the science data and supporting metadata.
"""
# TODO: Use new packet_file_to_dataset() function to simplify things

# Decom the packets, group data by APID, and sort by time
packets = decom_packets(file_path)
grouped_data = group_by_apid(packets)
@@ -496,7 +538,7 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset:

# Determine the start time of the packet
met = packets[0].data["ACQ_START_SECONDS"].raw_value
met = [met, met + 1] # TODO: Remove after cdflib fix

# Extract the data
science_values = packets[0].data["DATA"].raw_value

@@ -506,8 +548,12 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset:
# Run the pipeline to create a dataset for the product
pipeline = CoDICEL1aPipeline(table_id, plan_id, plan_step, view_id)
pipeline.configure_data_products(apid)
pipeline.unpack_science_data(science_values)
if "_lo_" in pipeline.dataset_name:
pipeline.unpack_lo_science_data(science_values)
elif "_hi_" in pipeline.dataset_name:
pipeline.unpack_hi_science_data(science_values)
dataset = pipeline.create_science_dataset(met, data_version)

logger.info(f"\nFinal data product:\n{dataset}\n")

return dataset
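
As a usage note, the L1A pipeline entry point can be exercised roughly as follows; the packet file name and data version below are placeholders, not values taken from this commit.

# Minimal usage sketch (placeholder packet file name and data version)
from pathlib import Path

from imap_processing.codice.codice_l1a import process_codice_l1a

dataset = process_codice_l1a(
    file_path=Path("example_codice_packets.pkts"),  # placeholder packet file
    data_version="001",  # placeholder version string
)
print(dataset)
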
29 changes: 13 additions & 16 deletions imap_processing/tests/codice/test_codice_l1a.py
@@ -19,19 +19,19 @@

EXPECTED_ARRAY_SHAPES = [
(99,), # hskp
(1, 128), # hi-counters-aggregated
(1, 128), # hi-counters-singles
(1, 128), # hi-omni
(1, 128), # hi-sectored
(1, 128), # hi-pha
(1, 128), # lo-counters-aggregated
(1, 128), # lo-counters-aggregated
(1, 128), # lo-sw-angular
(1, 128), # lo-nsw-angular
(1, 128), # lo-sw-priority
(1, 128), # lo-nsw-priority
(1, 128), # lo-sw-species
(1, 128), # lo-nsw-species
(1, 6, 1), # hi-counters-aggregated
(1, 16, 1), # hi-counters-singles
(1, 60, 1), # hi-omni
(1, 1152, 1), # hi-sectored
(1, 1), # hi-pha
(1, 36, 128), # lo-counters-aggregated
(1, 144, 128), # lo-counters-singles
(1, 60, 128), # lo-sw-angular
(1, 228, 128), # lo-nsw-angular
(1, 12, 128), # lo-sw-priority
(1, 12, 128), # lo-nsw-priority
(1, 1, 128), # lo-sw-species
(1, 1, 128), # lo-nsw-species
(1, 128), # lo-pha
]
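
Most of the science-product tuples above follow (epoch, spin sectors, energy steps); for example, lo-sw-priority unpacks into 1 epoch by 12 spin sectors by 128 energy steps. A minimal sanity check of that reading, assuming the list ordering above:

# Illustrative check only: lo-sw-priority is (epoch, spin sectors, energy steps)
assert EXPECTED_ARRAY_SHAPES[10] == (1, 12, 128)
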
EXPECTED_ARRAY_SIZES = [
@@ -110,9 +110,6 @@ def test_l1a_cdf_filenames(test_l1a_data: xr.Dataset, expected_logical_source: s
assert dataset.attrs["Logical_source"] == expected_logical_source


@pytest.mark.xfail(
reason="Currently failing due to cdflib/epoch issue. See https://github.com/MAVENSDC/cdflib/issues/268"
)
@pytest.mark.parametrize(
"test_l1a_data, expected_shape",
list(zip(TEST_PACKETS, EXPECTED_ARRAY_SHAPES)),
