CoDICE L1 decompression and unpacking data updates #762

Merged

Changes from 8 commits
11 changes: 11 additions & 0 deletions imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml
@@ -36,6 +36,17 @@ energy_attrs:
VALIDMAX: 127
VAR_TYPE: support_data

spin_sector_attrs:
<<: *default
CATDESC: Spin sector indicating range of spin angles
FIELDNAM: Spin sector
FORMAT: I4
LABLAXIS: spin sector
UNITS: ' '
VALIDMIN: 0
VALIDMAX: 1152
VAR_TYPE: support_data

# <=== Labels ===>
energy_label:
CATDESC: Energy per charge (E/q) sweeping step
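As context for reviewers: the `<<: *default` line in the new `spin_sector_attrs` block pulls in shared attributes through the YAML merge key, which PyYAML's `safe_load` resolves. A minimal stand-alone sketch (the keys under `default` here are invented for illustration, not the project's actual defaults):

```python
# Sketch of YAML merge-key inheritance as used by spin_sector_attrs.
# The `default` anchor below is hypothetical; the real one lives at the
# top of imap_codice_l1a_variable_attrs.yaml.
import yaml

snippet = """
default: &default
  DISPLAY_TYPE: no_plot
  FILLVAL: -9223372036854775808

spin_sector_attrs:
  <<: *default
  CATDESC: Spin sector indicating range of spin angles
  VALIDMIN: 0
  VALIDMAX: 1152
"""

attrs = yaml.safe_load(snippet)["spin_sector_attrs"]
print(attrs["DISPLAY_TYPE"])  # inherited from the anchor: no_plot
print(attrs["VALIDMAX"])      # defined on the entry itself: 1152
```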
11 changes: 11 additions & 0 deletions imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml
@@ -32,6 +32,17 @@ energy_attrs:
VALIDMAX: 127
VAR_TYPE: support_data

spin_sector_attrs:
<<: *default
CATDESC: Spin sector indicating range of spin angles
FIELDNAM: Spin sector
FORMAT: I4
LABLAXIS: spin sector
UNITS: ' '
VALIDMIN: 0
VALIDMAX: 1152
VAR_TYPE: support_data

# <=== Labels ===>
energy_label:
CATDESC: Energy per charge (E/q) sweeping step
135 changes: 94 additions & 41 deletions imap_processing/codice/codice_l1a.py
@@ -27,16 +27,22 @@
from imap_processing.cdf.utils import met_to_j2000ns
from imap_processing.codice import constants
from imap_processing.codice.codice_l0 import decom_packets
from imap_processing.codice.decompress import decompress
from imap_processing.codice.utils import CODICEAPID, add_metadata_to_array
from imap_processing.utils import group_by_apid, sort_by_time

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# TODO: Decom data arrays need to be decompressed
# TODO: In decommutation, how to have a variable length data and then a checksum
# after it? (Might be fixed with new XTCE script updates)
# TODO: Add support for decomming multiple APIDs from a single file
# TODO: Add these as variables in CDF: SPIN_PERIOD, ST_BIAS_GAIN_MODE,
# SW_BIAS_GAIN_MODE, RGFO_HALF_SPIN, NSO_HALF_SPIN, DATA_QUALITY
# TODO: Use new packet_file_to_dataset() function to simplify things
# TODO: Determine what should go in event data CDF and how it should be
# structured.
# TODO: Make sure CDF attributes match expected nomenclature


class CoDICEL1aPipeline:
Expand Down Expand Up @@ -69,8 +75,10 @@ class CoDICEL1aPipeline:
Retrieve the acquisition times via the Lo stepping table.
get_esa_sweep_values()
Retrieve the ESA sweep values.
unpack_science_data()
Make 4D L1a data product from the decompressed science data.
unpack_hi_science_data()
Decompress, unpack, and restructure CoDICE-Hi data arrays.
unpack_lo_science_data()
Decompress, unpack, and restructure CoDICE-Lo data arrays.
"""

def __init__(self, table_id: int, plan_id: int, plan_step: int, view_id: int):
Expand All @@ -92,6 +100,7 @@ def configure_data_products(self, apid: int) -> None:
config = constants.DATA_PRODUCT_CONFIGURATIONS.get(apid) # type: ignore[call-overload]
self.num_counters = config["num_counters"]
self.num_energy_steps = config["num_energy_steps"]
self.num_spin_sectors = config["num_spin_sectors"]
self.variable_names = config["variable_names"]
self.dataset_name = config["dataset_name"]

@@ -121,11 +130,17 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset

# Define coordinates
epoch = xr.DataArray(
met_to_j2000ns(met), # TODO: Fix after SIT-3 (see note below)
[met_to_j2000ns(met)],
name="epoch",
dims=["epoch"],
attrs=cdf_attrs.get_variable_attributes("epoch"),
)
spin_sector = xr.DataArray(
np.arange(self.num_spin_sectors),
name="spin_sector",
dims=["spin_sector"],
attrs=cdf_attrs.get_variable_attributes("spin_sector_attrs"),
)
energy_steps = xr.DataArray(
np.arange(self.num_energy_steps),
name="energy",
@@ -145,6 +160,7 @@
dataset = xr.Dataset(
coords={
"epoch": epoch,
"spin_sector": spin_sector,
"energy": energy_steps,
"energy_label": energy_label,
},
@@ -153,20 +169,16 @@

# Create a data variable for each counter
for variable_data, variable_name in zip(self.data, self.variable_names):
# TODO: Currently, cdflib doesn't properly write/read CDF files that
# have a single epoch value. To get around this for now, use
# two epoch values and reshape accordingly. Revisit this after
# SIT-3. See https://github.com/MAVENSDC/cdflib/issues/268
variable_data_arr = np.array(list(variable_data) * 2, dtype=int).reshape(
2, self.num_energy_steps
variable_data_arr = np.array(variable_data).reshape(
1, self.num_spin_sectors, self.num_energy_steps
)
cdf_attrs_key = (
f"{self.dataset_name.split('imap_codice_l1a_')[-1]}-{variable_name}"
)
dataset[variable_name] = xr.DataArray(
variable_data_arr,
name=variable_name,
dims=["epoch", "energy"],
dims=["epoch", "spin_sector", "energy"],
attrs=cdf_attrs.get_variable_attributes(cdf_attrs_key),
)
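A quick sanity check of the new reshape: each counter's flat array becomes a `(1, num_spin_sectors, num_energy_steps)` cube, with the leading length-1 axis holding the single epoch. A minimal sketch using the 12-spin-sector by 128-energy-step shape described in `unpack_lo_science_data` below (dummy data; dimension names taken from the diff):

```python
# Reshape a flat per-counter array into (epoch, spin_sector, energy).
import numpy as np
import xarray as xr

num_spin_sectors, num_energy_steps = 12, 128
variable_data = np.ones(num_spin_sectors * num_energy_steps, dtype=int)

variable_data_arr = variable_data.reshape(1, num_spin_sectors, num_energy_steps)
da = xr.DataArray(variable_data_arr, dims=["epoch", "spin_sector", "energy"])
print(da.shape)  # (1, 12, 128)
```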

@@ -262,33 +274,73 @@ def get_esa_sweep_values(self) -> None:
sweep_table = sweep_data[sweep_data["table_idx"] == sweep_table_id]
self.esa_sweep_values = sweep_table["esa_v"].values

def unpack_science_data(self, science_values: str) -> None:
def unpack_hi_science_data(self, science_values: str) -> None:
"""
Unpack the science data from the packet.
Decompress, unpack, and restructure CoDICE-Hi data arrays.

For LO SW Species Counts data, the science data within the packet is a
blob of compressed values of length 2048 bits (16 species * 128 energy
levels). These data need to be divided up by species so that each
species can have its own data variable in the L1A CDF file.
The science data within the packet is a compressed, binary string of
values.

Parameters
----------
science_values : str
A string of binary data representing the science values of the data.
"""
self.compression_algorithm = constants.HI_COMPRESSION_ID_LOOKUP[self.view_id]

# Decompress the binary string
science_values_decompressed = decompress(
science_values, self.compression_algorithm
)

# Divide up the data by the number of priorities or species
chunk_size = len(science_values_decompressed) // self.num_counters
science_values_unpacked = [
science_values_decompressed[i : i + chunk_size]
for i in range(0, len(science_values_decompressed), chunk_size)
]

# TODO: Determine how to properly divide up hi data. For now, just use
# arrays for each counter
self.data = science_values_unpacked

def unpack_lo_science_data(self, science_values: str) -> None:
"""
Decompress, unpack, and restructure CoDICE-Lo data arrays.

The science data within the packet is a compressed, binary string of
values. These data need to be divided up by species or priorities,
and re-arranged into 2D arrays representing energy and spin angle.

Parameters
----------
science_values : str
A string of binary data representing the science values of the data.
"""
self.compression_algorithm = constants.LO_COMPRESSION_ID_LOOKUP[self.view_id]
self.collapse_table_id = constants.LO_COLLAPSE_TABLE_ID_LOOKUP[self.view_id]

# TODO: Turn this back on after SIT-3
# For SIT-3, just create appropriate length data arrays of all ones
# Decompress the binary string
science_values_decompressed = decompress(
science_values, self.compression_algorithm
)

# Divide up the data by the number of priorities or species
# science_values = packets[0].data["DATA"].raw_value
# num_bits = len(science_values)
# chunk_size = len(science_values) // self.num_counters
# self.data = [
# science_values[i : i + chunk_size] for i in range(0, num_bits, chunk_size)
# ]
self.data = [["1"] * 128] * self.num_counters
chunk_size = len(science_values_decompressed) // self.num_counters
science_values_unpacked = [
science_values_decompressed[i : i + chunk_size]
for i in range(0, len(science_values_decompressed), chunk_size)
]

# Further divide up the data by energy levels
# The result is a [12,128] array representing 12 spin angles and 128
# energy levels
self.data = []
for counter_data in science_values_unpacked:
data_array = [
counter_data[i : i + self.num_energy_steps]
for i in range(0, len(counter_data), self.num_energy_steps)
]
self.data.append(data_array) # type: ignore[arg-type]
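To make the two-stage split above concrete (the first stage is the same per-counter chunking that `unpack_hi_science_data` performs), here is a minimal sketch that substitutes a dummy list of counts for the real `decompress()` output, with small sizes so the result is easy to eyeball:

```python
# Stage 1: split the decompressed values evenly across counters.
# Stage 2: split each counter's chunk into per-spin-sector rows of
# num_energy_steps values, giving a [spin_sector, energy] 2D array.
num_counters, num_spin_sectors, num_energy_steps = 3, 2, 4
decompressed = list(range(num_counters * num_spin_sectors * num_energy_steps))

chunk_size = len(decompressed) // num_counters
per_counter = [
    decompressed[i : i + chunk_size]
    for i in range(0, len(decompressed), chunk_size)
]

data = [
    [
        counter_data[i : i + num_energy_steps]
        for i in range(0, len(counter_data), num_energy_steps)
    ]
    for counter_data in per_counter
]
print(len(data), len(data[0]), len(data[0][0]))  # 3 2 4
```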


def create_event_dataset(
@@ -334,9 +386,6 @@ def create_event_dataset(
attrs=cdf_attrs.get_global_attributes(dataset_name),
)

# TODO: Determine what should go in event data CDF and how it should be
# structured.

return dataset


@@ -385,13 +434,15 @@ def create_hskp_dataset(
)

# TODO: Change 'TBD' catdesc and fieldname
# Once housekeeping packet definition file is re-generated with updated
# version of space_packet_parser, can get fieldname and catdesc info via:
# for key, value in (packet.header | packet.data).items():
# fieldname = value.short_description
# catdesc = value.long_description
# I am holding off making this change until I acquire updated housekeeping
# packets/validation data that match the latest telemetry definitions
# Once housekeeping packet definition file is re-generated with
# updated version of space_packet_parser, can get fieldname and
# catdesc info via:
# for key, value in (packet.header | packet.data).items():
# fieldname = value.short_description
# catdesc = value.long_description
# I am holding off making this change until I acquire updated
# housekeeping packets/validation data that match the latest telemetry
# definitions
for key, value in metadata_arrays.items():
attrs = cdf_attrs.get_variable_attributes("codice_support_attrs")
attrs["CATDESC"] = "TBD"
@@ -457,8 +508,6 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset:
dataset : xarray.Dataset
The ``xarray`` dataset containing the science data and supporting metadata.
"""
# TODO: Use new packet_file_to_dataset() function to simplify things

# Decom the packets, group data by APID, and sort by time
packets = decom_packets(file_path)
grouped_data = group_by_apid(packets)
@@ -496,7 +545,7 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset:

# Determine the start time of the packet
met = packets[0].data["ACQ_START_SECONDS"].raw_value
met = [met, met + 1] # TODO: Remove after cdflib fix

# Extract the data
science_values = packets[0].data["DATA"].raw_value

Expand All @@ -506,8 +555,12 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset:
# Run the pipeline to create a dataset for the product
pipeline = CoDICEL1aPipeline(table_id, plan_id, plan_step, view_id)
pipeline.configure_data_products(apid)
pipeline.unpack_science_data(science_values)
if "_lo_" in pipeline.dataset_name:
pipeline.unpack_lo_science_data(science_values)
elif "_hi_" in pipeline.dataset_name:
pipeline.unpack_hi_science_data(science_values)
dataset = pipeline.create_science_dataset(met, data_version)

logger.info(f"\nFinal data product:\n{dataset}\n")

return dataset
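For anyone trying the branch locally, a hypothetical end-to-end usage sketch (the packet file name and version string below are made up; substitute a real CoDICE L0 packet file):

```python
# Run the CoDICE L1a pipeline on a raw packet file and inspect the result.
from pathlib import Path

from imap_processing.codice.codice_l1a import process_codice_l1a

dataset = process_codice_l1a(Path("imap_codice_l0_raw_20240429_v001.pkts"), "001")
print(dataset)
```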