CoDICE L1 decompression and unpacking data updates #762

Merged

Changes from 13 commits
22 changes: 22 additions & 0 deletions imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml
@@ -36,6 +36,28 @@ energy_attrs:
VALIDMAX: 127
VAR_TYPE: support_data

inst_az_attrs:
<<: *default
CATDESC: Azimuth
FIELDNAM: Azimuth
FORMAT: I2
LABLAXIS: Azimuth
Comment on lines +41 to +44

Collaborator Author
I'm planning to provide better labels/descriptions for these in an upcoming PR where I address nomenclature

UNITS: ' '
VALIDMIN: 0
VALIDMAX: 31
VAR_TYPE: support_data

spin_sector_attrs:
<<: *default
CATDESC: Spin sector indicating range of spin angles
FIELDNAM: Spin sector
FORMAT: I2
LABLAXIS: spin sector
UNITS: ' '
VALIDMIN: 0
VALIDMAX: 11
VAR_TYPE: support_data

# <=== Labels ===>
energy_label:
CATDESC: Energy per charge (E/q) sweeping step
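For context, the <<: *default lines above merge in a shared attribute set; the *default anchor itself is defined earlier in the file (not shown in this hunk). A minimal sketch of how the merge key resolves, assuming a PyYAML-style loader and illustrative anchor contents:

import yaml

doc = """
default: &default
  UNITS: ' '
  VAR_TYPE: support_data
inst_az_attrs:
  <<: *default
  CATDESC: Azimuth
"""
# PyYAML's safe_load resolves the YAML 1.1 merge key ("<<"), so
# inst_az_attrs inherits the anchored defaults and adds its own keys.
attrs = yaml.safe_load(doc)["inst_az_attrs"]
print(attrs)  # contains UNITS and VAR_TYPE from the anchor, plus CATDESC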
22 changes: 22 additions & 0 deletions imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml
@@ -32,6 +32,28 @@ energy_attrs:
VALIDMAX: 127
VAR_TYPE: support_data

inst_az_attrs:
<<: *default
CATDESC: Azimuth
FIELDNAM: Azimuth
FORMAT: I2
LABLAXIS: Azimuth
UNITS: ' '
VALIDMIN: 0
VALIDMAX: 31
VAR_TYPE: support_data

spin_sector_attrs:
<<: *default
CATDESC: Spin sector indicating range of spin angles
FIELDNAM: Spin sector
FORMAT: I4
LABLAXIS: spin sector
UNITS: ' '
VALIDMIN: 0
VALIDMAX: 1152
VAR_TYPE: support_data

# <=== Labels ===>
energy_label:
CATDESC: Energy per charge (E/q) sweeping step
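Note that the L1b spin_sector block widens FORMAT to I4 and raises VALIDMAX to 1152 (versus I2 and 11 at L1a). FORMAT is a Fortran-style display format, so the declared integer width must cover VALIDMAX; a quick illustrative check (helper name hypothetical):

def format_fits(fmt: str, validmax: int) -> bool:
    # Fortran-style integer format "I<width>": <width> digits available
    width = int(fmt.lstrip("I"))
    return len(str(validmax)) <= width

assert format_fits("I2", 31)    # inst_az (L1a and L1b)
assert format_fits("I2", 11)    # spin_sector at L1a
assert format_fits("I4", 1152)  # spin_sector at L1b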
133 changes: 90 additions & 43 deletions imap_processing/codice/codice_l1a.py
@@ -27,16 +27,22 @@
from imap_processing.cdf.utils import met_to_j2000ns
from imap_processing.codice import constants
from imap_processing.codice.codice_l0 import decom_packets
from imap_processing.codice.decompress import decompress
from imap_processing.codice.utils import CODICEAPID, add_metadata_to_array
from imap_processing.utils import group_by_apid, sort_by_time

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# TODO: Decom data arrays need to be decompressed
# TODO: In decommutation, how to have a variable length data and then a checksum
# after it? (Might be fixed with new XTCE script updates)
# TODO: Add support for decomming multiple APIDs from a single file
# TODO: Add these as variables in CDF: SPIN_PERIOD, ST_BIAS_GAIN_MODE,
# SW_BIAS_GAIN_MODE, RGFO_HALF_SPIN, NSO_HALF_SPIN, DATA_QUALITY
# TODO: Use new packet_file_to_dataset() function to simplify things
# TODO: Determine what should go in event data CDF and how it should be
# structured.
# TODO: Make sure CDF attributes match expected nomenclature


class CoDICEL1aPipeline:
@@ -69,8 +75,10 @@ class CoDICEL1aPipeline:
Retrieve the acquisition times via the Lo stepping table.
get_esa_sweep_values()
Retrieve the ESA sweep values.
unpack_science_data()
Make 4D L1a data product from the decompressed science data.
unpack_hi_science_data()
Decompress, unpack, and restructure CoDICE-Hi data arrays.
unpack_lo_science_data()
Decompress, unpack, and restructure CoDICE-Lo data arrays.
"""

def __init__(self, table_id: int, plan_id: int, plan_step: int, view_id: int):
@@ -92,8 +100,11 @@ def configure_data_products(self, apid: int) -> None:
config = constants.DATA_PRODUCT_CONFIGURATIONS.get(apid) # type: ignore[call-overload]
self.num_counters = config["num_counters"]
self.num_energy_steps = config["num_energy_steps"]
self.num_spin_sectors = config["num_spin_sectors"]
self.num_positions = config["num_positions"]
self.variable_names = config["variable_names"]
self.dataset_name = config["dataset_name"]
self.instrument = config["instrument"]

def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset:
"""
@@ -121,11 +132,23 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset

# Define coordinates
epoch = xr.DataArray(
met_to_j2000ns(met), # TODO: Fix after SIT-3 (see note below)
[met_to_j2000ns(met)],
name="epoch",
dims=["epoch"],
attrs=cdf_attrs.get_variable_attributes("epoch"),
)
inst_az = xr.DataArray(
np.arange(self.num_positions),
name="inst_az",
dims=["inst_az"],
attrs=cdf_attrs.get_variable_attributes("inst_az_attrs"),
)
spin_sector = xr.DataArray(
np.arange(self.num_spin_sectors),
name="spin_sector",
dims=["spin_sector"],
attrs=cdf_attrs.get_variable_attributes("spin_sector_attrs"),
)
energy_steps = xr.DataArray(
np.arange(self.num_energy_steps),
name="energy",
@@ -145,6 +168,8 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset
dataset = xr.Dataset(
coords={
"epoch": epoch,
"inst_az": inst_az,
"spin_sector": spin_sector,
"energy": energy_steps,
"energy_label": energy_label,
},
@@ -153,25 +178,34 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset

# Create a data variable for each counter
for variable_data, variable_name in zip(self.data, self.variable_names):
# TODO: Currently, cdflib doesn't properly write/read CDF files that
# have a single epoch value. To get around this for now, use
# two epoch values and reshape accordingly. Revisit this after
# SIT-3. See https://github.com/MAVENSDC/cdflib/issues/268
variable_data_arr = np.array(list(variable_data) * 2, dtype=int).reshape(
2, self.num_energy_steps
)
# Data array shapes and dimension ordering depend on the instrument
if self.instrument == "lo":
variable_data_arr = np.array(variable_data).reshape(
1, self.num_positions, self.num_spin_sectors, self.num_energy_steps
)
dims = ["epoch", "inst_az", "spin_sector", "energy"]
elif self.instrument == "hi":
variable_data_arr = np.array(variable_data).reshape(
1, self.num_energy_steps, self.num_positions, self.num_spin_sectors
)
dims = ["epoch", "energy", "inst_az", "spin_sector"]

# Get the CDF attributes
cdf_attrs_key = (
f"{self.dataset_name.split('imap_codice_l1a_')[-1]}-{variable_name}"
)
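# For example (hypothetical names): a dataset_name of
# "imap_codice_l1a_lo-sw-species" and a variable_name of "hplus"
# would yield the cdf_attrs_key "lo-sw-species-hplus".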
attrs = cdf_attrs.get_variable_attributes(cdf_attrs_key)

# Create the CDF data variable
dataset[variable_name] = xr.DataArray(
variable_data_arr,
name=variable_name,
dims=["epoch", "energy"],
attrs=cdf_attrs.get_variable_attributes(cdf_attrs_key),
dims=dims,
attrs=attrs,
)

# Add ESA Sweep Values and acquisition times (lo only)
if "_lo_" in self.dataset_name:
if self.instrument == "lo":
self.get_esa_sweep_values()
self.get_acquisition_times()
dataset["esa_sweep_values"] = xr.DataArray(
@@ -264,31 +298,46 @@ def get_esa_sweep_values(self) -> None:

def unpack_science_data(self, science_values: str) -> None:
"""
Unpack the science data from the packet.
Decompress, unpack, and restructure science data arrays.

For LO SW Species Counts data, the science data within the packet is a
blob of compressed values of length 2048 bits (16 species * 128 energy
levels). These data need to be divided up by species so that each
species can have their own data variable in the L1A CDF file.
The science data within the packet is a compressed binary string of
values. These data need to be divided up by species or priority
(referred to here generically as "counters") and re-arranged into 3D
arrays representing spin sectors, positions, and energy steps (the
ordering depends on the instrument).

Parameters
----------
science_values : str
A string of binary data representing the science values of the data.
"""
self.compression_algorithm = constants.LO_COMPRESSION_ID_LOOKUP[self.view_id]
self.collapse_table_id = constants.LO_COLLAPSE_TABLE_ID_LOOKUP[self.view_id]

# TODO: Turn this back on after SIT-3
# For SIT-3, just create appropriate length data arrays of all ones
# Divide up the data by the number of priorities or species
# science_values = packets[0].data["DATA"].raw_value
# num_bits = len(science_values)
# chunk_size = len(science_values) // self.num_counters
# self.data = [
# science_values[i : i + chunk_size] for i in range(0, num_bits, chunk_size)
# ]
self.data = [["1"] * 128] * self.num_counters
# Decompress the binary string into a list of integers
science_values_decompressed = decompress(
science_values, self.compression_algorithm
)

# Re-arrange the counter data
# For CoDICE-lo, data are 3D arrays with a shape representing
# [<num_positions>,<num_spin_sectors>,<num_energy_steps>]
if self.instrument == "lo":
self.data = np.array(science_values_decompressed, dtype=np.uint).reshape(
Contributor

do you mean to add dtype=np.uint32 or 64?

Collaborator Author

Yes, I think this can be uint32. This prompted me to read up on the difference between np.uint and np.uint32, and it sounds like uint32 is the better choice:

np.uint:

    This is an alias for NumPy's default unsigned integer type, which is platform-dependent: commonly 64-bit on Linux/macOS and 32-bit on Windows. Its size can therefore vary based on the platform and NumPy build.

np.uint32:

    This explicitly defines a 32-bit unsigned integer. It can store values from 0 to 2^32 − 1, which is 0 to 4,294,967,295. This is a fixed-size type and is not platform-dependent.

self.num_counters,
self.num_positions,
self.num_spin_sectors,
self.num_energy_steps,
)

# For CoDICE-hi, data are 3D arrays with a shape representing
# [<num_energy_steps>,<num_positions>,<num_spin_sectors>]
elif self.instrument == "hi":
self.data = np.array(science_values_decompressed, dtype=np.uint).reshape(
self.num_counters,
self.num_energy_steps,
self.num_positions,
self.num_spin_sectors,
)
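A minimal sketch of what these reshapes do, using hypothetical tiny dimensions (not CoDICE's real sizes) and the fixed-width dtype discussed in the thread above:

import numpy as np

# np.uint is platform-dependent (commonly 8 bytes on Linux/macOS, 4 on
# Windows); np.uint32 is always 4 bytes with a max of 4,294,967,295.
print(np.dtype(np.uint).itemsize, np.dtype(np.uint32).itemsize)
print(np.iinfo(np.uint32).max)

# Hypothetical lo-style case: 2 counters, 2 positions, 3 spin sectors,
# 4 energy steps, flattened in C order (the layout the reshape assumes).
flat = list(range(2 * 2 * 3 * 4))
data = np.array(flat, dtype=np.uint32).reshape(2, 2, 3, 4)

# C-order reshape: the last axis (energy) varies fastest, so counter c,
# position p, sector s, energy e maps to flat index
# ((c * num_positions + p) * num_spin_sectors + s) * num_energy_steps + e.
assert data[1, 0, 2, 3] == ((1 * 2 + 0) * 3 + 2) * 4 + 3  # == 35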


def create_event_dataset(
@@ -334,9 +383,6 @@ def create_event_dataset(
attrs=cdf_attrs.get_global_attributes(dataset_name),
)

# TODO: Determine what should go in event data CDF and how it should be
# structured.

return dataset


@@ -385,13 +431,15 @@ def create_hskp_dataset(
)

# TODO: Change 'TBD' catdesc and fieldname
# Once housekeeping packet definition file is re-generated with updated
# version of space_packet_parser, can get fieldname and catdesc info via:
# for key, value in (packet.header | packet.data).items():
# fieldname = value.short_description
# catdesc = value.long_description
# I am holding off making this change until I acquire updated housekeeping
# packets/validation data that match the latest telemetry definitions
# Once housekeeping packet definition file is re-generated with
# updated version of space_packet_parser, can get fieldname and
# catdesc info via:
# for key, value in (packet.header | packet.data).items():
# fieldname = value.short_description
# catdesc = value.long_description
# I am holding off making this change until I acquire updated
# housekeeping packets/validation data that match the latest telemetry
# definitions
for key, value in metadata_arrays.items():
attrs = cdf_attrs.get_variable_attributes("codice_support_attrs")
attrs["CATDESC"] = "TBD"
@@ -457,8 +505,6 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset:
dataset : xarray.Dataset
The ``xarray`` dataset containing the science data and supporting metadata.
"""
# TODO: Use new packet_file_to_dataset() function to simplify things

# Decom the packets, group data by APID, and sort by time
packets = decom_packets(file_path)
grouped_data = group_by_apid(packets)
@@ -496,7 +542,7 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset:

# Determine the start time of the packet
met = packets[0].data["ACQ_START_SECONDS"].raw_value
met = [met, met + 1] # TODO: Remove after cdflib fix

# Extract the data
science_values = packets[0].data["DATA"].raw_value

@@ -510,4 +556,5 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset:
dataset = pipeline.create_science_dataset(met, data_version)

logger.info(f"\nFinal data product:\n{dataset}\n")

return dataset
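For reference, a hypothetical invocation of the updated pipeline (the packet filename and version string are illustrative):

from pathlib import Path

from imap_processing.codice.codice_l1a import process_codice_l1a

dataset = process_codice_l1a(Path("imap_codice_l0_raw.pkts"), data_version="001")
print(dataset)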