Skip to content

Commit

Permalink
Adding quality flags and uncertainty updates
Browse files Browse the repository at this point in the history
  • Loading branch information
tech3371 committed Aug 29, 2024
1 parent 3e71692 commit 9c8c457
Show file tree
Hide file tree
Showing 7 changed files with 212 additions and 76 deletions.
8 changes: 4 additions & 4 deletions imap_processing/cdf/config/imap_swapi_global_cdf_attrs.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
instrument_base: &instrument_base
Descriptor: SWAPI>The Solar Wind and Pickup Ion
Descriptor: SWAPI>Solar Wind and Pickup Ion
TEXT: >
The Solar Wind and Pickup Ion (SWAPI) instrument measures several different
elements of the solar wind, including hydrogen (H) and helium (He) ions,
Expand All @@ -15,8 +15,8 @@ imap_swapi_l1_sci:
# NOTE: Right now, this Data_level is required to produce valid CDF
Data_level: "1"
Data_type: L1_SCI>Level-1 Science data in 1 minute resolution
Logical_source: imap_swapi_l1_sci-1min
Logical_source_description: SWAPI Instrument Level-1 Science Data in 1 minute resolution
Logical_source: imap_swapi_l1_sci
Logical_source_description: SWAPI Instrument Level-1 Science Data

imap_swapi_l1_hk:
<<: *instrument_base
Expand All @@ -29,5 +29,5 @@ imap_swapi_l2_sci:
<<: *instrument_base
Data_level: "2"
Data_type: L2_SCI>Level-2 Science data in 1 minute resolution
Logical_source: imap_swapi_l2_sci-1min
Logical_source: imap_swapi_l2_sci
Logical_source_description: SWAPI Instrument Level-2 Science Data in 1 minute resolution
50 changes: 29 additions & 21 deletions imap_processing/cdf/config/imap_swapi_variable_attrs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,31 @@ energy:
VALIDMIN: 0
VAR_TYPE: support_data

flags:
CATDESC: Data quality flags
FIELDNAM: Quality Flags
FILLVAL: -9223370000000000000
FORMAT: I2
LABLAXIS: Flags
SCALE_TYP: linear
UNITS: " "
VALIDMAX: 1
VALIDMIN: 0
VAR_TYPE: support_data

# <=== LABL_PTR_i Attributes ===>
energy_label:
CATDESC: Energy step id in lookup table
FIELDNAM: Energy Step
FORMAT: A2
VAR_TYPE: metadata

flags_label:
CATDESC: Data quality flags
FIELDNAM: Quality Flags
FORMAT: A20
VAR_TYPE: metadata

# <=== Data Variables ===>
counts_default: &counts_default
DEPEND_0: epoch
Expand All @@ -32,18 +50,26 @@ counts_default: &counts_default
VAR_TYPE: data
SCALETYP: linear

compression_default: &compression_default
flags_default:
CATDESC: Data quality flags.
FIELDNAM: Quality Flag
LABLAXIS: Flags
DEPEND_0: epoch
DEPEND_1: energy
DEPEND_1: flags
DEPEND_2: energy
DISPLAY_TYPE: spectrogram
LABL_PTR_1: energy_label
LABL_PTR_1: flags_label
LABL_PTR_2: energy_label
FILLVAL: 4294967295
FORMAT: I1
UNITS: ' '
VALIDMIN: 0
VALIDMAX: 1
VAR_TYPE: data
SCALETYP: linear
  VAR_NOTES: >
    There are 13 flags in total; the first three flags come from science packets
    and the remaining 10 flags come from housekeeping packets.
uncertainty_default: &uncertainty_default
DEPEND_0: epoch
Expand Down Expand Up @@ -89,24 +115,6 @@ coin_counts:
FIELDNAM: Coincidence CEM Counts
LABLAXIS: coin_cnts

pcem_flags:
<<: *compression_default
CATDESC: Primary Channel Electron Multiplier (CEM) compression flag
FIELDNAM: Primary CEM Flag
LABLAXIS: pcem_flag

scem_flags:
<<: *compression_default
CATDESC: Secondary Channel Electron Multiplier (CEM) compression flag
FIELDNAM: Secondary CEM Flag
LABLAXIS: scem_flag

coin_flags:
<<: *compression_default
CATDESC: Coincidence compression flag
FIELDNAM: Coincidence Flag
LABLAXIS: coin_flag

pcem_uncertainty:
<<: *uncertainty_default
CATDESC: Primary Channel Electron Multiplier (CEM) uncertainty
Expand Down
163 changes: 131 additions & 32 deletions imap_processing/swapi/l1/swapi_l1.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,13 +111,15 @@ def decompress_count(
# If data is compressed, decompress it
compressed_indices = compression_flag == 1
new_count[compressed_indices] *= 16
# TODO: add data type check here.

# If the data was compressed and the count was 0xFFFF, mark it as an overflow
if np.any(count_data < 0):
raise ValueError(
"Count data type must be unsigned int and should not contain negative value"
)
new_count[compressed_indices & (count_data == 0xFFFF)] = -1

# SWAPI suggested using big value to indicate overflow.
new_count[compressed_indices & (count_data == 0xFFFF)] = np.iinfo(np.int32).max
return new_count


Expand Down Expand Up @@ -405,14 +407,18 @@ def process_sweep_data(full_sweep_sci: xr.Dataset, cem_prefix: str) -> xr.Datase
return all_cem_data


def process_swapi_science(sci_dataset: xr.Dataset, data_version: str) -> xr.Dataset:
def process_swapi_science(
sci_dataset: xr.Dataset, hk_dataset: xr.Dataset, data_version: str
) -> xr.Dataset:
"""
Will process SWAPI science data and create CDF file.
Parameters
----------
sci_dataset : xarray.Dataset
L0 data.
hk_dataset : xarray.Dataset
Housekeeping data.
data_version : str
Version of the data product being created.
Expand All @@ -421,6 +427,11 @@ def process_swapi_science(sci_dataset: xr.Dataset, data_version: str) -> xr.Data
dataset : xarray.Dataset
Dataset.
"""
# pylint: disable=too-many-statements
# error:
# imap_processing/swapi/l1/swapi_l1.py:412:5: PLR0915
# Too many statements (51 > 50)

# ====================================================
# Step 1: Filter full cycle data
# ====================================================
Expand Down Expand Up @@ -454,18 +465,95 @@ def process_swapi_science(sci_dataset: xr.Dataset, data_version: str) -> xr.Data
swp_scem_counts = decompress_count(raw_scem_count, scem_compression_flags)
swp_coin_counts = decompress_count(raw_coin_count, coin_compression_flags)

# ====================================================
# Load the CDF attributes
# ====================================================
cdf_manager = ImapCdfAttributes()
cdf_manager.add_instrument_global_attrs("swapi")
cdf_manager.load_variable_attributes("imap_swapi_variable_attrs.yaml")

# ===================================================================
# Quality flags
# ===================================================================
flags_name = [
"swp_pcem_comp",
"swp_scem_comp",
"swp_coin_comp",
"ovr_t_st",
"und_t_st",
"pcem_cnt_st",
"scem_cnt_st",
"pcem_v_st",
"pcem_i_st",
"pcem_int_st",
"scem_v_st",
"scem_i_st",
"scem_int_st",
]
quality_flags_data = []
# Add science data quality flags
quality_flags_data.append(pcem_compression_flags)
quality_flags_data.append(scem_compression_flags)
quality_flags_data.append(coin_compression_flags)

# Add housekeeping-derived quality flags
# --------------------------------------
# Get times of good and full sweep data from science dataset.
# Then use these times to get housekeeping data.
# TODO: how to handle time delta between SWAPI HK and SWAPI SCI packets?
# For now, we assume that SHCOARSE in the SWAPI HK and SWAPI SCI packets
# are exactly equal at any given point, no significant time delta.
good_sweep_times = good_sweep_sci["epoch"].data
good_sweep_hk_data = hk_dataset.sel({"epoch": good_sweep_times})

# Since there will be one SWAPI HK packet for each SWAPI SCI packets, as
# both are recorded at 1 Hz(1 second), we can use this information to set
# quality flag for each science packet's data. Each packet contains
# information of one sequence data. Each sequence's
    # PCEM_CNT0, PCEM_CNT1, PCEM_CNT2, PCEM_CNT3, PCEM_CNT4,
    # PCEM_CNT5 data is coming from the same science packet and therefore
    # it's going to share the same HK quality flag. That's why the HK quality
    # flag is repeated 6 times for each sequence (aka packet).
for flag_name in flags_name[3:]:
quality_flags_data.append(
np.repeat(good_sweep_hk_data[flag_name].data, 6).reshape(-1, 72)
)

# Before, individual quality flags data were in this shape:
# (number_of_sweep, energy_step)
# Now, to group array of flags data into this shape:
# (number_of_sweep, number_of_flags, energy_step)
# With this, each sweep has its quality flag.
quality_flags_data = np.stack(quality_flags_data, axis=1)

swp_flags = xr.DataArray(
np.array(quality_flags_data, dtype=np.uint8),
dims=["epoch", "flags", "energy"],
attrs=cdf_manager.get_variable_attributes("flags_default"),
)

# Quality flags of sweep data
flags = xr.DataArray(
np.arange(13),
name="flags",
dims=["flags"],
attrs=cdf_manager.get_variable_attributes("flags"),
)

flags_label = xr.DataArray(
flags_name,
name="flags_label",
dims=["flags_label"],
attrs=cdf_manager.get_variable_attributes("flags_label"),
)

# ===================================================================
# Step 3: Create xarray.Dataset
# ===================================================================

# epoch time. Should be same dimension as number of good sweeps
epoch_values = good_sweep_sci["epoch"].data.reshape(total_full_sweeps, 12)[:, 0]

# Load the CDF attributes
cdf_manager = ImapCdfAttributes()
cdf_manager.add_instrument_global_attrs("swapi")
cdf_manager.load_variable_attributes("imap_swapi_variable_attrs.yaml")

epoch_time = xr.DataArray(
epoch_values,
name="epoch",
Expand Down Expand Up @@ -497,7 +585,13 @@ def process_swapi_science(sci_dataset: xr.Dataset, data_version: str) -> xr.Data
l1_global_attrs["Apid"] = f"{sci_dataset['pkt_apid'].data[0]}"

dataset = xr.Dataset(
coords={"epoch": epoch_time, "energy": energy, "energy_label": energy_label},
coords={
"epoch": epoch_time,
"energy": energy,
"energy_label": energy_label,
"flags": flags,
"flags_label": flags_label,
},
attrs=l1_global_attrs,
)

Expand All @@ -517,42 +611,44 @@ def process_swapi_science(sci_dataset: xr.Dataset, data_version: str) -> xr.Data
attrs=cdf_manager.get_variable_attributes("coin_counts"),
)

# L1 quality flags
# TODO: Should these be kept in raw format rather than derived into strings?
dataset["swp_pcem_flags"] = xr.DataArray(
np.array(pcem_compression_flags, dtype=np.uint8),
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("pcem_flags"),
)
dataset["swp_scem_flags"] = xr.DataArray(
np.array(scem_compression_flags, dtype=np.uint8),
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("scem_flags"),
)
dataset["swp_coin_flags"] = xr.DataArray(
np.array(coin_compression_flags, dtype=np.uint8),
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("coin_flags"),
)

# Add quality flags to the dataset
dataset["swp_flags"] = swp_flags
# ===================================================================
# Step 4: Calculate uncertainty
# ===================================================================
# Uncertainty in counts formula:
# Uncertainty is quantified for the PCEM, SCEM, and COIN counts.
# The Poisson contribution is
# uncertainty = sqrt(count)
dataset["swp_pcem_err"] = xr.DataArray(

# TODO: fill in with actual formula once SWAPI provides it.
# Right now, we are using sqrt(count) as a placeholder
dataset["swp_pcem_err_plus"] = xr.DataArray(
np.sqrt(swp_pcem_counts),
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("pcem_uncertainty"),
)
dataset["swp_scem_err"] = xr.DataArray(
dataset["swp_pcem_err_minus"] = xr.DataArray(
np.sqrt(swp_pcem_counts),
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("pcem_uncertainty"),
)
dataset["swp_scem_err_plus"] = xr.DataArray(
np.sqrt(swp_scem_counts),
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("scem_uncertainty"),
)
dataset["swp_scem_err_minus"] = xr.DataArray(
np.sqrt(swp_scem_counts),
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("scem_uncertainty"),
)
dataset["swp_coin_err"] = xr.DataArray(
dataset["swp_coin_err_plus"] = xr.DataArray(
np.sqrt(swp_coin_counts),
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("coin_uncertainty"),
)
dataset["swp_coin_err_minus"] = xr.DataArray(
np.sqrt(swp_coin_counts),
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("coin_uncertainty"),
Expand Down Expand Up @@ -587,14 +683,17 @@ def swapi_l1(file_path: str, data_version: str) -> xr.Dataset:
file_path, xtce_definition, use_derived_value=False
)
processed_data = []

for apid, ds_data in datasets.items():
        # Right now, we only process SWP_HK and SWP_SCI;
        # other packets are not processed in this processing pipeline
# If appId is science, then the file should contain all data of science appId

if apid == SWAPIAPID.SWP_SCI.value:
data = process_swapi_science(ds_data, data_version)
processed_data.append(data)
sci_dataset = process_swapi_science(
ds_data, datasets[SWAPIAPID.SWP_HK], data_version
)
processed_data.append(sci_dataset)
if apid == SWAPIAPID.SWP_HK.value:
# Add HK datalevel attrs
# TODO: ask SWAPI if we need to process HK if we can use WebPODA
Expand Down
Loading

0 comments on commit 9c8c457

Please sign in to comment.