Skip to content

Commit

Permalink
SWAPI Quality Flag (#805)
Browse files Browse the repository at this point in the history
Adding quality flags and uncertainty updates
  • Loading branch information
tech3371 authored Sep 6, 2024
1 parent 8b7d5e8 commit 46bb8fd
Show file tree
Hide file tree
Showing 8 changed files with 194 additions and 76 deletions.
8 changes: 4 additions & 4 deletions imap_processing/cdf/config/imap_swapi_global_cdf_attrs.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
instrument_base: &instrument_base
Descriptor: SWAPI>The Solar Wind and Pickup Ion
Descriptor: SWAPI>Solar Wind and Pickup Ion
TEXT: >
The Solar Wind and Pickup Ion (SWAPI) instrument measures several different
elements of the solar wind, including hydrogen (H) and helium (He) ions,
Expand All @@ -15,8 +15,8 @@ imap_swapi_l1_sci:
# NOTE: Right now, this Data_level is required to produce valid CDF
Data_level: "1"
Data_type: L1_SCI>Level-1 Science data in 1 minute resolution
Logical_source: imap_swapi_l1_sci-1min
Logical_source_description: SWAPI Instrument Level-1 Science Data in 1 minute resolution
Logical_source: imap_swapi_l1_sci
Logical_source_description: SWAPI Instrument Level-1 Science Data

imap_swapi_l1_hk:
<<: *instrument_base
Expand All @@ -29,5 +29,5 @@ imap_swapi_l2_sci:
<<: *instrument_base
Data_level: "2"
Data_type: L2_SCI>Level-2 Science data in 1 minute resolution
Logical_source: imap_swapi_l2_sci-1min
Logical_source: imap_swapi_l2_sci
Logical_source_description: SWAPI Instrument Level-1 Science Data in 1 minute resolution
33 changes: 13 additions & 20 deletions imap_processing/cdf/config/imap_swapi_variable_attrs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ counts_default: &counts_default
VAR_TYPE: data
SCALETYP: linear

compression_default: &compression_default
flags_default:
CATDESC: Bitwise flag. There are 13 flags. See VAR_NOTES for more details.
FIELDNAM: Quality Flag
LABLAXIS: Flags
DEPEND_0: epoch
DEPEND_1: energy
DISPLAY_TYPE: spectrogram
Expand All @@ -41,9 +44,17 @@ compression_default: &compression_default
FORMAT: I1
UNITS: ' '
VALIDMIN: 0
VALIDMAX: 1
VALIDMAX: 32767
VAR_TYPE: data
SCALETYP: linear
VAR_NOTES: >
There are 13 flags in total, first three flags are from science packets
and remaining 10 flags are from housekeeping packets. Flags are stored
as bitwise flag. 32767 is the value when all 15-bits are 1s. Eg.
int('0111111111111111', 2). First two bits from right are saved to save defaults.
Then, the remaining 13 bits from right are used for 13 flags. The flags are
as follows, "OVR_T_ST", "UND_T_ST", "PCEM_CNT_ST", "SCEM_CNT_ST","PCEM_V_ST",
"PCEM_I_ST", "PCEM_INT_ST", "SCEM_V_ST", "SCEM_I_ST", "SCEM_INT_ST".
uncertainty_default: &uncertainty_default
DEPEND_0: epoch
Expand Down Expand Up @@ -89,24 +100,6 @@ coin_counts:
FIELDNAM: Coincidence CEM Counts
LABLAXIS: coin_cnts

pcem_flags:
<<: *compression_default
CATDESC: Primary Channel Electron Multiplier (CEM) compression flag
FIELDNAM: Primary CEM Flag
LABLAXIS: pcem_flag

scem_flags:
<<: *compression_default
CATDESC: Secondary Channel Electron Multiplier (CEM) compression flag
FIELDNAM: Secondary CEM Flag
LABLAXIS: scem_flag

coin_flags:
<<: *compression_default
CATDESC: Coincidence compression flag
FIELDNAM: Coincidence Flag
LABLAXIS: coin_flag

pcem_uncertainty:
<<: *uncertainty_default
CATDESC: Primary Channel Electron Multiplier (CEM) uncertainty
Expand Down
23 changes: 23 additions & 0 deletions imap_processing/quality_flags.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,26 @@ class HitFlags(
INF = CommonFlags.INF # bit 0
NEG = CommonFlags.NEG # bit 1
FLAG3 = 2**2 # bit 2


class SWAPIFlags(
FlagNameMixin,
):
"""SWAPI flags."""

NONE = CommonFlags.NONE
INF = CommonFlags.INF # bit 0
NEG = CommonFlags.NEG # bit 1
SWP_PCEM_COMP = 2**2 # bit 2
SWP_SCEM_COMP = 2**3 # bit 3
SWP_COIN_COMP = 2**4 # bit 4
OVR_T_ST = 2**5 # bit 5
UND_T_ST = 2**6 # bit 6
PCEM_CNT_ST = 2**7 # bit 7
SCEM_CNT_ST = 2**8 # bit 8
PCEM_V_ST = 2**9 # bit 9
PCEM_I_ST = 2**10 # bit 10
PCEM_INT_ST = 2**11 # bit 11
SCEM_V_ST = 2**12 # bit 12
SCEM_I_ST = 2**13 # bit 13
SCEM_INT_ST = 2**14 # bit 14
142 changes: 110 additions & 32 deletions imap_processing/swapi/l1/swapi_l1.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from imap_processing import imap_module_directory
from imap_processing.cdf.imap_cdf_manager import ImapCdfAttributes
from imap_processing.quality_flags import SWAPIFlags
from imap_processing.swapi.swapi_utils import SWAPIAPID, SWAPIMODE
from imap_processing.utils import packet_file_to_datasets

Expand Down Expand Up @@ -111,13 +112,15 @@ def decompress_count(
# If data is compressed, decompress it
compressed_indices = compression_flag == 1
new_count[compressed_indices] *= 16
# TODO: add data type check here.

# If the data was compressed and the count was 0xFFFF, mark it as an overflow
if np.any(count_data < 0):
raise ValueError(
"Count data type must be unsigned int and should not contain negative value"
)
new_count[compressed_indices & (count_data == 0xFFFF)] = -1

# SWAPI suggested using big value to indicate overflow.
new_count[compressed_indices & (count_data == 0xFFFF)] = np.iinfo(np.int32).max
return new_count


Expand Down Expand Up @@ -405,14 +408,18 @@ def process_sweep_data(full_sweep_sci: xr.Dataset, cem_prefix: str) -> xr.Datase
return all_cem_data


def process_swapi_science(sci_dataset: xr.Dataset, data_version: str) -> xr.Dataset:
def process_swapi_science(
sci_dataset: xr.Dataset, hk_dataset: xr.Dataset, data_version: str
) -> xr.Dataset:
"""
Will process SWAPI science data and create CDF file.
Parameters
----------
sci_dataset : xarray.Dataset
L0 data.
hk_dataset : xarray.Dataset
Housekeeping data.
data_version : str
Version of the data product being created.
Expand Down Expand Up @@ -454,18 +461,79 @@ def process_swapi_science(sci_dataset: xr.Dataset, data_version: str) -> xr.Data
swp_scem_counts = decompress_count(raw_scem_count, scem_compression_flags)
swp_coin_counts = decompress_count(raw_coin_count, coin_compression_flags)

# ====================================================
# Load the CDF attributes
# ====================================================
cdf_manager = ImapCdfAttributes()
cdf_manager.add_instrument_global_attrs("swapi")
cdf_manager.load_variable_attributes("imap_swapi_variable_attrs.yaml")

# ===================================================================
# Quality flags
# ===================================================================
quality_flags_data = np.zeros((total_full_sweeps, 72), np.uint16)

# Add science data quality flags
quality_flags_data[pcem_compression_flags == 1] |= SWAPIFlags.SWP_PCEM_COMP
quality_flags_data[scem_compression_flags == 1] |= SWAPIFlags.SWP_SCEM_COMP
quality_flags_data[coin_compression_flags == 1] |= SWAPIFlags.SWP_COIN_COMP

# Add housekeeping-derived quality flags
# --------------------------------------
# Get times of good and full sweep data from science dataset.
# Then use these times to get housekeeping data. Something is wrong if
# there is no science data that matches exactly one housekeeping data. It
# should be one-to-one matching per SWAPI team, Jamie Rankin.
good_sweep_times = good_sweep_sci["epoch"].data
good_sweep_hk_data = hk_dataset.sel({"epoch": good_sweep_times})

# Since there is one SWAPI HK packet for each SWAPI SCI packet,
# and both are recorded at 1 Hz (1 packet per second),
# we can leverage this to set the quality flags for each science
# packet's data. Each SWAPI science packet represents
# one sequence of data, where the sequence includes measurements
# like PCEM_CNT0, PCEM_CNT1, PCEM_CNT2, PCEM_CNT3,
# PCEM_CNT4, and PCEM_CNT5. Because all these measurements come
# from the same science packet, they should share
# the same HK quality flag. This is why the HK quality flag is
# repeated 6 times, once for each measurement within
# the sequence (each packet corresponds to one sequence).

hk_flags_name = [
"OVR_T_ST",
"UND_T_ST",
"PCEM_CNT_ST",
"SCEM_CNT_ST",
"PCEM_V_ST",
"PCEM_I_ST",
"PCEM_INT_ST",
"SCEM_V_ST",
"SCEM_I_ST",
"SCEM_INT_ST",
]

for flag_name in hk_flags_name:
current_flag = np.repeat(good_sweep_hk_data[flag_name.lower()].data, 6).reshape(
-1, 72
)
# Use getattr to dynamically access the flag in SWAPIFlags class
flag_to_set = getattr(SWAPIFlags, flag_name)
# set the quality flag for each data
quality_flags_data[current_flag == 1] |= flag_to_set

swp_flags = xr.DataArray(
quality_flags_data,
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("flags_default"),
)

# ===================================================================
# Step 3: Create xarray.Dataset
# ===================================================================

# epoch time. Should be same dimension as number of good sweeps
epoch_values = good_sweep_sci["epoch"].data.reshape(total_full_sweeps, 12)[:, 0]

# Load the CDF attributes
cdf_manager = ImapCdfAttributes()
cdf_manager.add_instrument_global_attrs("swapi")
cdf_manager.load_variable_attributes("imap_swapi_variable_attrs.yaml")

epoch_time = xr.DataArray(
epoch_values,
name="epoch",
Expand Down Expand Up @@ -497,7 +565,11 @@ def process_swapi_science(sci_dataset: xr.Dataset, data_version: str) -> xr.Data
l1_global_attrs["Apid"] = f"{sci_dataset['pkt_apid'].data[0]}"

dataset = xr.Dataset(
coords={"epoch": epoch_time, "energy": energy, "energy_label": energy_label},
coords={
"epoch": epoch_time,
"energy": energy,
"energy_label": energy_label,
},
attrs=l1_global_attrs,
)

Expand All @@ -517,42 +589,45 @@ def process_swapi_science(sci_dataset: xr.Dataset, data_version: str) -> xr.Data
attrs=cdf_manager.get_variable_attributes("coin_counts"),
)

# L1 quality flags
# TODO: Should these be kept in raw format rather than derived into strings?
dataset["swp_pcem_flags"] = xr.DataArray(
np.array(pcem_compression_flags, dtype=np.uint8),
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("pcem_flags"),
)
dataset["swp_scem_flags"] = xr.DataArray(
np.array(scem_compression_flags, dtype=np.uint8),
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("scem_flags"),
)
dataset["swp_coin_flags"] = xr.DataArray(
np.array(coin_compression_flags, dtype=np.uint8),
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("coin_flags"),
)

# Add quality flags to the dataset
dataset["swp_flags"] = swp_flags
# ===================================================================
# Step 4: Calculate uncertainty
# ===================================================================
# Uncertainty in counts formula:
# Uncertainty is quantified for the PCEM, SCEM, and COIN counts.
# The Poisson contribution is
# uncertainty = sqrt(count)
dataset["swp_pcem_err"] = xr.DataArray(
# TODO:
# Above uncertaintly formula will change in the future.
# Replace it with actual formula once SWAPI provides it.
# Right now, we are using sqrt(count) as a placeholder
dataset["swp_pcem_err_plus"] = xr.DataArray(
np.sqrt(swp_pcem_counts),
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("pcem_uncertainty"),
)
dataset["swp_pcem_err_minus"] = xr.DataArray(
np.sqrt(swp_pcem_counts),
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("pcem_uncertainty"),
)
dataset["swp_scem_err"] = xr.DataArray(
dataset["swp_scem_err_plus"] = xr.DataArray(
np.sqrt(swp_scem_counts),
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("scem_uncertainty"),
)
dataset["swp_scem_err_minus"] = xr.DataArray(
np.sqrt(swp_scem_counts),
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("scem_uncertainty"),
)
dataset["swp_coin_err"] = xr.DataArray(
dataset["swp_coin_err_plus"] = xr.DataArray(
np.sqrt(swp_coin_counts),
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("coin_uncertainty"),
)
dataset["swp_coin_err_minus"] = xr.DataArray(
np.sqrt(swp_coin_counts),
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("coin_uncertainty"),
Expand Down Expand Up @@ -587,14 +662,17 @@ def swapi_l1(file_path: str, data_version: str) -> xr.Dataset:
file_path, xtce_definition, use_derived_value=False
)
processed_data = []

for apid, ds_data in datasets.items():
# Right now, we only process SWP_HK and SWP_SCI
# other packets are not process in this processing pipeline
# If appId is science, then the file should contain all data of science appId

if apid == SWAPIAPID.SWP_SCI.value:
data = process_swapi_science(ds_data, data_version)
processed_data.append(data)
sci_dataset = process_swapi_science(
ds_data, datasets[SWAPIAPID.SWP_HK], data_version
)
processed_data.append(sci_dataset)
if apid == SWAPIAPID.SWP_HK.value:
# Add HK datalevel attrs
# TODO: ask SWAPI if we need to process HK if we can use WebPODA
Expand Down
28 changes: 19 additions & 9 deletions imap_processing/swapi/l2/swapi_l2.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,7 @@ def swapi_l2(l1_dataset: xr.Dataset, data_version: str) -> xr.Dataset:
"epoch",
"energy",
"energy_label",
"swp_pcem_flags",
"swp_scem_flags",
"swp_coin_flags",
"swp_flags",
]
l2_dataset = l1_dataset[l1_data_keys]

Expand All @@ -76,17 +74,29 @@ def swapi_l2(l1_dataset: xr.Dataset, data_version: str) -> xr.Dataset:
l2_dataset["swp_coin_rate"].attrs = cdf_manager.get_variable_attributes("coin_rate")

# update uncertainty
l2_dataset["swp_pcem_unc"] = l1_dataset["swp_pcem_err"] / TIME_PER_BIN
l2_dataset["swp_scem_unc"] = l1_dataset["swp_scem_err"] / TIME_PER_BIN
l2_dataset["swp_coin_unc"] = l1_dataset["swp_coin_err"] / TIME_PER_BIN
l2_dataset["swp_pcem_unc_plus"] = l1_dataset["swp_pcem_err_plus"] / TIME_PER_BIN
l2_dataset["swp_pcem_unc_minus"] = l1_dataset["swp_pcem_err_minus"] / TIME_PER_BIN
l2_dataset["swp_scem_unc_plus"] = l1_dataset["swp_scem_err_plus"] / TIME_PER_BIN
l2_dataset["swp_scem_unc_minus"] = l1_dataset["swp_scem_err_minus"] / TIME_PER_BIN
l2_dataset["swp_coin_unc_plus"] = l1_dataset["swp_coin_err_plus"] / TIME_PER_BIN
l2_dataset["swp_coin_unc_minus"] = l1_dataset["swp_coin_err_minus"] / TIME_PER_BIN
# update attrs
l2_dataset["swp_pcem_unc"].attrs = cdf_manager.get_variable_attributes(
l2_dataset["swp_pcem_unc_plus"].attrs = cdf_manager.get_variable_attributes(
"pcem_uncertainty"
)
l2_dataset["swp_scem_unc"].attrs = cdf_manager.get_variable_attributes(
l2_dataset["swp_pcem_unc_minus"].attrs = cdf_manager.get_variable_attributes(
"pcem_uncertainty"
)
l2_dataset["swp_scem_unc_plus"].attrs = cdf_manager.get_variable_attributes(
"scem_uncertainty"
)
l2_dataset["swp_scem_unc_minus"].attrs = cdf_manager.get_variable_attributes(
"scem_uncertainty"
)
l2_dataset["swp_coin_unc"].attrs = cdf_manager.get_variable_attributes(
l2_dataset["swp_coin_unc_plus"].attrs = cdf_manager.get_variable_attributes(
"coin_uncertainty"
)
l2_dataset["swp_coin_unc_minus"].attrs = cdf_manager.get_variable_attributes(
"coin_uncertainty"
)

Expand Down
Loading

0 comments on commit 46bb8fd

Please sign in to comment.