SWAPI Quality Flag (#805)

Adding quality flags and uncertainty updates
IMAP-Science-Operations-Center · Sep 6, 2024 · 46bb8fd · 46bb8fd
1 parent 8b7d5e8
commit 46bb8fd
Show file tree

Hide file tree

Showing 8 changed files with 194 additions and 76 deletions.
diff --git a/imap_processing/cdf/config/imap_swapi_global_cdf_attrs.yaml b/imap_processing/cdf/config/imap_swapi_global_cdf_attrs.yaml
@@ -1,5 +1,5 @@
 instrument_base: &instrument_base
-  Descriptor: SWAPI>The Solar Wind and Pickup Ion
+  Descriptor: SWAPI>Solar Wind and Pickup Ion
   TEXT: >
     The Solar Wind and Pickup Ion (SWAPI) instrument measures several different
     elements of the solar wind, including hydrogen (H) and helium (He) ions,
@@ -15,8 +15,8 @@ imap_swapi_l1_sci:
   # NOTE: Right now, this Data_level is required to produce valid CDF
   Data_level: "1"
   Data_type: L1_SCI>Level-1 Science data in 1 minute resolution
-  Logical_source: imap_swapi_l1_sci-1min
-  Logical_source_description: SWAPI Instrument Level-1 Science Data in 1 minute resolution
+  Logical_source: imap_swapi_l1_sci
+  Logical_source_description: SWAPI Instrument Level-1 Science Data
 
 imap_swapi_l1_hk:
   <<: *instrument_base
@@ -29,5 +29,5 @@ imap_swapi_l2_sci:
   <<: *instrument_base
   Data_level: "2"
   Data_type: L2_SCI>Level-2 Science data in 1 minute resolution
-  Logical_source: imap_swapi_l2_sci-1min
+  Logical_source: imap_swapi_l2_sci
   Logical_source_description: SWAPI Instrument Level-1 Science Data in 1 minute resolution
diff --git a/imap_processing/cdf/config/imap_swapi_variable_attrs.yaml b/imap_processing/cdf/config/imap_swapi_variable_attrs.yaml
@@ -32,7 +32,10 @@ counts_default: &counts_default
   VAR_TYPE: data
   SCALETYP: linear
 
-compression_default: &compression_default
+flags_default:
+  CATDESC: Bitwise flag. There are 13 flags. See VAR_NOTES for more details.
+  FIELDNAM: Quality Flag
+  LABLAXIS: Flags
   DEPEND_0: epoch
   DEPEND_1: energy
   DISPLAY_TYPE: spectrogram
@@ -41,9 +44,17 @@ compression_default: &compression_default
   FORMAT: I1
   UNITS: ' '
   VALIDMIN: 0
-  VALIDMAX: 1
+  VALIDMAX: 32767
   VAR_TYPE: data
   SCALETYP: linear
+  VAR_NOTES: >
+    There are 13 flags in total, first three flags are from science packets
+    and remaining 10 flags are from housekeeping packets. Flags are stored
+    as bitwise flag. 32767 is the value when all 15-bits are 1s. Eg.
+    int('0111111111111111', 2). First two bits from right are saved to save defaults.
+    Then, the remaining 13 bits from right are used for 13 flags. The flags are
+    as follows, "OVR_T_ST", "UND_T_ST", "PCEM_CNT_ST", "SCEM_CNT_ST","PCEM_V_ST",
+    "PCEM_I_ST", "PCEM_INT_ST", "SCEM_V_ST", "SCEM_I_ST", "SCEM_INT_ST".
 
 uncertainty_default: &uncertainty_default
   DEPEND_0: epoch
@@ -89,24 +100,6 @@ coin_counts:
   FIELDNAM: Coincidence CEM Counts
   LABLAXIS: coin_cnts
 
-pcem_flags:
-  <<: *compression_default
-  CATDESC: Primary Channel Electron Multiplier (CEM) compression flag
-  FIELDNAM: Primary CEM Flag
-  LABLAXIS: pcem_flag
-
-scem_flags:
-  <<: *compression_default
-  CATDESC: Secondary Channel Electron Multiplier (CEM) compression flag
-  FIELDNAM: Secondary CEM Flag
-  LABLAXIS: scem_flag
-
-coin_flags:
-  <<: *compression_default
-  CATDESC: Coincidence compression flag
-  FIELDNAM: Coincidence Flag
-  LABLAXIS: coin_flag
-
 pcem_uncertainty:
   <<: *uncertainty_default
   CATDESC: Primary Channel Electron Multiplier (CEM) uncertainty

diff --git a/imap_processing/quality_flags.py b/imap_processing/quality_flags.py
@@ -66,3 +66,26 @@ class HitFlags(
     INF = CommonFlags.INF  # bit 0
     NEG = CommonFlags.NEG  # bit 1
     FLAG3 = 2**2  # bit 2
+
+
+class SWAPIFlags(
+    FlagNameMixin,
+):
+    """SWAPI flags."""
+
+    NONE = CommonFlags.NONE
+    INF = CommonFlags.INF  # bit 0
+    NEG = CommonFlags.NEG  # bit 1
+    SWP_PCEM_COMP = 2**2  # bit 2
+    SWP_SCEM_COMP = 2**3  # bit 3
+    SWP_COIN_COMP = 2**4  # bit 4
+    OVR_T_ST = 2**5  # bit 5
+    UND_T_ST = 2**6  # bit 6
+    PCEM_CNT_ST = 2**7  # bit 7
+    SCEM_CNT_ST = 2**8  # bit 8
+    PCEM_V_ST = 2**9  # bit 9
+    PCEM_I_ST = 2**10  # bit 10
+    PCEM_INT_ST = 2**11  # bit 11
+    SCEM_V_ST = 2**12  # bit 12
+    SCEM_I_ST = 2**13  # bit 13
+    SCEM_INT_ST = 2**14  # bit 14
diff --git a/imap_processing/swapi/l1/swapi_l1.py b/imap_processing/swapi/l1/swapi_l1.py
@@ -8,6 +8,7 @@
 
 from imap_processing import imap_module_directory
 from imap_processing.cdf.imap_cdf_manager import ImapCdfAttributes
+from imap_processing.quality_flags import SWAPIFlags
 from imap_processing.swapi.swapi_utils import SWAPIAPID, SWAPIMODE
 from imap_processing.utils import packet_file_to_datasets
 
@@ -111,13 +112,15 @@ def decompress_count(
     # If data is compressed, decompress it
     compressed_indices = compression_flag == 1
     new_count[compressed_indices] *= 16
-    # TODO: add data type check here.
+
     # If the data was compressed and the count was 0xFFFF, mark it as an overflow
     if np.any(count_data < 0):
         raise ValueError(
             "Count data type must be unsigned int and should not contain negative value"
         )
-    new_count[compressed_indices & (count_data == 0xFFFF)] = -1
+
+    # SWAPI suggested using big value to indicate overflow.
+    new_count[compressed_indices & (count_data == 0xFFFF)] = np.iinfo(np.int32).max
     return new_count
 
 
@@ -405,14 +408,18 @@ def process_sweep_data(full_sweep_sci: xr.Dataset, cem_prefix: str) -> xr.Datase
     return all_cem_data
 
 
-def process_swapi_science(sci_dataset: xr.Dataset, data_version: str) -> xr.Dataset:
+def process_swapi_science(
+    sci_dataset: xr.Dataset, hk_dataset: xr.Dataset, data_version: str
+) -> xr.Dataset:
     """
     Will process SWAPI science data and create CDF file.
 
     Parameters
     ----------
     sci_dataset : xarray.Dataset
         L0 data.
+    hk_dataset : xarray.Dataset
+        Housekeeping data.
     data_version : str
         Version of the data product being created.
 
@@ -454,18 +461,79 @@ def process_swapi_science(sci_dataset: xr.Dataset, data_version: str) -> xr.Data
     swp_scem_counts = decompress_count(raw_scem_count, scem_compression_flags)
     swp_coin_counts = decompress_count(raw_coin_count, coin_compression_flags)
 
+    # ====================================================
+    # Load the CDF attributes
+    # ====================================================
+    cdf_manager = ImapCdfAttributes()
+    cdf_manager.add_instrument_global_attrs("swapi")
+    cdf_manager.load_variable_attributes("imap_swapi_variable_attrs.yaml")
+
+    # ===================================================================
+    # Quality flags
+    # ===================================================================
+    quality_flags_data = np.zeros((total_full_sweeps, 72), np.uint16)
+
+    # Add science data quality flags
+    quality_flags_data[pcem_compression_flags == 1] |= SWAPIFlags.SWP_PCEM_COMP
+    quality_flags_data[scem_compression_flags == 1] |= SWAPIFlags.SWP_SCEM_COMP
+    quality_flags_data[coin_compression_flags == 1] |= SWAPIFlags.SWP_COIN_COMP
+
+    # Add housekeeping-derived quality flags
+    # --------------------------------------
+    # Get times of good and full sweep data from science dataset.
+    # Then use these times to get housekeeping data. Something is wrong if
+    # there is no science data that matches exactly one housekeeping data. It
+    # should be one-to-one matching per SWAPI team, Jamie Rankin.
+    good_sweep_times = good_sweep_sci["epoch"].data
+    good_sweep_hk_data = hk_dataset.sel({"epoch": good_sweep_times})
+
+    # Since there is one SWAPI HK packet for each SWAPI SCI packet,
+    # and both are recorded at 1 Hz (1 packet per second),
+    # we can leverage this to set the quality flags for each science
+    # packet's data. Each SWAPI science packet represents
+    # one sequence of data, where the sequence includes measurements
+    # like PCEM_CNT0, PCEM_CNT1, PCEM_CNT2, PCEM_CNT3,
+    # PCEM_CNT4, and PCEM_CNT5. Because all these measurements come
+    # from the same science packet, they should share
+    # the same HK quality flag. This is why the HK quality flag is
+    # repeated 6 times, once for each measurement within
+    # the sequence (each packet corresponds to one sequence).
+
+    hk_flags_name = [
+        "OVR_T_ST",
+        "UND_T_ST",
+        "PCEM_CNT_ST",
+        "SCEM_CNT_ST",
+        "PCEM_V_ST",
+        "PCEM_I_ST",
+        "PCEM_INT_ST",
+        "SCEM_V_ST",
+        "SCEM_I_ST",
+        "SCEM_INT_ST",
+    ]
+
+    for flag_name in hk_flags_name:
+        current_flag = np.repeat(good_sweep_hk_data[flag_name.lower()].data, 6).reshape(
+            -1, 72
+        )
+        # Use getattr to dynamically access the flag in SWAPIFlags class
+        flag_to_set = getattr(SWAPIFlags, flag_name)
+        # set the quality flag for each data
+        quality_flags_data[current_flag == 1] |= flag_to_set
+
+    swp_flags = xr.DataArray(
+        quality_flags_data,
+        dims=["epoch", "energy"],
+        attrs=cdf_manager.get_variable_attributes("flags_default"),
+    )
+
     # ===================================================================
     # Step 3: Create xarray.Dataset
     # ===================================================================
 
     # epoch time. Should be same dimension as number of good sweeps
     epoch_values = good_sweep_sci["epoch"].data.reshape(total_full_sweeps, 12)[:, 0]
 
-    # Load the CDF attributes
-    cdf_manager = ImapCdfAttributes()
-    cdf_manager.add_instrument_global_attrs("swapi")
-    cdf_manager.load_variable_attributes("imap_swapi_variable_attrs.yaml")
-
     epoch_time = xr.DataArray(
         epoch_values,
         name="epoch",
@@ -497,7 +565,11 @@ def process_swapi_science(sci_dataset: xr.Dataset, data_version: str) -> xr.Data
     l1_global_attrs["Apid"] = f"{sci_dataset['pkt_apid'].data[0]}"
 
     dataset = xr.Dataset(
-        coords={"epoch": epoch_time, "energy": energy, "energy_label": energy_label},
+        coords={
+            "epoch": epoch_time,
+            "energy": energy,
+            "energy_label": energy_label,
+        },
         attrs=l1_global_attrs,
     )
 
@@ -517,42 +589,45 @@ def process_swapi_science(sci_dataset: xr.Dataset, data_version: str) -> xr.Data
         attrs=cdf_manager.get_variable_attributes("coin_counts"),
     )
 
-    # L1 quality flags
-    # TODO: Should these be kept in raw format rather than derived into strings?
-    dataset["swp_pcem_flags"] = xr.DataArray(
-        np.array(pcem_compression_flags, dtype=np.uint8),
-        dims=["epoch", "energy"],
-        attrs=cdf_manager.get_variable_attributes("pcem_flags"),
-    )
-    dataset["swp_scem_flags"] = xr.DataArray(
-        np.array(scem_compression_flags, dtype=np.uint8),
-        dims=["epoch", "energy"],
-        attrs=cdf_manager.get_variable_attributes("scem_flags"),
-    )
-    dataset["swp_coin_flags"] = xr.DataArray(
-        np.array(coin_compression_flags, dtype=np.uint8),
-        dims=["epoch", "energy"],
-        attrs=cdf_manager.get_variable_attributes("coin_flags"),
-    )
-
+    # Add quality flags to the dataset
+    dataset["swp_flags"] = swp_flags
     # ===================================================================
     # Step 4: Calculate uncertainty
     # ===================================================================
     # Uncertainty in counts formula:
     # Uncertainty is quantified for the PCEM, SCEM, and COIN counts.
     # The Poisson contribution is
     #   uncertainty = sqrt(count)
-    dataset["swp_pcem_err"] = xr.DataArray(
+    # TODO:
+    # Above uncertaintly formula will change in the future.
+    # Replace it with actual formula once SWAPI provides it.
+    # Right now, we are using sqrt(count) as a placeholder
+    dataset["swp_pcem_err_plus"] = xr.DataArray(
+        np.sqrt(swp_pcem_counts),
+        dims=["epoch", "energy"],
+        attrs=cdf_manager.get_variable_attributes("pcem_uncertainty"),
+    )
+    dataset["swp_pcem_err_minus"] = xr.DataArray(
         np.sqrt(swp_pcem_counts),
         dims=["epoch", "energy"],
         attrs=cdf_manager.get_variable_attributes("pcem_uncertainty"),
     )
-    dataset["swp_scem_err"] = xr.DataArray(
+    dataset["swp_scem_err_plus"] = xr.DataArray(
+        np.sqrt(swp_scem_counts),
+        dims=["epoch", "energy"],
+        attrs=cdf_manager.get_variable_attributes("scem_uncertainty"),
+    )
+    dataset["swp_scem_err_minus"] = xr.DataArray(
         np.sqrt(swp_scem_counts),
         dims=["epoch", "energy"],
         attrs=cdf_manager.get_variable_attributes("scem_uncertainty"),
     )
-    dataset["swp_coin_err"] = xr.DataArray(
+    dataset["swp_coin_err_plus"] = xr.DataArray(
+        np.sqrt(swp_coin_counts),
+        dims=["epoch", "energy"],
+        attrs=cdf_manager.get_variable_attributes("coin_uncertainty"),
+    )
+    dataset["swp_coin_err_minus"] = xr.DataArray(
         np.sqrt(swp_coin_counts),
         dims=["epoch", "energy"],
         attrs=cdf_manager.get_variable_attributes("coin_uncertainty"),
@@ -587,14 +662,17 @@ def swapi_l1(file_path: str, data_version: str) -> xr.Dataset:
         file_path, xtce_definition, use_derived_value=False
     )
     processed_data = []
+
     for apid, ds_data in datasets.items():
         # Right now, we only process SWP_HK and SWP_SCI
         # other packets are not process in this processing pipeline
         # If appId is science, then the file should contain all data of science appId
 
         if apid == SWAPIAPID.SWP_SCI.value:
-            data = process_swapi_science(ds_data, data_version)
-            processed_data.append(data)
+            sci_dataset = process_swapi_science(
+                ds_data, datasets[SWAPIAPID.SWP_HK], data_version
+            )
+            processed_data.append(sci_dataset)
         if apid == SWAPIAPID.SWP_HK.value:
             # Add HK datalevel attrs
             # TODO: ask SWAPI if we need to process HK if we can use WebPODA

diff --git a/imap_processing/swapi/l2/swapi_l2.py b/imap_processing/swapi/l2/swapi_l2.py
@@ -50,9 +50,7 @@ def swapi_l2(l1_dataset: xr.Dataset, data_version: str) -> xr.Dataset:
         "epoch",
         "energy",
         "energy_label",
-        "swp_pcem_flags",
-        "swp_scem_flags",
-        "swp_coin_flags",
+        "swp_flags",
     ]
     l2_dataset = l1_dataset[l1_data_keys]
 
@@ -76,17 +74,29 @@ def swapi_l2(l1_dataset: xr.Dataset, data_version: str) -> xr.Dataset:
     l2_dataset["swp_coin_rate"].attrs = cdf_manager.get_variable_attributes("coin_rate")
 
     # update uncertainty
-    l2_dataset["swp_pcem_unc"] = l1_dataset["swp_pcem_err"] / TIME_PER_BIN
-    l2_dataset["swp_scem_unc"] = l1_dataset["swp_scem_err"] / TIME_PER_BIN
-    l2_dataset["swp_coin_unc"] = l1_dataset["swp_coin_err"] / TIME_PER_BIN
+    l2_dataset["swp_pcem_unc_plus"] = l1_dataset["swp_pcem_err_plus"] / TIME_PER_BIN
+    l2_dataset["swp_pcem_unc_minus"] = l1_dataset["swp_pcem_err_minus"] / TIME_PER_BIN
+    l2_dataset["swp_scem_unc_plus"] = l1_dataset["swp_scem_err_plus"] / TIME_PER_BIN
+    l2_dataset["swp_scem_unc_minus"] = l1_dataset["swp_scem_err_minus"] / TIME_PER_BIN
+    l2_dataset["swp_coin_unc_plus"] = l1_dataset["swp_coin_err_plus"] / TIME_PER_BIN
+    l2_dataset["swp_coin_unc_minus"] = l1_dataset["swp_coin_err_minus"] / TIME_PER_BIN
     # update attrs
-    l2_dataset["swp_pcem_unc"].attrs = cdf_manager.get_variable_attributes(
+    l2_dataset["swp_pcem_unc_plus"].attrs = cdf_manager.get_variable_attributes(
         "pcem_uncertainty"
     )
-    l2_dataset["swp_scem_unc"].attrs = cdf_manager.get_variable_attributes(
+    l2_dataset["swp_pcem_unc_minus"].attrs = cdf_manager.get_variable_attributes(
+        "pcem_uncertainty"
+    )
+    l2_dataset["swp_scem_unc_plus"].attrs = cdf_manager.get_variable_attributes(
+        "scem_uncertainty"
+    )
+    l2_dataset["swp_scem_unc_minus"].attrs = cdf_manager.get_variable_attributes(
         "scem_uncertainty"
     )
-    l2_dataset["swp_coin_unc"].attrs = cdf_manager.get_variable_attributes(
+    l2_dataset["swp_coin_unc_plus"].attrs = cdf_manager.get_variable_attributes(
+        "coin_uncertainty"
+    )
+    l2_dataset["swp_coin_unc_minus"].attrs = cdf_manager.get_variable_attributes(
         "coin_uncertainty"
     )