Skip to content

Commit

Permalink
Adding quality flags and uncertainty updates
Browse files Browse the repository at this point in the history
  • Loading branch information
tech3371 committed Aug 29, 2024
1 parent 3e71692 commit 9c8c457
Show file tree
Hide file tree
Showing 7 changed files with 212 additions and 76 deletions.
8 changes: 4 additions & 4 deletions imap_processing/cdf/config/imap_swapi_global_cdf_attrs.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
instrument_base: &instrument_base
Descriptor: SWAPI>The Solar Wind and Pickup Ion
Descriptor: SWAPI>Solar Wind and Pickup Ion
TEXT: >
The Solar Wind and Pickup Ion (SWAPI) instrument measures several different
elements of the solar wind, including hydrogen (H) and helium (He) ions,
Expand All @@ -15,8 +15,8 @@ imap_swapi_l1_sci:
# NOTE: Right now, this Data_level is required to produce valid CDF
Data_level: "1"
Data_type: L1_SCI>Level-1 Science data in 1 minute resolution
Logical_source: imap_swapi_l1_sci-1min
Logical_source_description: SWAPI Instrument Level-1 Science Data in 1 minute resolution
Logical_source: imap_swapi_l1_sci
Logical_source_description: SWAPI Instrument Level-1 Science Data

imap_swapi_l1_hk:
<<: *instrument_base
Expand All @@ -29,5 +29,5 @@ imap_swapi_l2_sci:
<<: *instrument_base
Data_level: "2"
Data_type: L2_SCI>Level-2 Science data in 1 minute resolution
Logical_source: imap_swapi_l2_sci-1min
Logical_source: imap_swapi_l2_sci
Logical_source_description: SWAPI Instrument Level-2 Science Data in 1 minute resolution
50 changes: 29 additions & 21 deletions imap_processing/cdf/config/imap_swapi_variable_attrs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,31 @@ energy:
VALIDMIN: 0
VAR_TYPE: support_data

flags:
CATDESC: Data quality flags
FIELDNAM: Quality Flags
FILLVAL: -9223370000000000000
FORMAT: I2
LABLAXIS: Flags
SCALE_TYP: linear
UNITS: " "
VALIDMAX: 1
VALIDMIN: 0
VAR_TYPE: support_data

# <=== LABL_PTR_i Attributes ===>
energy_label:
CATDESC: Energy step id in lookup table
FIELDNAM: Energy Step
FORMAT: A2
VAR_TYPE: metadata

flags_label:
CATDESC: Data quality flags
FIELDNAM: Quality Flags
FORMAT: A20
VAR_TYPE: metadata

# <=== Data Variables ===>
counts_default: &counts_default
DEPEND_0: epoch
Expand All @@ -32,18 +50,26 @@ counts_default: &counts_default
VAR_TYPE: data
SCALETYP: linear

compression_default: &compression_default
flags_default:
CATDESC: Data quality flags.
FIELDNAM: Quality Flag
LABLAXIS: Flags
DEPEND_0: epoch
DEPEND_1: energy
DEPEND_1: flags
DEPEND_2: energy
DISPLAY_TYPE: spectrogram
LABL_PTR_1: energy_label
LABL_PTR_1: flags_label
LABL_PTR_2: energy_label
FILLVAL: 4294967295
FORMAT: I1
UNITS: ' '
VALIDMIN: 0
VALIDMAX: 1
VAR_TYPE: data
SCALETYP: linear
  VAR_NOTES: >
    There are 13 flags in total; the first three flags come from science packets
    and the remaining 10 flags come from housekeeping packets.
uncertainty_default: &uncertainty_default
DEPEND_0: epoch
Expand Down Expand Up @@ -89,24 +115,6 @@ coin_counts:
FIELDNAM: Coincidence CEM Counts
LABLAXIS: coin_cnts

pcem_flags:
<<: *compression_default
CATDESC: Primary Channel Electron Multiplier (CEM) compression flag
FIELDNAM: Primary CEM Flag
LABLAXIS: pcem_flag

scem_flags:
<<: *compression_default
CATDESC: Secondary Channel Electron Multiplier (CEM) compression flag
FIELDNAM: Secondary CEM Flag
LABLAXIS: scem_flag

coin_flags:
<<: *compression_default
CATDESC: Coincidence compression flag
FIELDNAM: Coincidence Flag
LABLAXIS: coin_flag

pcem_uncertainty:
<<: *uncertainty_default
CATDESC: Primary Channel Electron Multiplier (CEM) uncertainty
Expand Down
163 changes: 131 additions & 32 deletions imap_processing/swapi/l1/swapi_l1.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,13 +111,15 @@ def decompress_count(
# If data is compressed, decompress it
compressed_indices = compression_flag == 1
new_count[compressed_indices] *= 16
# TODO: add data type check here.

# If the data was compressed and the count was 0xFFFF, mark it as an overflow
if np.any(count_data < 0):
raise ValueError(
"Count data type must be unsigned int and should not contain negative value"
)
new_count[compressed_indices & (count_data == 0xFFFF)] = -1

# SWAPI suggested using big value to indicate overflow.
new_count[compressed_indices & (count_data == 0xFFFF)] = np.iinfo(np.int32).max
return new_count


Expand Down Expand Up @@ -405,14 +407,18 @@ def process_sweep_data(full_sweep_sci: xr.Dataset, cem_prefix: str) -> xr.Datase
return all_cem_data


def process_swapi_science(sci_dataset: xr.Dataset, data_version: str) -> xr.Dataset:
def process_swapi_science(
sci_dataset: xr.Dataset, hk_dataset: xr.Dataset, data_version: str
) -> xr.Dataset:
"""
Will process SWAPI science data and create CDF file.
Parameters
----------
sci_dataset : xarray.Dataset
L0 data.
hk_dataset : xarray.Dataset
Housekeeping data.
data_version : str
Version of the data product being created.
Expand All @@ -421,6 +427,11 @@ def process_swapi_science(sci_dataset: xr.Dataset, data_version: str) -> xr.Data
dataset : xarray.Dataset
Dataset.
"""
# pylint: disable=too-many-statements
# error:
# imap_processing/swapi/l1/swapi_l1.py:412:5: PLR0915
# Too many statements (51 > 50)

# ====================================================
# Step 1: Filter full cycle data
# ====================================================
Expand Down Expand Up @@ -454,18 +465,95 @@ def process_swapi_science(sci_dataset: xr.Dataset, data_version: str) -> xr.Data
swp_scem_counts = decompress_count(raw_scem_count, scem_compression_flags)
swp_coin_counts = decompress_count(raw_coin_count, coin_compression_flags)

# ====================================================
# Load the CDF attributes
# ====================================================
cdf_manager = ImapCdfAttributes()
cdf_manager.add_instrument_global_attrs("swapi")
cdf_manager.load_variable_attributes("imap_swapi_variable_attrs.yaml")

# ===================================================================
# Quality flags
# ===================================================================
flags_name = [
"swp_pcem_comp",
"swp_scem_comp",
"swp_coin_comp",
"ovr_t_st",
"und_t_st",
"pcem_cnt_st",
"scem_cnt_st",
"pcem_v_st",
"pcem_i_st",
"pcem_int_st",
"scem_v_st",
"scem_i_st",
"scem_int_st",
]
quality_flags_data = []
# Add science data quality flags
quality_flags_data.append(pcem_compression_flags)
quality_flags_data.append(scem_compression_flags)
quality_flags_data.append(coin_compression_flags)

# Add housekeeping-derived quality flags
# --------------------------------------
# Get times of good and full sweep data from science dataset.
# Then use these times to get housekeeping data.
# TODO: how to handle time delta between SWAPI HK and SWAPI SCI packets?
# For now, we assume that SHCOARSE in the SWAPI HK and SWAPI SCI packets
# are exactly equal at any given point, no significant time delta.
good_sweep_times = good_sweep_sci["epoch"].data
good_sweep_hk_data = hk_dataset.sel({"epoch": good_sweep_times})

# Since there will be one SWAPI HK packet for each SWAPI SCI packets, as
# both are recorded at 1 Hz(1 second), we can use this information to set
# quality flag for each science packet's data. Each packet contains
# information of one sequence data. Each sequence's
    # PCEM_CNT0, PCEM_CNT1, PCEM_CNT2, PCEM_CNT3, PCEM_CNT4,
    # PCEM_CNT5 data is coming from the same science packet and therefore
    # it's going to share the same HK quality flag. That's why the HK quality
    # flag is repeated 6 times for each sequence (aka packet).
for flag_name in flags_name[3:]:
quality_flags_data.append(
np.repeat(good_sweep_hk_data[flag_name].data, 6).reshape(-1, 72)
)

# Before, individual quality flags data were in this shape:
# (number_of_sweep, energy_step)
# Now, to group array of flags data into this shape:
# (number_of_sweep, number_of_flags, energy_step)
# With this, each sweep has its quality flag.
quality_flags_data = np.stack(quality_flags_data, axis=1)

swp_flags = xr.DataArray(
np.array(quality_flags_data, dtype=np.uint8),
dims=["epoch", "flags", "energy"],
attrs=cdf_manager.get_variable_attributes("flags_default"),
)

# Quality flags of sweep data
flags = xr.DataArray(
np.arange(13),
name="flags",
dims=["flags"],
attrs=cdf_manager.get_variable_attributes("flags"),
)

flags_label = xr.DataArray(
flags_name,
name="flags_label",
dims=["flags_label"],
attrs=cdf_manager.get_variable_attributes("flags_label"),
)

# ===================================================================
# Step 3: Create xarray.Dataset
# ===================================================================

# epoch time. Should be same dimension as number of good sweeps
epoch_values = good_sweep_sci["epoch"].data.reshape(total_full_sweeps, 12)[:, 0]

# Load the CDF attributes
cdf_manager = ImapCdfAttributes()
cdf_manager.add_instrument_global_attrs("swapi")
cdf_manager.load_variable_attributes("imap_swapi_variable_attrs.yaml")

epoch_time = xr.DataArray(
epoch_values,
name="epoch",
Expand Down Expand Up @@ -497,7 +585,13 @@ def process_swapi_science(sci_dataset: xr.Dataset, data_version: str) -> xr.Data
l1_global_attrs["Apid"] = f"{sci_dataset['pkt_apid'].data[0]}"

dataset = xr.Dataset(
coords={"epoch": epoch_time, "energy": energy, "energy_label": energy_label},
coords={
"epoch": epoch_time,
"energy": energy,
"energy_label": energy_label,
"flags": flags,
"flags_label": flags_label,
},
attrs=l1_global_attrs,
)

Expand All @@ -517,42 +611,44 @@ def process_swapi_science(sci_dataset: xr.Dataset, data_version: str) -> xr.Data
attrs=cdf_manager.get_variable_attributes("coin_counts"),
)

# L1 quality flags
# TODO: Should these be kept in raw format rather than derived into strings?
dataset["swp_pcem_flags"] = xr.DataArray(
np.array(pcem_compression_flags, dtype=np.uint8),
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("pcem_flags"),
)
dataset["swp_scem_flags"] = xr.DataArray(
np.array(scem_compression_flags, dtype=np.uint8),
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("scem_flags"),
)
dataset["swp_coin_flags"] = xr.DataArray(
np.array(coin_compression_flags, dtype=np.uint8),
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("coin_flags"),
)

# Add quality flags to the dataset
dataset["swp_flags"] = swp_flags
# ===================================================================
# Step 4: Calculate uncertainty
# ===================================================================
# Uncertainty in counts formula:
# Uncertainty is quantified for the PCEM, SCEM, and COIN counts.
# The Poisson contribution is
# uncertainty = sqrt(count)
dataset["swp_pcem_err"] = xr.DataArray(

# TODO: fill in with actual formula once SWAPI provides it.
# Right now, we are using sqrt(count) as a placeholder
dataset["swp_pcem_err_plus"] = xr.DataArray(
np.sqrt(swp_pcem_counts),
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("pcem_uncertainty"),
)
dataset["swp_scem_err"] = xr.DataArray(
dataset["swp_pcem_err_minus"] = xr.DataArray(
np.sqrt(swp_pcem_counts),
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("pcem_uncertainty"),
)
dataset["swp_scem_err_plus"] = xr.DataArray(
np.sqrt(swp_scem_counts),
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("scem_uncertainty"),
)
dataset["swp_scem_err_minus"] = xr.DataArray(
np.sqrt(swp_scem_counts),
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("scem_uncertainty"),
)
dataset["swp_coin_err"] = xr.DataArray(
dataset["swp_coin_err_plus"] = xr.DataArray(
np.sqrt(swp_coin_counts),
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("coin_uncertainty"),
)
dataset["swp_coin_err_minus"] = xr.DataArray(
np.sqrt(swp_coin_counts),
dims=["epoch", "energy"],
attrs=cdf_manager.get_variable_attributes("coin_uncertainty"),
Expand Down Expand Up @@ -587,14 +683,17 @@ def swapi_l1(file_path: str, data_version: str) -> xr.Dataset:
file_path, xtce_definition, use_derived_value=False
)
processed_data = []

for apid, ds_data in datasets.items():
        # Right now, we only process SWP_HK and SWP_SCI;
        # other packets are not processed in this processing pipeline
# If appId is science, then the file should contain all data of science appId

if apid == SWAPIAPID.SWP_SCI.value:
data = process_swapi_science(ds_data, data_version)
processed_data.append(data)
sci_dataset = process_swapi_science(
ds_data, datasets[SWAPIAPID.SWP_HK], data_version
)
processed_data.append(sci_dataset)
if apid == SWAPIAPID.SWP_HK.value:
# Add HK datalevel attrs
# TODO: ask SWAPI if we need to process HK if we can use WebPODA
Expand Down
Loading

0 comments on commit 9c8c457

Please sign in to comment.