From 76966adebe9eca31b592ac5bdeb869150ac72ef6 Mon Sep 17 00:00:00 2001 From: Tenzin Choedon Date: Fri, 9 Aug 2024 14:09:07 -0600 Subject: [PATCH] Adding quality flags and uncertainty updates --- .../config/imap_swapi_global_cdf_attrs.yaml | 8 +- .../cdf/config/imap_swapi_variable_attrs.yaml | 50 +++--- imap_processing/swapi/l1/swapi_l1.py | 161 ++++++++++++++---- imap_processing/swapi/l2/swapi_l2.py | 30 +++- imap_processing/tests/swapi/test_swapi_l1.py | 31 +++- imap_processing/tests/swapi/test_swapi_l2.py | 4 +- pyproject.toml | 2 +- 7 files changed, 211 insertions(+), 75 deletions(-) diff --git a/imap_processing/cdf/config/imap_swapi_global_cdf_attrs.yaml b/imap_processing/cdf/config/imap_swapi_global_cdf_attrs.yaml index ed8ce687e..7665a6885 100644 --- a/imap_processing/cdf/config/imap_swapi_global_cdf_attrs.yaml +++ b/imap_processing/cdf/config/imap_swapi_global_cdf_attrs.yaml @@ -1,5 +1,5 @@ instrument_base: &instrument_base - Descriptor: SWAPI>The Solar Wind and Pickup Ion + Descriptor: SWAPI>Solar Wind and Pickup Ion TEXT: > The Solar Wind and Pickup Ion (SWAPI) instrument measures several different elements of the solar wind, including hydrogen (H) and helium (He) ions, @@ -15,8 +15,8 @@ imap_swapi_l1_sci: # NOTE: Right now, this Data_level is required to produce valid CDF Data_level: "1" Data_type: L1_SCI>Level-1 Science data in 1 minute resolution - Logical_source: imap_swapi_l1_sci-1min - Logical_source_description: SWAPI Instrument Level-1 Science Data in 1 minute resolution + Logical_source: imap_swapi_l1_sci + Logical_source_description: SWAPI Instrument Level-1 Science Data imap_swapi_l1_hk: <<: *instrument_base @@ -29,5 +29,5 @@ imap_swapi_l2_sci: <<: *instrument_base Data_level: "2" Data_type: L2_SCI>Level-2 Science data in 1 minute resolution - Logical_source: imap_swapi_l2_sci-1min + Logical_source: imap_swapi_l2_sci Logical_source_description: SWAPI Instrument Level-1 Science Data in 1 minute resolution diff --git a/imap_processing/cdf/config/imap_swapi_variable_attrs.yaml b/imap_processing/cdf/config/imap_swapi_variable_attrs.yaml index 7746a5e86..07477a218 100644 --- a/imap_processing/cdf/config/imap_swapi_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_swapi_variable_attrs.yaml @@ -11,6 +11,18 @@ energy: VALIDMIN: 0 VAR_TYPE: support_data +flags: + CATDESC: Data quality flags + FIELDNAM: Quality Flags + FILLVAL: -9223370000000000000 + FORMAT: I2 + LABLAXIS: Flags + SCALE_TYP: linear + UNITS: " " + VALIDMAX: 1 + VALIDMIN: 0 + VAR_TYPE: support_data + # <=== LABL_PTR_i Attributes ===> energy_label: CATDESC: Energy step id in lookup table @@ -18,6 +30,12 @@ energy_label: FORMAT: A2 VAR_TYPE: metadata +flags_label: + CATDESC: Data quality flags + FIELDNAM: Quality Flags + FORMAT: A20 + VAR_TYPE: metadata + # <=== Data Variables ===> counts_default: &counts_default DEPEND_0: epoch @@ -32,11 +50,16 @@ counts_default: &counts_default VAR_TYPE: data SCALETYP: linear -compression_default: &compression_default +flags_default: + CATDESC: Data quality flags. + FIELDNAM: Quality Flag + LABLAXIS: Flags DEPEND_0: epoch - DEPEND_1: energy + DEPEND_1: flags + DEPEND_2: energy DISPLAY_TYPE: spectrogram - LABL_PTR_1: energy_label + LABL_PTR_1: flags_label + LABL_PTR_2: energy_label FILLVAL: 4294967295 FORMAT: I1 UNITS: ' ' @@ -44,6 +67,9 @@ compression_default: &compression_default VALIDMAX: 1 VAR_TYPE: data SCALETYP: linear + VAR_NOTES: > + There are 13 flags in total, first three flags are from science packets + and remaining 10 flags are from housekeeping packets. 
 uncertainty_default: &uncertainty_default
   DEPEND_0: epoch
@@ -89,24 +115,6 @@ coin_counts:
   FIELDNAM: Coincidence CEM Counts
   LABLAXIS: coin_cnts
 
-pcem_flags:
-  <<: *compression_default
-  CATDESC: Primary Channel Electron Multiplier (CEM) compression flag
-  FIELDNAM: Primary CEM Flag
-  LABLAXIS: pcem_flag
-
-scem_flags:
-  <<: *compression_default
-  CATDESC: Secondary Channel Electron Multiplier (CEM) compression flag
-  FIELDNAM: Secondary CEM Flag
-  LABLAXIS: scem_flag
-
-coin_flags:
-  <<: *compression_default
-  CATDESC: Coincidence compression flag
-  FIELDNAM: Coincidence Flag
-  LABLAXIS: coin_flag
-
 pcem_uncertainty:
   <<: *uncertainty_default
   CATDESC: Primary Channel Electron Multiplier (CEM) uncertainty
diff --git a/imap_processing/swapi/l1/swapi_l1.py b/imap_processing/swapi/l1/swapi_l1.py
index d82796c22..86413b9cd 100644
--- a/imap_processing/swapi/l1/swapi_l1.py
+++ b/imap_processing/swapi/l1/swapi_l1.py
@@ -117,7 +117,9 @@ def decompress_count(
         raise ValueError(
             "Count data type must be unsigned int and should not contain negative value"
         )
-    new_count[compressed_indices & (count_data == 0xFFFF)] = -1
+
+    # SWAPI suggested using a large value to indicate overflow.
+    new_count[compressed_indices & (count_data == 0xFFFF)] = np.iinfo(np.int64).max
     return new_count
 
 
@@ -405,7 +407,9 @@ def process_sweep_data(full_sweep_sci: xr.Dataset, cem_prefix: str) -> xr.Datase
     return all_cem_data
 
 
-def process_swapi_science(sci_dataset: xr.Dataset, data_version: str) -> xr.Dataset:
+def process_swapi_science(
+    sci_dataset: xr.Dataset, hk_dataset: xr.Dataset, data_version: str
+) -> xr.Dataset:
     """
     Will process SWAPI science data and create CDF file.
 
@@ -413,6 +417,8 @@ def process_swapi_science(sci_dataset: xr.Dataset, data_version: str) -> xr.Data
     ----------
     sci_dataset : xarray.Dataset
         L0 data.
+    hk_dataset : xarray.Dataset
+        Housekeeping data.
     data_version : str
         Version of the data product being created.
 
@@ -421,6 +427,11 @@ def process_swapi_science(sci_dataset: xr.Dataset, data_version: str) -> xr.Data
     dataset : xarray.Dataset
        Dataset.
     """
+    # pylint: disable=too-many-statements
+    # error:
+    # imap_processing/swapi/l1/swapi_l1.py:412:5: PLR0915
+    # Too many statements (51 > 50)
+
     # ====================================================
     # Step 1: Filter full cycle data
     # ====================================================
@@ -454,6 +465,88 @@ def process_swapi_science(sci_dataset: xr.Dataset, data_version: str) -> xr.Data
     swp_scem_counts = decompress_count(raw_scem_count, scem_compression_flags)
     swp_coin_counts = decompress_count(raw_coin_count, coin_compression_flags)
 
+    # ====================================================
+    # Load the CDF attributes
+    # ====================================================
+    cdf_manager = ImapCdfAttributes()
+    cdf_manager.add_instrument_global_attrs("swapi")
+    cdf_manager.load_variable_attributes("imap_swapi_variable_attrs.yaml")
+
+    # ===================================================================
+    # Quality flags
+    # ===================================================================
+    flags_name = [
+        "swp_pcem_comp",
+        "swp_scem_comp",
+        "swp_coin_comp",
+        "ovr_t_st",
+        "und_t_st",
+        "pcem_cnt_st",
+        "scem_cnt_st",
+        "pcem_v_st",
+        "pcem_i_st",
+        "pcem_int_st",
+        "scem_v_st",
+        "scem_i_st",
+        "scem_int_st",
+    ]
+    quality_flags_data = []
+    # Add science data quality flags
+    quality_flags_data.append(pcem_compression_flags)
+    quality_flags_data.append(scem_compression_flags)
+    quality_flags_data.append(coin_compression_flags)
+
+    # Add housekeeping-derived quality flags
+    # --------------------------------------
+    # Get the times of good, full-sweep data from the science dataset,
+    # then use those times to select the matching housekeeping data.
+    # TODO: how to handle time delta between SWAPI HK and SWAPI SCI packets?
+    # For now, we assume that SHCOARSE in the SWAPI HK and SWAPI SCI packets
+    # is exactly equal at any given point, i.e. no significant time delta.
+    good_sweep_times = good_sweep_sci["epoch"].data
+    good_sweep_hk_data = hk_dataset.sel({"epoch": good_sweep_times})
+
+    # Since there is one SWAPI HK packet for each SWAPI SCI packet, and both
+    # are recorded at 1 Hz (1 second), the HK status bits can be used to set
+    # quality flags for each science packet's data. Each packet carries one
+    # sequence of data, and a sequence's six counts (PCEM_CNT0 through
+    # PCEM_CNT5) all come from the same science packet, so they share the
+    # same HK quality flag. That is why each HK flag value is repeated 6
+    # times per sequence (aka packet); with 12 packets per sweep, this
+    # yields 72 values per sweep, matching the energy dimension.
+    for flag_name in flags_name[3:]:
+        quality_flags_data.append(
+            np.repeat(good_sweep_hk_data[flag_name].data, 6).reshape(-1, 72)
+        )
+
+    # Each individual quality flag array above has shape
+    # (number_of_sweeps, energy_steps). Stack them along a new axis to get
+    # (number_of_sweeps, number_of_flags, energy_steps), e.g. (3, 13, 72)
+    # for 3 good sweeps, so that every sweep carries its full set of
+    # quality flags at each energy step.
+ quality_flags_data = np.stack(quality_flags_data, axis=1) + + swp_flags = xr.DataArray( + np.array(quality_flags_data, dtype=np.uint8), + dims=["epoch", "flags", "energy"], + attrs=cdf_manager.get_variable_attributes("flags_default"), + ) + + # Quality flags of sweep data + flags = xr.DataArray( + np.arange(13), + name="flags", + dims=["flags"], + attrs=cdf_manager.get_variable_attributes("flags"), + ) + + flags_label = xr.DataArray( + flags_name, + name="flags_label", + dims=["flags_label"], + attrs=cdf_manager.get_variable_attributes("flags_label"), + ) + # =================================================================== # Step 3: Create xarray.Dataset # =================================================================== @@ -461,11 +554,6 @@ def process_swapi_science(sci_dataset: xr.Dataset, data_version: str) -> xr.Data # epoch time. Should be same dimension as number of good sweeps epoch_values = good_sweep_sci["epoch"].data.reshape(total_full_sweeps, 12)[:, 0] - # Load the CDF attributes - cdf_manager = ImapCdfAttributes() - cdf_manager.add_instrument_global_attrs("swapi") - cdf_manager.load_variable_attributes("imap_swapi_variable_attrs.yaml") - epoch_time = xr.DataArray( epoch_values, name="epoch", @@ -497,7 +585,13 @@ def process_swapi_science(sci_dataset: xr.Dataset, data_version: str) -> xr.Data l1_global_attrs["Apid"] = f"{sci_dataset['pkt_apid'].data[0]}" dataset = xr.Dataset( - coords={"epoch": epoch_time, "energy": energy, "energy_label": energy_label}, + coords={ + "epoch": epoch_time, + "energy": energy, + "energy_label": energy_label, + "flags": flags, + "flags_label": flags_label, + }, attrs=l1_global_attrs, ) @@ -517,24 +611,8 @@ def process_swapi_science(sci_dataset: xr.Dataset, data_version: str) -> xr.Data attrs=cdf_manager.get_variable_attributes("coin_counts"), ) - # L1 quality flags - # TODO: Should these be kept in raw format rather than derived into strings? - dataset["swp_pcem_flags"] = xr.DataArray( - np.array(pcem_compression_flags, dtype=np.uint8), - dims=["epoch", "energy"], - attrs=cdf_manager.get_variable_attributes("pcem_flags"), - ) - dataset["swp_scem_flags"] = xr.DataArray( - np.array(scem_compression_flags, dtype=np.uint8), - dims=["epoch", "energy"], - attrs=cdf_manager.get_variable_attributes("scem_flags"), - ) - dataset["swp_coin_flags"] = xr.DataArray( - np.array(coin_compression_flags, dtype=np.uint8), - dims=["epoch", "energy"], - attrs=cdf_manager.get_variable_attributes("coin_flags"), - ) - + # Add quality flags to the dataset + dataset["swp_flags"] = swp_flags # =================================================================== # Step 4: Calculate uncertainty # =================================================================== @@ -542,17 +620,35 @@ def process_swapi_science(sci_dataset: xr.Dataset, data_version: str) -> xr.Data # Uncertainty is quantified for the PCEM, SCEM, and COIN counts. # The Poisson contribution is # uncertainty = sqrt(count) - dataset["swp_pcem_err"] = xr.DataArray( + + # TODO: fill in with actual formula once SWAPI provides it. 
+ # Right now, we are using sqrt(count) as a placeholder + dataset["swp_pcem_err_plus"] = xr.DataArray( + np.sqrt(swp_pcem_counts), + dims=["epoch", "energy"], + attrs=cdf_manager.get_variable_attributes("pcem_uncertainty"), + ) + dataset["swp_pcem_err_minus"] = xr.DataArray( np.sqrt(swp_pcem_counts), dims=["epoch", "energy"], attrs=cdf_manager.get_variable_attributes("pcem_uncertainty"), ) - dataset["swp_scem_err"] = xr.DataArray( + dataset["swp_scem_err_plus"] = xr.DataArray( + np.sqrt(swp_scem_counts), + dims=["epoch", "energy"], + attrs=cdf_manager.get_variable_attributes("scem_uncertainty"), + ) + dataset["swp_scem_err_minus"] = xr.DataArray( np.sqrt(swp_scem_counts), dims=["epoch", "energy"], attrs=cdf_manager.get_variable_attributes("scem_uncertainty"), ) - dataset["swp_coin_err"] = xr.DataArray( + dataset["swp_coin_err_plus"] = xr.DataArray( + np.sqrt(swp_coin_counts), + dims=["epoch", "energy"], + attrs=cdf_manager.get_variable_attributes("coin_uncertainty"), + ) + dataset["swp_coin_err_minus"] = xr.DataArray( np.sqrt(swp_coin_counts), dims=["epoch", "energy"], attrs=cdf_manager.get_variable_attributes("coin_uncertainty"), @@ -587,14 +683,17 @@ def swapi_l1(file_path: str, data_version: str) -> xr.Dataset: file_path, xtce_definition, use_derived_value=False ) processed_data = [] + for apid, ds_data in datasets.items(): # Right now, we only process SWP_HK and SWP_SCI # other packets are not process in this processing pipeline # If appId is science, then the file should contain all data of science appId if apid == SWAPIAPID.SWP_SCI.value: - data = process_swapi_science(ds_data, data_version) - processed_data.append(data) + sci_dataset = process_swapi_science( + ds_data, datasets[SWAPIAPID.SWP_HK], data_version + ) + processed_data.append(sci_dataset) if apid == SWAPIAPID.SWP_HK.value: # Add HK datalevel attrs # TODO: ask SWAPI if we need to process HK if we can use WebPODA diff --git a/imap_processing/swapi/l2/swapi_l2.py b/imap_processing/swapi/l2/swapi_l2.py index f59826659..e488cea66 100644 --- a/imap_processing/swapi/l2/swapi_l2.py +++ b/imap_processing/swapi/l2/swapi_l2.py @@ -50,9 +50,9 @@ def swapi_l2(l1_dataset: xr.Dataset, data_version: str) -> xr.Dataset: "epoch", "energy", "energy_label", - "swp_pcem_flags", - "swp_scem_flags", - "swp_coin_flags", + "flags", + "flags_label", + "swp_flags", ] l2_dataset = l1_dataset[l1_data_keys] @@ -76,17 +76,29 @@ def swapi_l2(l1_dataset: xr.Dataset, data_version: str) -> xr.Dataset: l2_dataset["swp_coin_rate"].attrs = cdf_manager.get_variable_attributes("coin_rate") # update uncertainty - l2_dataset["swp_pcem_unc"] = l1_dataset["swp_pcem_err"] / TIME_PER_BIN - l2_dataset["swp_scem_unc"] = l1_dataset["swp_scem_err"] / TIME_PER_BIN - l2_dataset["swp_coin_unc"] = l1_dataset["swp_coin_err"] / TIME_PER_BIN + l2_dataset["swp_pcem_unc_plus"] = l1_dataset["swp_pcem_err_plus"] / TIME_PER_BIN + l2_dataset["swp_pcem_unc_minus"] = l1_dataset["swp_pcem_err_minus"] / TIME_PER_BIN + l2_dataset["swp_scem_unc_plus"] = l1_dataset["swp_scem_err_plus"] / TIME_PER_BIN + l2_dataset["swp_scem_unc_minus"] = l1_dataset["swp_scem_err_minus"] / TIME_PER_BIN + l2_dataset["swp_coin_unc_plus"] = l1_dataset["swp_coin_err_plus"] / TIME_PER_BIN + l2_dataset["swp_coin_unc_minus"] = l1_dataset["swp_coin_err_minus"] / TIME_PER_BIN # update attrs - l2_dataset["swp_pcem_unc"].attrs = cdf_manager.get_variable_attributes( + l2_dataset["swp_pcem_unc_plus"].attrs = cdf_manager.get_variable_attributes( "pcem_uncertainty" ) - l2_dataset["swp_scem_unc"].attrs = 
cdf_manager.get_variable_attributes( + l2_dataset["swp_pcem_unc_minus"].attrs = cdf_manager.get_variable_attributes( + "pcem_uncertainty" + ) + l2_dataset["swp_scem_unc_plus"].attrs = cdf_manager.get_variable_attributes( + "scem_uncertainty" + ) + l2_dataset["swp_scem_unc_minus"].attrs = cdf_manager.get_variable_attributes( "scem_uncertainty" ) - l2_dataset["swp_coin_unc"].attrs = cdf_manager.get_variable_attributes( + l2_dataset["swp_coin_unc_plus"].attrs = cdf_manager.get_variable_attributes( + "coin_uncertainty" + ) + l2_dataset["swp_coin_unc_minus"].attrs = cdf_manager.get_variable_attributes( "coin_uncertainty" ) diff --git a/imap_processing/tests/swapi/test_swapi_l1.py b/imap_processing/tests/swapi/test_swapi_l1.py index 5c221eef1..5df6861a4 100644 --- a/imap_processing/tests/swapi/test_swapi_l1.py +++ b/imap_processing/tests/swapi/test_swapi_l1.py @@ -211,10 +211,18 @@ def test_swapi_algorithm(decom_test_data): def test_process_swapi_science(decom_test_data): """Test process swapi science""" ds_data = decom_test_data[SWAPIAPID.SWP_SCI] - processed_data = process_swapi_science(ds_data, data_version="001") + processed_data = process_swapi_science( + ds_data, decom_test_data[SWAPIAPID.SWP_HK], data_version="001" + ) # Test dataset dimensions - assert processed_data.sizes == {"epoch": 3, "energy": 72, "energy_label": 72} + assert processed_data.sizes == { + "epoch": 3, + "energy": 72, + "energy_label": 72, + "flags": 13, + "flags_label": 13, + } # Test epoch data is correct expected_epoch_datetime = met_to_j2000ns([48, 60, 72]) np.testing.assert_array_equal(processed_data["epoch"].data, expected_epoch_datetime) @@ -298,20 +306,29 @@ def test_process_swapi_science(decom_test_data): assert processed_data["swp_pcem_counts"].shape == (3, 72) # Test that we calculated uncertainty correctly np.testing.assert_allclose( - np.sqrt(processed_data["swp_pcem_counts"][0]), processed_data["swp_pcem_err"][0] + np.sqrt(processed_data["swp_pcem_counts"][0]), + processed_data["swp_pcem_err_plus"][0], ) # make PLAN_ID data incorrect ds_data["plan_id_science"][:12] = np.arange(12) - processed_data = process_swapi_science(ds_data, data_version="001") + processed_data = process_swapi_science( + ds_data, decom_test_data[SWAPIAPID.SWP_HK], data_version="001" + ) # Test dataset dimensions - assert processed_data.sizes == {"epoch": 2, "energy": 72, "energy_label": 72} + assert processed_data.sizes == { + "epoch": 2, + "energy": 72, + "energy_label": 72, + "flags": 13, + "flags_label": 13, + } # Test CDF File # This time mismatch is because of sample data. Sample data has # SHCOARSE time as 48, 60, 72. That's why time is different. 
- cdf_filename = "imap_swapi_l1_sci-1min_20100101_v001.cdf" + cdf_filename = "imap_swapi_l1_sci_20100101_v001.cdf" cdf_path = write_cdf(processed_data) assert cdf_path.name == cdf_filename @@ -327,7 +344,7 @@ def test_swapi_l1_cdf(swapi_l0_test_data_path): # Test CDF File # sci cdf file - cdf_filename = "imap_swapi_l1_sci-1min_20100101_v001.cdf" + cdf_filename = "imap_swapi_l1_sci_20100101_v001.cdf" cdf_path = write_cdf(processed_data[0]) assert cdf_path.name == cdf_filename diff --git a/imap_processing/tests/swapi/test_swapi_l2.py b/imap_processing/tests/swapi/test_swapi_l2.py index 850e3f964..1e0f32e2e 100644 --- a/imap_processing/tests/swapi/test_swapi_l2.py +++ b/imap_processing/tests/swapi/test_swapi_l2.py @@ -13,9 +13,9 @@ def test_swapi_l2_cdf(swapi_l0_test_data_path): l2_dataset = swapi_l2(l1_dataset, data_version="v001") l2_cdf = write_cdf(l2_dataset) - assert l2_cdf.name == "imap_swapi_l2_sci-1min_20100101_v001.cdf" + assert l2_cdf.name == "imap_swapi_l2_sci_20100101_v001.cdf" # Test uncertainty variables are as expected np.testing.assert_array_equal( - l2_dataset["swp_pcem_unc"], l1_dataset["swp_pcem_err"] / TIME_PER_BIN + l2_dataset["swp_pcem_unc_plus"], l1_dataset["swp_pcem_err_plus"] / TIME_PER_BIN ) diff --git a/pyproject.toml b/pyproject.toml index 598c512e7..12f557270 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,7 +80,7 @@ lint.select = ["B", "D", "E", "F", "I", "N", "S", "W", "PL", "PT", "UP", "RUF"] # D104: Missing docstring in public package # PLR2004: Magic value in comparison # RUF200: pyproject missing field (poetry doesn't follow the spec) -lint.ignore = ["D104", "PLR2004", "RUF200", "S311"] +lint.ignore = ["D104", "PLR2004", "RUF200", "S311", "PLR0915"] [tool.ruff.lint.per-file-ignores] # S603 unchecked input in subprocess call is fine in our tests
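Note on consuming the new quality-flag layout: this patch replaces the three per-detector compression-flag variables with a single "swp_flags" variable of dimensions (epoch, flags, energy), where the 13 entries along the flags dimension follow the order of the flags_name list in swapi_l1.py and flags_label carries the same names. The sketch below is illustrative only and is not part of the patch: "l1_dataset" stands for the dataset returned by process_swapi_science, and the helper name get_quality_flag is hypothetical.

    # Illustrative sketch, not part of the patch. "l1_dataset" stands for the
    # xarray.Dataset returned by process_swapi_science(); the flag order
    # mirrors the flags_name list defined in swapi_l1.py.
    flag_names = [
        "swp_pcem_comp", "swp_scem_comp", "swp_coin_comp",
        "ovr_t_st", "und_t_st", "pcem_cnt_st", "scem_cnt_st",
        "pcem_v_st", "pcem_i_st", "pcem_int_st",
        "scem_v_st", "scem_i_st", "scem_int_st",
    ]

    def get_quality_flag(l1_dataset, name):
        """Return one quality flag as an (epoch, energy) array of 0s and 1s."""
        return l1_dataset["swp_flags"].isel(flags=flag_names.index(name))

    # Example: mask PCEM counts wherever the ovr_t_st housekeeping flag is
    # raised (assuming a raised bit marks affected samples).
    ovr_t = get_quality_flag(l1_dataset, "ovr_t_st")
    good_pcem_counts = l1_dataset["swp_pcem_counts"].where(ovr_t == 0)

Defining the flag order in one list is what ties together the flags coordinate, the flags_label values, and any lookup like the one above.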