From 8c1bd847627f4370025d90d2d632e630c9aeb25c Mon Sep 17 00:00:00 2001 From: Bousquin Date: Fri, 2 Aug 2024 12:52:13 -0500 Subject: [PATCH] black format/lint --- harmonize_wq/harmonize.py | 110 ++- harmonize_wq/tests/test_harmonize_WQP.py | 1093 ++++++++++++---------- harmonize_wq/wq_data.py | 183 ++-- 3 files changed, 734 insertions(+), 652 deletions(-) diff --git a/harmonize_wq/harmonize.py b/harmonize_wq/harmonize.py index ffb4d89..9d6dc37 100644 --- a/harmonize_wq/harmonize.py +++ b/harmonize_wq/harmonize.py @@ -29,13 +29,13 @@ def dissolved_oxygen(wqp): # Check/fix dimensionality issues (Type III) for unit in wqp.dimensions_list(): - if wqp.ureg(wqp.units).check({'[length]': -3, '[mass]': 1}): + if wqp.ureg(wqp.units).check({"[length]": -3, "[mass]": 1}): # Convert to density, e.g., % or ppm -> mg/l (assumes STP for now) wqp.apply_conversion(convert.DO_saturation, unit) elif wqp.ureg(wqp.units).dimensionless: # Convert to dimensionless, e.g., mg/l -> % or ppm wqp.apply_conversion(convert.DO_concentration, unit) - warn(f'Need % saturation equation for {unit}') + warn(f"Need % saturation equation for {unit}") return wqp @@ -61,20 +61,20 @@ def salinity(wqp): wqp : wq_data.WQCharData WQP Characteristic Info Object with updated attributes. """ - wqp.check_basis(basis_col='ResultTemperatureBasisText') # Moves '@25C' out + wqp.check_basis(basis_col="ResultTemperatureBasisText") # Moves '@25C' out wqp.check_units() # Replace know problem units, fix and flag missing units # Check/fix dimensionality issues (Type III) for unit in wqp.dimensions_list(): if wqp.ureg(wqp.units).dimensionless: # Convert to dimensionless - if wqp.ureg(unit).check({'[length]': -3, '[mass]': 1}): + if wqp.ureg(unit).check({"[length]": -3, "[mass]": 1}): # Density, e.g., 'mg/l' -> 'PSU'/'PSS'/'ppth' wqp.apply_conversion(convert.density_to_PSU, unit) else: # Will cause dimensionality error, kick it there for handling continue - elif wqp.ureg(wqp.units).check({'[length]': -3, '[mass]': 1}): + elif wqp.ureg(wqp.units).check({"[length]": -3, "[mass]": 1}): # Convert to density, e.g., PSU -> 'mg/l' wqp.apply_conversion(convert.PSU_to_density, unit) @@ -126,34 +126,34 @@ def turbidity(wqp): wqp : wq_data.WQCharData WQP Characteristic Info Object with updated attributes. """ - #These units exist but have not been encountered yet - #formazin nephelometric multibeam unit (FNMU); - #formazin backscatter unit (FBU); - #backscatter units (BU); attenuation units (AU) + # These units exist but have not been encountered yet + # formazin nephelometric multibeam unit (FNMU); + # formazin backscatter unit (FBU); + # backscatter units (BU); attenuation units (AU) wqp.check_units() # Replace know problem units, fix and flag missing units # Check/fix dimensionality issues (Type III) for unit in wqp.dimensions_list(): - if wqp.ureg(wqp.units).check({'[turbidity]': 1}): + if wqp.ureg(wqp.units).check({"[turbidity]": 1}): if wqp.ureg(unit).dimensionless: - if unit=='JTU': + if unit == "JTU": wqp.apply_conversion(convert.JTU_to_NTU, unit) - elif unit=='SiO2': + elif unit == "SiO2": wqp.apply_conversion(convert.SiO2_to_NTU, unit) else: - #raise ValueError('Bad Turbidity unit: {}'.format(unit)) - warn(f'Bad Turbidity unit: {unit}') - elif wqp.ureg(unit).check({'[length]': 1}): + # raise ValueError('Bad Turbidity unit: {}'.format(unit)) + warn(f"Bad Turbidity unit: {unit}") + elif wqp.ureg(unit).check({"[length]": 1}): wqp.apply_conversion(convert.cm_to_NTU, unit) else: - #raise ValueError('Bad Turbidity unit: {}'.format(unit)) - warn(f'Bad Turbidity unit: {unit}') - elif wqp.ureg(wqp.units).check({'[length]': 1}): + # raise ValueError('Bad Turbidity unit: {}'.format(unit)) + warn(f"Bad Turbidity unit: {unit}") + elif wqp.ureg(wqp.units).check({"[length]": 1}): wqp.apply_conversion(convert.NTU_to_cm, unit) else: - #raise ValueError('Bad Turbidity unit: {}'.format(wqp.units)) - warn(f'Bad Turbidity unit: {unit}') + # raise ValueError('Bad Turbidity unit: {}'.format(wqp.units)) + warn(f"Bad Turbidity unit: {unit}") return wqp @@ -173,8 +173,8 @@ def sediment(wqp): wqp : wq_data.WQCharData WQP Characteristic Info Object with updated attributes. """ - #'< 0.0625 mm', < 0.125 mm, < 0.25 mm, < 0.5 mm, < 1 mm, < 2 mm, < 4 mm - wqp.check_basis(basis_col='ResultParticleSizeBasisText') + # '< 0.0625 mm', < 0.125 mm, < 0.25 mm, < 0.5 mm, < 1 mm, < 2 mm, < 4 mm + wqp.check_basis(basis_col="ResultParticleSizeBasisText") wqp.check_units() # Replace know problem units, fix and flag missing units @@ -188,7 +188,7 @@ def sediment(wqp): return wqp -def harmonize_all(df_in, errors='raise'): +def harmonize_all(df_in, errors="raise"): """Harmonizes all 'CharacteristicNames' column values with methods. All results are standardized to default units. Intermediate columns are @@ -263,7 +263,7 @@ def harmonize_all(df_in, errors='raise'): """ df_out = df_in.copy() - char_vals = list(set(df_out['CharacteristicName'])) + char_vals = list(set(df_out["CharacteristicName"])) char_vals.sort() for char_val in char_vals: @@ -271,8 +271,14 @@ def harmonize_all(df_in, errors='raise'): return df_out -def harmonize(df_in, char_val, units_out=None, errors='raise', - intermediate_columns=False, report=False): +def harmonize( + df_in, + char_val, + units_out=None, + errors="raise", + intermediate_columns=False, + report=False, +): """Harmonize char_val rows based methods specific to that char_val. All rows where the value in the 'CharacteristicName' column matches @@ -358,25 +364,25 @@ def harmonize(df_in, char_val, units_out=None, errors='raise', wqp.update_ureg() # This is done based on out_col/char_val # Use out_col to dictate function - if out_col in ['pH', 'Secchi']: + if out_col in ["pH", "Secchi"]: wqp.check_units() # Fix and flag missing units # NOTE: pH undefined units -> NAN -> units, - elif out_col in ['Conductivity', 'Chlorophyll']: + elif out_col in ["Conductivity", "Chlorophyll"]: # Replace know problem units, fix and flag missing units wqp.check_units() - elif out_col in ['Fecal_Coliform', 'E_coli']: + elif out_col in ["Fecal_Coliform", "E_coli"]: # NOTE: Ecoli ['cfu/100ml', 'MPN/100ml', '#/100ml'] # NOTE: feca ['CFU', 'MPN/100ml', 'cfu/100ml', 'MPN/100 ml', '#/100ml'] # Replace known special character in unit ('#' count assumed as CFU) - wqp.replace_unit_str('#', 'CFU') + wqp.replace_unit_str("#", "CFU") # Replace known unit problems (e.g., assume CFU/MPN is /100ml) wqp.replace_unit_by_dict(UNITS_REPLACE[out_col]) - #TODO: figure out why the above must be done before replace_unit_str + # TODO: figure out why the above must be done before replace_unit_str # Replace all instances in results column - wqp.replace_unit_str('/100ml', '/(100ml)') - wqp.replace_unit_str('/100 ml', '/(100ml)') + wqp.replace_unit_str("/100ml", "/(100ml)") + wqp.replace_unit_str("/100 ml", "/(100ml)") wqp.check_units() # Fix and flag missing units - elif out_col in ['Carbon', 'Phosphorus', 'Nitrogen']: + elif out_col in ["Carbon", "Phosphorus", "Nitrogen"]: # Set Basis from unit and MethodSpec column wqp.check_basis() # Replace know problem units, fix and flag missing units (wet/dry?) @@ -386,17 +392,18 @@ def harmonize(df_in, char_val, units_out=None, errors='raise', # Replace units by dictionary wqp.replace_unit_by_dict(dimension_dict, wqp.measure_mask()) wqp.moles_convert(mol_list) # Fix up units/measures where moles - elif out_col == 'Temperature': + elif out_col == "Temperature": # Remove spaces from units for pint ('deg C' == degree coulomb) - wqp.update_units(wqp.units.replace(' ', '')) # No spaces in units_out - wqp.replace_unit_str(' ', '') # Replace in results column + wqp.update_units(wqp.units.replace(" ", "")) # No spaces in units_out + wqp.replace_unit_str(" ", "") # Replace in results column wqp.check_units() # Fix and flag missing units else: - harmonize_map = {'DO': dissolved_oxygen, - 'Salinity': salinity, - 'Turbidity': turbidity, - 'Sediment': sediment, - } + harmonize_map = { + "DO": dissolved_oxygen, + "Salinity": salinity, + "Turbidity": turbidity, + "Sediment": sediment, + } try: wqp = harmonize_map[out_col](wqp) except KeyError: @@ -411,18 +418,19 @@ def harmonize(df_in, char_val, units_out=None, errors='raise', # Note: just phosphorus right now # Total is TP (digested) from the whole water sample (vs total dissolved) # Dissolved is TDP (total) filtered water digested (vs undigested DIP) - if out_col in ['Phosphorus', 'Nitrogen']: + if out_col in ["Phosphorus", "Nitrogen"]: # NOTE: only top level fractions, while TADA has lower for: - #'Chlorophyll a', 'Turbidity', 'Fecal Coliform', 'Escherichia coli' - if out_col=='Phosphorus': - frac_dict = {'TP_Phosphorus': ['Total'], - 'TDP_Phosphorus': ['Dissolved'], - 'Other_Phosphorus': ['', nan],} + # 'Chlorophyll a', 'Turbidity', 'Fecal Coliform', 'Escherichia coli' + if out_col == "Phosphorus": + frac_dict = { + "TP_Phosphorus": ["Total"], + "TDP_Phosphorus": ["Dissolved"], + "Other_Phosphorus": ["", nan], + } else: - frac_dict = 'TADA' + frac_dict = "TADA" frac_dict = wqp.fraction(frac_dict) # Run sample fraction on WQP - df_out = wqp.df # TODO: add activities/detection limits and filter on quality? e.g., cols: @@ -437,5 +445,5 @@ def harmonize(df_in, char_val, units_out=None, errors='raise', if report: print_report(df_out.loc[wqp.c_mask], out_col, wqp.col.unit_in) if not intermediate_columns: - df_out = df_out.drop(['Units'], axis=1) # Drop intermediate columns + df_out = df_out.drop(["Units"], axis=1) # Drop intermediate columns return df_out diff --git a/harmonize_wq/tests/test_harmonize_WQP.py b/harmonize_wq/tests/test_harmonize_WQP.py index a1cb2d8..a156a77 100644 --- a/harmonize_wq/tests/test_harmonize_WQP.py +++ b/harmonize_wq/tests/test_harmonize_WQP.py @@ -26,30 +26,30 @@ DIRPATH = os.path.dirname(os.path.realpath(__file__)) # Test datasets -test_dir = os.path.join(DIRPATH, 'data') +test_dir = os.path.join(DIRPATH, "data") -AOI_URL = r'https://github.com/USEPA/Coastal_Ecological_Indicators/raw/master/DGGS_Coastal/temperature_data/TampaBay.geojson' +AOI_URL = r"https://github.com/USEPA/Coastal_Ecological_Indicators/raw/master/DGGS_Coastal/temperature_data/TampaBay.geojson" # results for dataretrieval.wqp.what_sites(**query) -STATIONS = pandas.read_csv(os.path.join(test_dir, 'wqp_sites.txt')) +STATIONS = pandas.read_csv(os.path.join(test_dir, "wqp_sites.txt")) # These are split by parameter sets of 2 to keep them small but not mono-param # 'Phosphorus' & 'Temperature, water' -NARROW_RESULTS = pandas.read_csv(os.path.join(test_dir, 'wqp_results.txt')) -ACTIVITIES = pandas.read_csv(os.path.join(test_dir, 'wqp_activities.txt')) +NARROW_RESULTS = pandas.read_csv(os.path.join(test_dir, "wqp_results.txt")) +ACTIVITIES = pandas.read_csv(os.path.join(test_dir, "wqp_activities.txt")) # 'Depth, Secchi disk depth' & Dissolved Oxygen -NARROW_RESULTS1 = pandas.read_csv(os.path.join(test_dir, 'wqp_results1.txt')) +NARROW_RESULTS1 = pandas.read_csv(os.path.join(test_dir, "wqp_results1.txt")) # pH & Salinity -NARROW_RESULTS2 = pandas.read_csv(os.path.join(test_dir, 'wqp_results2.txt')) +NARROW_RESULTS2 = pandas.read_csv(os.path.join(test_dir, "wqp_results2.txt")) # Nitrogen & Conductivity -NARROW_RESULTS3 = pandas.read_csv(os.path.join(test_dir, 'wqp_results3.txt')) +NARROW_RESULTS3 = pandas.read_csv(os.path.join(test_dir, "wqp_results3.txt")) # Chlorophyll_a & Organic_carbon -NARROW_RESULTS4 = pandas.read_csv(os.path.join(test_dir, 'wqp_results4.txt')) +NARROW_RESULTS4 = pandas.read_csv(os.path.join(test_dir, "wqp_results4.txt")) # Turbidity & Sediment -NARROW_RESULTS5 = pandas.read_csv(os.path.join(test_dir, 'wqp_results5.txt')) +NARROW_RESULTS5 = pandas.read_csv(os.path.join(test_dir, "wqp_results5.txt")) # Nutrients and sediment additional characteristics # NARROW_RESULTS6 = pandas.read_csv(os.path.join(test_dir, 'wqp_results6.txt')) # Fecal Coliform and Ecoli -NARROW_RESULTS7 = pandas.read_csv(os.path.join(test_dir, 'wqp_results7.txt')) +NARROW_RESULTS7 = pandas.read_csv(os.path.join(test_dir, "wqp_results7.txt")) # fixture to eventually test output writing (.shp) # @pytest.fixture(scope="session") @@ -69,11 +69,13 @@ def test_get_bounding_box(): AOI : geopandas.GeoDataFrame Geodataframe for Tampa Bay read from github """ - expected = ['-82.76095952246396', - '27.47487752677648', - '-82.37480995151799', - '28.12535740372124'] - actual = wrangle.get_bounding_box(AOI_URL).split(',') + expected = [ + "-82.76095952246396", + "27.47487752677648", + "-82.37480995151799", + "28.12535740372124", + ] + actual = wrangle.get_bounding_box(AOI_URL).split(",") assert actual == expected @@ -93,7 +95,7 @@ def test_get_bounding_box(): # Test it appends when QA_flag exists (not in test_harmonize_sites) # """ # actual = test_add_QA_flag(df_in, cond, flag) -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def merged_tables(): """ Merge narrow_results and activities tables. This fixture is used in some @@ -107,48 +109,52 @@ def merged_tables(): df1 = NARROW_RESULTS df2 = ACTIVITIES # Fields to get (all for test instead?) - df2_cols = ['ActivityTypeCode', - 'ActivityMediaName', - 'ActivityMediaSubdivisionName', - 'ActivityEndDate', - 'ActivityEndTime/Time', - 'ActivityEndTime/TimeZoneCode'] + df2_cols = [ + "ActivityTypeCode", + "ActivityMediaName", + "ActivityMediaSubdivisionName", + "ActivityEndDate", + "ActivityEndTime/Time", + "ActivityEndTime/TimeZoneCode", + ] return wrangle.merge_tables(df1, df2, df2_cols=df2_cols) -#@pytest.mark.skip(reason="no change") +# @pytest.mark.skip(reason="no change") def test_add_activities(merged_tables): # Run using first 100 orginal results df1 = NARROW_RESULTS actual = wrangle.add_activities_to_df(df1[:100]) # Compare against activities retrieved before for expected_col in list(merged_tables.columns): - assert expected_col in actual.columns, f'{expected_col} missing' + assert expected_col in actual.columns, f"{expected_col} missing" # Check the value for one ('Quality Control Field Replicate Msr/Obs') # NOTE: this will fail if result changes but index should be consistent - actual_val = actual.iloc[46]['ActivityTypeCode'] - expected_val = merged_tables.iloc[46]['ActivityTypeCode'] - assert actual_val == expected_val, 'Not expected ActivityMediaName value' + actual_val = actual.iloc[46]["ActivityTypeCode"] + expected_val = merged_tables.iloc[46]["ActivityTypeCode"] + assert actual_val == expected_val, "Not expected ActivityMediaName value" -#@pytest.mark.skip(reason="no change") +# @pytest.mark.skip(reason="no change") def test_add_detection(merged_tables): merged_cols = list(merged_tables.columns) # only retrieve for first 100 phosphorous results - phos_df = merged_tables[merged_tables['CharacteristicName']=='Phosphorus'] - actual = wrangle.add_detection(phos_df[:100], 'Phosphorus') + phos_df = merged_tables[merged_tables["CharacteristicName"] == "Phosphorus"] + actual = wrangle.add_detection(phos_df[:100], "Phosphorus") actual_cols = [x for x in list(actual.columns) if x not in merged_cols] - expected_cols = ['DetectionQuantitationLimitTypeName', - 'DetectionQuantitationLimitMeasure/MeasureValue', - 'DetectionQuantitationLimitMeasure/MeasureUnitCode'] - assert actual_cols == expected_cols, 'Detection columns not added' + expected_cols = [ + "DetectionQuantitationLimitTypeName", + "DetectionQuantitationLimitMeasure/MeasureValue", + "DetectionQuantitationLimitMeasure/MeasureUnitCode", + ] + assert actual_cols == expected_cols, "Detection columns not added" # Check the value for one # NOTE: this will fail if result changes but index should be consistent actual_val = actual.iloc[97][expected_cols[1]] - assert actual_val == 0.02, 'Not expected DetectionQuantitation value' + assert actual_val == 0.02, "Not expected DetectionQuantitation value" -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def harmonized_tables(): """ Harmonize Nitrogen and Conductivity results in NARROW_RESULTS3. This @@ -160,8 +166,8 @@ def harmonized_tables(): Harmonized results for Nitrogen and Conductivity. """ - harmonized_table = harmonize.harmonize(NARROW_RESULTS3, 'Nitrogen') - harmonized_table = harmonize.harmonize(harmonized_table, 'Conductivity') + harmonized_table = harmonize.harmonize(NARROW_RESULTS3, "Nitrogen") + harmonized_table = harmonize.harmonize(harmonized_table, "Conductivity") return harmonized_table @@ -188,11 +194,12 @@ def test_harmonize_depth(): Read from data/wqp_results1.txt. """ actual = clean.harmonize_depth(NARROW_RESULTS1) - assert len(actual['Depth'].dropna()) == 13 - expected_unit = 'meter' - assert str(actual.iloc[135227]['Depth'].units) == expected_unit + assert len(actual["Depth"].dropna()) == 13 + expected_unit = "meter" + assert str(actual.iloc[135227]["Depth"].units) == expected_unit -@pytest.fixture(scope='session') + +@pytest.fixture(scope="session") def test_harmonize_locations(): """ Test functions standardizes the sites correctly @@ -204,35 +211,35 @@ def test_harmonize_locations(): """ actual = location.harmonize_locations(STATIONS) - crs_col = 'HorizontalCoordinateReferenceSystemDatumName' - expected_flag = crs_col + ': Bad datum OTHER, EPSG:4326 assumed' + crs_col = "HorizontalCoordinateReferenceSystemDatumName" + expected_flag = crs_col + ": Bad datum OTHER, EPSG:4326 assumed" assert isinstance(actual, geopandas.geodataframe.GeoDataFrame) # Test type - assert actual.crs.name == 'WGS 84' # Test for expected CRS + assert actual.crs.name == "WGS 84" # Test for expected CRS assert actual.size == 1063506 # TODO: confirm original fields un-altered # Test for expected columns - for col in ['QA_flag', 'geometry']: + for col in ["QA_flag", "geometry"]: assert col in actual.columns # Test new fields have expected dtype - assert actual['geometry'].dtype == 'geometry' + assert actual["geometry"].dtype == "geometry" # assert actual['EPSG'].dtype == 'float64' # Converted to int() later # Test flag & fix when un-recognized CRS (test on row[CRS]=='OTHER') # assert actual.iloc[3522]['EPSG'] == 4326.0 # Test fixed in new col - assert actual.iloc[3522]['QA_flag'] == expected_flag # Test flag + assert actual.iloc[3522]["QA_flag"] == expected_flag # Test flag # No changes not changes # Converted converted # Missing unit infered # Check QA_flag # Check for precision flag - actual_imprecise = actual.iloc[302]['QA_flag'] - expected_imprecise = 'LatitudeMeasure: Imprecise: lessthan3decimaldigits' + actual_imprecise = actual.iloc[302]["QA_flag"] + expected_imprecise = "LatitudeMeasure: Imprecise: lessthan3decimaldigits" assert actual_imprecise == expected_imprecise - + return actual -#@pytest.mark.skip(reason="no change") +# @pytest.mark.skip(reason="no change") def test_harmonize_phosphorus(merged_tables): """ Test function standardizes Phosphorus results correctly @@ -243,84 +250,94 @@ def test_harmonize_phosphorus(merged_tables): Read from data/wqp_results.txt. """ # TODO: Test for expected dimensionalityError with NARROW_RESULTS? - actual = harmonize.harmonize(merged_tables, 'Phosphorus') # mg/l + actual = harmonize.harmonize(merged_tables, "Phosphorus") # mg/l # TODO: test conversion to moles and other non-standard units # Test that the dataframe has expected type, size, cols, and rows assert isinstance(actual, pandas.core.frame.DataFrame) # Test type assert actual.size == 16896735 # 17256240 # Test size # Test for expected columns - for col in ['TP_Phosphorus', 'TDP_Phosphorus', 'Other_Phosphorus']: + for col in ["TP_Phosphorus", "TDP_Phosphorus", "Other_Phosphorus"]: assert col in actual.columns # Number of results in each col - assert len(actual['TP_Phosphorus'].dropna()) == 11243 - assert len(actual['TDP_Phosphorus'].dropna()) == 601 - assert len(actual['Other_Phosphorus'].dropna()) == 1124 # 1075 NAN + assert len(actual["TP_Phosphorus"].dropna()) == 11243 + assert len(actual["TDP_Phosphorus"].dropna()) == 601 + assert len(actual["Other_Phosphorus"].dropna()) == 1124 # 1075 NAN # Confirm orginal data was not altered - orig_val_col = 'ResultMeasureValue' # Values + orig_val_col = "ResultMeasureValue" # Values assert actual[orig_val_col].equals(merged_tables[orig_val_col]) - orig_unit_col = 'ResultMeasure/MeasureUnitCode' # Units + orig_unit_col = "ResultMeasure/MeasureUnitCode" # Units assert actual[orig_unit_col].equals(merged_tables[orig_unit_col]) # Inspect specific results - expected_unit = 'milligram / liter' # Desired units + expected_unit = "milligram / liter" # Desired units # TP - out_col = 'TP_Phosphorus' - actual.loc[((actual['CharacteristicName'] == 'Phosphorus') & - (actual['ResultSampleFractionText'] == 'Total') & - (actual[out_col].notna())), out_col] + out_col = "TP_Phosphorus" + actual.loc[ + ( + (actual["CharacteristicName"] == "Phosphorus") + & (actual["ResultSampleFractionText"] == "Total") + & (actual[out_col].notna()) + ), + out_col, + ] # Inspect specific result - where units are not converted - assert actual.iloc[2866][orig_unit_col] == 'mg/l' # Confirm orig unit + assert actual.iloc[2866][orig_unit_col] == "mg/l" # Confirm orig unit assert str(actual.iloc[2866][out_col].units) == expected_unit expected_val = actual.iloc[2866][orig_val_col] # Original value assert actual.iloc[2866][out_col].magnitude == expected_val # Unchanged # Inspect specific result - where units converted # Basis in units 'mg/l as P' # Confirm original unit - assert actual.iloc[134674][orig_unit_col] == 'mg/l as P' + assert actual.iloc[134674][orig_unit_col] == "mg/l as P" assert str(actual.iloc[134674][out_col].units) == expected_unit # Confirm original measure assert actual.iloc[134674][orig_val_col] == 0.29 assert actual.iloc[134674][out_col].magnitude == 0.29 # Basis in units 'mg/l PO4' - assert actual.iloc[142482][orig_unit_col] == 'mg/l PO4' # Confirm orig unit + assert actual.iloc[142482][orig_unit_col] == "mg/l PO4" # Confirm orig unit assert str(actual.iloc[142482][out_col].units) == expected_unit # TODO: None with different units that get converted # Inspect specific result - where units missing - assert str(actual.iloc[9738][orig_unit_col]) == 'nan' # Confirm missing + assert str(actual.iloc[9738][orig_unit_col]) == "nan" # Confirm missing # Confirm expected flag - for missing/infered units - expected_flag = 'ResultMeasure/MeasureUnitCode: MISSING UNITS, mg/l assumed' - assert actual.iloc[9738]['QA_flag'] == expected_flag + expected_flag = "ResultMeasure/MeasureUnitCode: MISSING UNITS, mg/l assumed" + assert actual.iloc[9738]["QA_flag"] == expected_flag # Check value unchanged for missing units expected_val = float(actual.iloc[9738][orig_val_col]) # Original value assert actual.iloc[9738][out_col].magnitude == expected_val # Unchanged # Inspect specific result - where value missing - assert str(actual.iloc[134943][orig_val_col]) == 'nan' # Confirm missing + assert str(actual.iloc[134943][orig_val_col]) == "nan" # Confirm missing # Confirm expected flag - for missing values - expected_flag = 'ResultMeasureValue: missing (NaN) result' - actual_flags = actual.iloc[134943]['QA_flag'].split('; ') + expected_flag = "ResultMeasureValue: missing (NaN) result" + actual_flags = actual.iloc[134943]["QA_flag"].split("; ") assert actual_flags[0] == expected_flag # Inspect specific result - un-usable non-numeric values - assert actual.iloc[19902][orig_val_col] == '*Not Reported' + assert actual.iloc[19902][orig_val_col] == "*Not Reported" # Confirm expected flag - for un-usable value expected_flag = 'ResultMeasureValue: "*Not Reported" result cannot be used' - actual_flags = actual.iloc[19902]['QA_flag'].split('; ') + actual_flags = actual.iloc[19902]["QA_flag"].split("; ") assert actual_flags[0] == expected_flag # TDP - out_col = 'TDP_Phosphorus' - actual.loc[((actual['CharacteristicName'] == 'Phosphorus') & - (actual['ResultSampleFractionText'] == 'Dissolved') & - (actual[out_col].notna())), out_col] + out_col = "TDP_Phosphorus" + actual.loc[ + ( + (actual["CharacteristicName"] == "Phosphorus") + & (actual["ResultSampleFractionText"] == "Dissolved") + & (actual[out_col].notna()) + ), + out_col, + ] # Inspect specific result - where units are not converted - assert actual.iloc[673][orig_unit_col] == 'mg/l' # Confirm orig unit + assert actual.iloc[673][orig_unit_col] == "mg/l" # Confirm orig unit assert str(actual.iloc[673][out_col].units) == expected_unit expected_val = actual.iloc[673][orig_val_col] # Original value assert actual.iloc[673][out_col].magnitude == expected_val # Unchanged # Inspect specific result - where units converted # Basis in units 'mg/l as P' idx = 134696 - assert actual.iloc[idx][orig_unit_col] == 'mg/l as P' # Confirm orig unit + assert actual.iloc[idx][orig_unit_col] == "mg/l as P" # Confirm orig unit assert str(actual.iloc[idx][out_col].units) == expected_unit assert actual.iloc[idx][orig_val_col] == 0.38 # Confirm original measure assert actual.iloc[idx][out_col].magnitude == 0.38 @@ -328,22 +345,27 @@ def test_harmonize_phosphorus(merged_tables): # Inspect specific result - where units missing # TODO: None missing units w/ value # Inspect specific result - where value missing - assert str(actual.iloc[138475][orig_val_col]) == 'nan' # Confirm missing + assert str(actual.iloc[138475][orig_val_col]) == "nan" # Confirm missing # Confirm expected flag - for missing values - expected_flag = 'ResultMeasureValue: missing (NaN) result' - actual_flags = actual.iloc[138475]['QA_flag'].split('; ') + expected_flag = "ResultMeasureValue: missing (NaN) result" + actual_flags = actual.iloc[138475]["QA_flag"].split("; ") assert actual_flags[0] == expected_flag # Inspect specific result - un-usable non-numeric values # TODO: no bad value # Other - out_col = 'Other_Phosphorus' + out_col = "Other_Phosphorus" # NOTE: these are neither labled 'Total' nor 'Dissolved' - actual.loc[((actual['CharacteristicName'] == 'Phosphorus') & - (actual['ResultSampleFractionText'].isna()) & - (actual[out_col].notna())), out_col] + actual.loc[ + ( + (actual["CharacteristicName"] == "Phosphorus") + & (actual["ResultSampleFractionText"].isna()) + & (actual[out_col].notna()) + ), + out_col, + ] # Inspect specific result - where units are not converted - assert actual.iloc[19665][orig_unit_col] == 'mg/l' # Confirm orig unit + assert actual.iloc[19665][orig_unit_col] == "mg/l" # Confirm orig unit assert str(actual.iloc[19665][out_col].units) == expected_unit expected_val = float(actual.iloc[19665][orig_val_col]) # Original value assert actual.iloc[19665][out_col].magnitude == expected_val # Unchanged @@ -352,17 +374,17 @@ def test_harmonize_phosphorus(merged_tables): # Inspect specific result - where units missing # TODO: None missing units w/ value # Inspect specific result - where value missing - assert str(actual.iloc[177611][orig_val_col]) == 'nan' # Confirm missing + assert str(actual.iloc[177611][orig_val_col]) == "nan" # Confirm missing # Confirm expected flag - for missing values - expected_flag = 'ResultMeasureValue: missing (NaN) result' - actual_flags = actual.iloc[177611]['QA_flag'].split('; ') + expected_flag = "ResultMeasureValue: missing (NaN) result" + actual_flags = actual.iloc[177611]["QA_flag"].split("; ") assert actual_flags[0] == expected_flag # Inspect specific result - un-usable non-numeric values # TODO: no bad value -@pytest.fixture(scope='session') -#@pytest.mark.skip(reason="no change") +@pytest.fixture(scope="session") +# @pytest.mark.skip(reason="no change") def test_harmonize_temperature(): """ Test function standardizes Temperature results correctly @@ -372,54 +394,54 @@ def test_harmonize_temperature(): NARROW_RESULTS : pandas.DataFrame Read from data/wqp_results.txt. """ - actual = harmonize.harmonize(NARROW_RESULTS, 'Temperature, water') - actual2 = harmonize.harmonize(NARROW_RESULTS.iloc[0:10], - 'Temperature, water', - units_out='deg F') + actual = harmonize.harmonize(NARROW_RESULTS, "Temperature, water") + actual2 = harmonize.harmonize( + NARROW_RESULTS.iloc[0:10], "Temperature, water", units_out="deg F" + ) # Test that the dataframe has expected type, size, cols, and rows assert isinstance(actual, pandas.core.frame.DataFrame) # Test type assert actual.size == 13301685 # Test size #14784040 - assert 'Temperature' in actual.columns # Check for column - assert len(actual['Temperature'].dropna()) == 346210 # Number of results + assert "Temperature" in actual.columns # Check for column + assert len(actual["Temperature"].dropna()) == 346210 # Number of results # Confirm orginal data was not altered - orig_val_col = 'ResultMeasureValue' # Values + orig_val_col = "ResultMeasureValue" # Values assert actual[orig_val_col].equals(NARROW_RESULTS[orig_val_col]) - orig_unit_col = 'ResultMeasure/MeasureUnitCode' # Units + orig_unit_col = "ResultMeasure/MeasureUnitCode" # Units assert actual[orig_unit_col].equals(NARROW_RESULTS[orig_unit_col]) # Inspect specific result - where units are not converted - assert actual.iloc[0][orig_unit_col] == 'deg C' # Confirm orig unit - expected_unit = 'degree_Celsius' # Desired units - assert str(actual.iloc[0]['Temperature'].units) == expected_unit + assert actual.iloc[0][orig_unit_col] == "deg C" # Confirm orig unit + expected_unit = "degree_Celsius" # Desired units + assert str(actual.iloc[0]["Temperature"].units) == expected_unit expected_val = actual.iloc[0][orig_val_col] # Original value - assert actual.iloc[0]['Temperature'].magnitude == expected_val # Unchanged + assert actual.iloc[0]["Temperature"].magnitude == expected_val # Unchanged # Inspect specific result - where units converted - assert actual.iloc[55013][orig_unit_col] == 'deg F' # Confirm orig unit - assert str(actual.iloc[55013]['Temperature'].units) == expected_unit - assert actual.iloc[55013][orig_val_col] == '87' # Confirm original measure - assert actual.iloc[55013]['Temperature'].magnitude == 30.5555555555556 + assert actual.iloc[55013][orig_unit_col] == "deg F" # Confirm orig unit + assert str(actual.iloc[55013]["Temperature"].units) == expected_unit + assert actual.iloc[55013][orig_val_col] == "87" # Confirm original measure + assert actual.iloc[55013]["Temperature"].magnitude == 30.5555555555556 # Inspect specific result - where units missing - assert str(actual.iloc[143765][orig_unit_col]) == 'nan' # Confirm missing + assert str(actual.iloc[143765][orig_unit_col]) == "nan" # Confirm missing # Confirm expected flag - for missing/infered units - expected_flag = 'ResultMeasure/MeasureUnitCode: MISSING UNITS, degC assumed' - actual_flags = actual.iloc[143765]['QA_flag'].split('; ') + expected_flag = "ResultMeasure/MeasureUnitCode: MISSING UNITS, degC assumed" + actual_flags = actual.iloc[143765]["QA_flag"].split("; ") assert actual_flags[1] == expected_flag # Should be assessed 1st (flag 0) # Check value unchagned for missing units # TODO: values would stay the same (no conversion), but this example is nan # Inspect specific result - where value missing - assert str(actual.iloc[143765][orig_val_col]) == 'nan' # Confirm missing + assert str(actual.iloc[143765][orig_val_col]) == "nan" # Confirm missing # Confirm expected flag - for missing/infered values - expected_flag = 'ResultMeasureValue: missing (NaN) result' + expected_flag = "ResultMeasureValue: missing (NaN) result" assert actual_flags[0] == expected_flag # Inspect specific result - un-usable non-numeric values - assert actual.iloc[359504][orig_val_col] == 'Not Reported' + assert actual.iloc[359504][orig_val_col] == "Not Reported" # Confirm expected flag - for un-usable value expected_flag = 'ResultMeasureValue: "Not Reported" result cannot be used' - assert actual.iloc[359504]['QA_flag'] == expected_flag + assert actual.iloc[359504]["QA_flag"] == expected_flag return actual -#@pytest.mark.skip(reason="no change") +# @pytest.mark.skip(reason="no change") def test_harmonize_secchi(): """ Test function standardizes Seccchi results correctly @@ -429,50 +451,50 @@ def test_harmonize_secchi(): NARROW_RESULTS1 : pandas.DataFrame Read from data/wqp_results1.txt. """ - actual = harmonize.harmonize(NARROW_RESULTS1, 'Depth, Secchi disk depth') + actual = harmonize.harmonize(NARROW_RESULTS1, "Depth, Secchi disk depth") # Test that the dataframe has expected type, size, cols, and rows assert isinstance(actual, pandas.core.frame.DataFrame) # Test type assert actual.size == 11818094 # Test size - assert 'Secchi' in actual.columns # Check for column - assert len(actual['Secchi'].dropna()) == 69144 # Number of results + assert "Secchi" in actual.columns # Check for column + assert len(actual["Secchi"].dropna()) == 69144 # Number of results # Confirm orginal data was not altered - orig_val_col = 'ResultMeasureValue' # Values + orig_val_col = "ResultMeasureValue" # Values assert actual[orig_val_col].equals(NARROW_RESULTS1[orig_val_col]) - orig_unit_col = 'ResultMeasure/MeasureUnitCode' # Units + orig_unit_col = "ResultMeasure/MeasureUnitCode" # Units assert actual[orig_unit_col].equals(NARROW_RESULTS1[orig_unit_col]) # Inspect specific result - where units are not converted - assert actual.iloc[1][orig_unit_col] == 'm' # Confirm orig unit - expected_unit = 'meter' # Desired units - assert str(actual.iloc[1]['Secchi'].units) == expected_unit + assert actual.iloc[1][orig_unit_col] == "m" # Confirm orig unit + expected_unit = "meter" # Desired units + assert str(actual.iloc[1]["Secchi"].units) == expected_unit expected_val = float(actual.iloc[1][orig_val_col]) # Original value - assert actual.iloc[1]['Secchi'].magnitude == expected_val # Unchanged + assert actual.iloc[1]["Secchi"].magnitude == expected_val # Unchanged # Inspect specific result - where units converted - assert actual.iloc[369][orig_unit_col] == 'ft' # Confirm orig unit - assert str(actual.iloc[369]['Secchi'].units) == expected_unit - assert actual.iloc[369][orig_val_col] == '1.5' # Confirm original measure - assert actual.iloc[369]['Secchi'].magnitude == 0.45719999999999994 + assert actual.iloc[369][orig_unit_col] == "ft" # Confirm orig unit + assert str(actual.iloc[369]["Secchi"].units) == expected_unit + assert actual.iloc[369][orig_val_col] == "1.5" # Confirm original measure + assert actual.iloc[369]["Secchi"].magnitude == 0.45719999999999994 # Inspect specific result - where units missing - assert str(actual.iloc[347590][orig_unit_col]) == 'nan' # Confirm missing + assert str(actual.iloc[347590][orig_unit_col]) == "nan" # Confirm missing # Confirm expected flag - for missing/infered units - expected_flag = 'ResultMeasure/MeasureUnitCode: MISSING UNITS, m assumed' - actual_flags = actual.iloc[347590]['QA_flag'].split('; ') + expected_flag = "ResultMeasure/MeasureUnitCode: MISSING UNITS, m assumed" + actual_flags = actual.iloc[347590]["QA_flag"].split("; ") assert actual_flags[1] == expected_flag # Should be assessed 1st (flag 0) # Check value unchanged for missing units # TODO: values would stay the same (no conversion), but this example is nan # Inspect specific result - where value missing - assert str(actual.iloc[347590][orig_val_col]) == 'nan' # Confirm missing + assert str(actual.iloc[347590][orig_val_col]) == "nan" # Confirm missing # Confirm expected flag - for missing/infered values - expected_flag = 'ResultMeasureValue: missing (NaN) result' + expected_flag = "ResultMeasureValue: missing (NaN) result" assert actual_flags[0] == expected_flag # Inspect specific result - un-usable non-numeric values - assert actual.iloc[347589][orig_val_col] == 'Not Reported' + assert actual.iloc[347589][orig_val_col] == "Not Reported" # Confirm expected flag - for un-usable value expected_flag = 'ResultMeasureValue: "Not Reported" result cannot be used' - assert actual.iloc[347589]['QA_flag'] == expected_flag + assert actual.iloc[347589]["QA_flag"] == expected_flag -#@pytest.mark.skip(reason="no change") +# @pytest.mark.skip(reason="no change") def test_harmonize_DO(): """ Test function standardizes Dissolved oxygen (DO) results correctly @@ -482,63 +504,69 @@ def test_harmonize_DO(): NARROW_RESULTS1 : pandas.DataFrame Read from data/wqp_results1.txt. """ - actual = harmonize.harmonize(NARROW_RESULTS1, 'Dissolved oxygen (DO)') + actual = harmonize.harmonize(NARROW_RESULTS1, "Dissolved oxygen (DO)") # Test that the dataframe has expected type, size, cols, and rows assert isinstance(actual, pandas.core.frame.DataFrame) # Test type assert actual.size == 11818094 # Test size - assert 'DO' in actual.columns # Check for column - assert len(actual['DO'].dropna()) == 278395 # Number of results + assert "DO" in actual.columns # Check for column + assert len(actual["DO"].dropna()) == 278395 # Number of results # Confirm orginal data was not altered - orig_val_col = 'ResultMeasureValue' # Values + orig_val_col = "ResultMeasureValue" # Values assert actual[orig_val_col].equals(NARROW_RESULTS1[orig_val_col]) - orig_unit_col = 'ResultMeasure/MeasureUnitCode' # Units + orig_unit_col = "ResultMeasure/MeasureUnitCode" # Units assert actual[orig_unit_col].equals(NARROW_RESULTS1[orig_unit_col]) # Inspect specific result - where units are not converted - assert actual.iloc[0][orig_unit_col] == 'mg/l' # Confirm orig unit - expected_unit = 'milligram / liter' # Desired units - assert str(actual.iloc[0]['DO'].units) == expected_unit + assert actual.iloc[0][orig_unit_col] == "mg/l" # Confirm orig unit + expected_unit = "milligram / liter" # Desired units + assert str(actual.iloc[0]["DO"].units) == expected_unit expected_val = float(actual.iloc[0][orig_val_col]) # Original value - assert actual.iloc[0]['DO'].magnitude == expected_val # Unchanged + assert actual.iloc[0]["DO"].magnitude == expected_val # Unchanged # Inspect specific result - where units converted - assert actual.iloc[4][orig_unit_col] == '%' # Confirm orig unit - assert str(actual.iloc[4]['DO'].units) == expected_unit - assert actual.iloc[4][orig_val_col] == '68.7' # Confirm original measure - assert actual.iloc[4]['DO'].magnitude == 0.05676222371166 + assert actual.iloc[4][orig_unit_col] == "%" # Confirm orig unit + assert str(actual.iloc[4]["DO"].units) == expected_unit + assert actual.iloc[4][orig_val_col] == "68.7" # Confirm original measure + assert actual.iloc[4]["DO"].magnitude == 0.05676222371166 # TODO: add tests for 99637 in ppm? Currently ppm == mg/l - + # TODO: add tests at different pressure and temperature - actual_p2 = str(convert.DO_saturation(70, '0.5 standard_atmosphere')) - expected_p2 = '2.7994178481769043 milligram / liter' + actual_p2 = str(convert.DO_saturation(70, "0.5 standard_atmosphere")) + expected_p2 = "2.7994178481769043 milligram / liter" assert actual_p2 == expected_p2 from harmonize_wq.convert import u_reg - actual_p2 = str(convert.DO_concentration('0.7 milligram / liter', - '2 standard_atmosphere', - u_reg.Quantity(32, u_reg("degC")))) - expected_p2 = '4.681314214558987' + + actual_p2 = str( + convert.DO_concentration( + "0.7 milligram / liter", + "2 standard_atmosphere", + u_reg.Quantity(32, u_reg("degC")), + ) + ) + expected_p2 = "4.681314214558987" assert actual_p2 == expected_p2 - + # Inspect specific result - where units missing - assert str(actual.iloc[6816][orig_unit_col]) == 'nan' # Confirm missing + assert str(actual.iloc[6816][orig_unit_col]) == "nan" # Confirm missing # Confirm expected flag - for missing/infered units - expected_flag = 'ResultMeasure/MeasureUnitCode: MISSING UNITS, mg/l assumed' - actual_flags = actual.iloc[6816]['QA_flag'].split('; ') + expected_flag = "ResultMeasure/MeasureUnitCode: MISSING UNITS, mg/l assumed" + actual_flags = actual.iloc[6816]["QA_flag"].split("; ") assert actual_flags[1] == expected_flag # Check value unchanged for missing units - # TODO: values would stay the same (no conversion), but this example is '*Not Reported' + # TODO: values would stay the same (no conversion), but this example is + # '*Not Reported' # Inspect specific result - where value missing - assert str(actual.iloc[130784][orig_val_col]) == 'nan' # Confirm missing + assert str(actual.iloc[130784][orig_val_col]) == "nan" # Confirm missing # Confirm expected flag - for missing/infered values - expected_flag = 'ResultMeasureValue: missing (NaN) result' - assert actual.iloc[130784]['QA_flag'].split('; ')[0] == expected_flag + expected_flag = "ResultMeasureValue: missing (NaN) result" + assert actual.iloc[130784]["QA_flag"].split("; ")[0] == expected_flag # Inspect specific result - un-usable non-numeric values - assert actual.iloc[6816][orig_val_col] == '*Not Reported' + assert actual.iloc[6816][orig_val_col] == "*Not Reported" # Confirm expected flag - for un-usable value expected_flag = 'ResultMeasureValue: "*Not Reported" result cannot be used' - assert actual.iloc[6816]['QA_flag'].split('; ')[0] == expected_flag + assert actual.iloc[6816]["QA_flag"].split("; ")[0] == expected_flag -#@pytest.mark.skip(reason="no change") +# @pytest.mark.skip(reason="no change") def test_harmonize_salinity(): """ Test function standardizes Salinity results correctly @@ -550,61 +578,63 @@ def test_harmonize_salinity(): NARROW_RESULTS2 : pandas.DataFrame Read from data/wqp_results2.txt. """ - actual = harmonize.harmonize(NARROW_RESULTS2, 'Salinity', units_out='PSS') + actual = harmonize.harmonize(NARROW_RESULTS2, "Salinity", units_out="PSS") # Test that the dataframe has expected type, size, cols, and rows assert isinstance(actual, pandas.core.frame.DataFrame) # Test type assert actual.size == 12181392 # Test size - assert 'Salinity' in actual.columns # Check for column - assert len(actual['Salinity'].dropna()) == 185562 # Number of results + assert "Salinity" in actual.columns # Check for column + assert len(actual["Salinity"].dropna()) == 185562 # Number of results # Confirm orginal data was not altered - orig_val_col = 'ResultMeasureValue' # Values + orig_val_col = "ResultMeasureValue" # Values assert actual[orig_val_col].equals(NARROW_RESULTS2[orig_val_col]) - orig_unit_col = 'ResultMeasure/MeasureUnitCode' # Units + orig_unit_col = "ResultMeasure/MeasureUnitCode" # Units assert actual[orig_unit_col].equals(NARROW_RESULTS2[orig_unit_col]) # Inspect specific result - where units are not converted - assert actual.iloc[3][orig_unit_col] == 'PSS' # Confirm orig unit - expected_unit = 'Practical_Salinity_Units' # Desired units - assert str(actual.iloc[3]['Salinity'].units) == expected_unit + assert actual.iloc[3][orig_unit_col] == "PSS" # Confirm orig unit + expected_unit = "Practical_Salinity_Units" # Desired units + assert str(actual.iloc[3]["Salinity"].units) == expected_unit expected_val = float(actual.iloc[3][orig_val_col]) # Original value - assert actual.iloc[3]['Salinity'].magnitude == expected_val # Unchanged + assert actual.iloc[3]["Salinity"].magnitude == expected_val # Unchanged # Inspect specific result - where units converted (ptth) - assert actual.iloc[0][orig_unit_col] == 'ppth' # Confirm orig unit - assert str(actual.iloc[0]['Salinity'].units) == expected_unit - assert actual.iloc[0][orig_val_col] == '40' # Confirm original measure - assert actual.iloc[0]['Salinity'].magnitude == 40 + assert actual.iloc[0][orig_unit_col] == "ppth" # Confirm orig unit + assert str(actual.iloc[0]["Salinity"].units) == expected_unit + assert actual.iloc[0][orig_val_col] == "40" # Confirm original measure + assert actual.iloc[0]["Salinity"].magnitude == 40 # Inspect specific result - where units converted (mg/ml) # TODO: need a different test value (something weird here) - assert actual.iloc[335435][orig_unit_col] == 'mg/mL @25C' # Confirm unit - assert str(actual.iloc[335435]['Salinity'].units) + assert actual.iloc[335435][orig_unit_col] == "mg/mL @25C" # Confirm unit + assert str(actual.iloc[335435]["Salinity"].units) assert actual.iloc[335435][orig_val_col] == 120.0 # Confirm measure - assert actual.iloc[335435]['Salinity'].magnitude == 125.28127999999992 - psu_example = str(actual.iloc[335435]['Salinity']) + assert actual.iloc[335435]["Salinity"].magnitude == 125.28127999999992 + psu_example = str(actual.iloc[335435]["Salinity"]) # Inspect specific result - where units missing - assert str(actual.iloc[21277][orig_unit_col]) == 'nan' # Confirm missing + assert str(actual.iloc[21277][orig_unit_col]) == "nan" # Confirm missing # Confirm expected flag - for missing/infered units - expected_flag = 'ResultMeasure/MeasureUnitCode: MISSING UNITS, PSS assumed' - actual_flags = actual.iloc[21277]['QA_flag'].split('; ') + expected_flag = "ResultMeasure/MeasureUnitCode: MISSING UNITS, PSS assumed" + actual_flags = actual.iloc[21277]["QA_flag"].split("; ") assert actual_flags[1] == expected_flag # Check value unchagned for missing units - # TODO: values would stay the same (no conversion), but this example is '*Not Reported' + # TODO: values would stay the same (no conversion), but this example is + # '*Not Reported' # Inspect specific result - where value missing - assert str(actual.iloc[69781][orig_val_col]) == 'nan' # Confirm missing + assert str(actual.iloc[69781][orig_val_col]) == "nan" # Confirm missing # Confirm expected flag - for missing/infered values - expected_flag = 'ResultMeasureValue: missing (NaN) result' - assert actual.iloc[69781]['QA_flag'].split('; ')[0] == expected_flag + expected_flag = "ResultMeasureValue: missing (NaN) result" + assert actual.iloc[69781]["QA_flag"].split("; ")[0] == expected_flag # Inspect specific result - un-usable non-numeric values - assert actual.iloc[21277][orig_val_col] == '*Not Reported' + assert actual.iloc[21277][orig_val_col] == "*Not Reported" # Confirm expected flag - for un-usable value expected_flag = 'ResultMeasureValue: "*Not Reported" result cannot be used' - assert actual.iloc[21277]['QA_flag'].split('; ')[0] == expected_flag + assert actual.iloc[21277]["QA_flag"].split("; ")[0] == expected_flag # Backward test PSU to density density = convert.PSU_to_density(psu_example) - assert str(density) == '997.1428971400308 milligram / milliliter' + assert str(density) == "997.1428971400308 milligram / milliliter" + -#@pytest.mark.skip(reason="no change") +# @pytest.mark.skip(reason="no change") def test_harmonize_pH(): """ Test function standardizes pH results correctly @@ -615,51 +645,53 @@ def test_harmonize_pH(): Read from data/wqp_results2.txt. """ # actual1 = harmonize.harmonize_pH(NARROW_RESULTS2, units='dimensionless') - actual = harmonize.harmonize(NARROW_RESULTS2, 'pH') + actual = harmonize.harmonize(NARROW_RESULTS2, "pH") # Test that the dataframe has expected type, size, cols, and rows assert isinstance(actual, pandas.core.frame.DataFrame) # Test type assert actual.size == 12181392 # Test size - assert 'pH' in actual.columns # Check for column - assert len(actual['pH'].dropna()) == 152314 # Number of results + assert "pH" in actual.columns # Check for column + assert len(actual["pH"].dropna()) == 152314 # Number of results # Confirm orginal data was not altered - orig_val_col = 'ResultMeasureValue' # Values + orig_val_col = "ResultMeasureValue" # Values assert actual[orig_val_col].equals(NARROW_RESULTS2[orig_val_col]) - orig_unit_col = 'ResultMeasure/MeasureUnitCode' # Units + orig_unit_col = "ResultMeasure/MeasureUnitCode" # Units assert actual[orig_unit_col].equals(NARROW_RESULTS2[orig_unit_col]) # Inspect specific result - where units are not converted - assert actual.iloc[1][orig_unit_col] == 'None' # Confirm orig unit - expected_unit = 'dimensionless' # Desired units - assert str(actual.iloc[1]['pH'].units) == expected_unit + assert actual.iloc[1][orig_unit_col] == "None" # Confirm orig unit + expected_unit = "dimensionless" # Desired units + assert str(actual.iloc[1]["pH"].units) == expected_unit expected_val = float(actual.iloc[1][orig_val_col]) # Original value - assert actual.iloc[1]['pH'].magnitude == expected_val # Unchanged + assert actual.iloc[1]["pH"].magnitude == expected_val # Unchanged # Inspect specific result - where units converted - assert actual.iloc[1][orig_unit_col] == 'None' # Confirm orig unit - assert str(actual.iloc[1]['pH'].units) == expected_unit - assert actual.iloc[1][orig_val_col] == '8.18' # Confirm original measure - assert actual.iloc[1]['pH'].magnitude == 8.18 + assert actual.iloc[1][orig_unit_col] == "None" # Confirm orig unit + assert str(actual.iloc[1]["pH"].units) == expected_unit + assert actual.iloc[1][orig_val_col] == "8.18" # Confirm original measure + assert actual.iloc[1]["pH"].magnitude == 8.18 # Inspect specific result - where units missing - assert str(actual.iloc[195644][orig_unit_col]) == 'nan' # Confirm missing + assert str(actual.iloc[195644][orig_unit_col]) == "nan" # Confirm missing # Confirm expected flag - for missing/infered units - expected_flag = 'ResultMeasure/MeasureUnitCode: MISSING UNITS, dimensionless assumed' - actual_flags = actual.iloc[195644]['QA_flag'].split('; ') + expected_flag = ( + "ResultMeasure/MeasureUnitCode: MISSING UNITS, dimensionless assumed" + ) + actual_flags = actual.iloc[195644]["QA_flag"].split("; ") assert actual_flags[0] == expected_flag # Check value unchanged for missing units expected_val = float(actual.iloc[195644][orig_val_col]) # Original value - assert actual.iloc[195644]['pH'].magnitude == expected_val # Unchanged + assert actual.iloc[195644]["pH"].magnitude == expected_val # Unchanged # Inspect specific result - where value missing - assert str(actual.iloc[77966][orig_val_col]) == 'nan' # Confirm missing + assert str(actual.iloc[77966][orig_val_col]) == "nan" # Confirm missing # Confirm expected flag - for missing/infered values - expected_flag = 'ResultMeasureValue: missing (NaN) result' - assert actual.iloc[77966]['QA_flag'].split('; ')[0] == expected_flag + expected_flag = "ResultMeasureValue: missing (NaN) result" + assert actual.iloc[77966]["QA_flag"].split("; ")[0] == expected_flag # Inspect specific result - un-usable non-numeric values - assert actual.iloc[2641][orig_val_col] == '*Not Reported' + assert actual.iloc[2641][orig_val_col] == "*Not Reported" # Confirm expected flag - for un-usable value expected_flag = 'ResultMeasureValue: "*Not Reported" result cannot be used' - assert actual.iloc[2641]['QA_flag'].split('; ')[0] == expected_flag + assert actual.iloc[2641]["QA_flag"].split("; ")[0] == expected_flag -#@pytest.mark.skip(reason="no change") +# @pytest.mark.skip(reason="no change") def test_harmonize_nitrogen(): """ Test function standardizes Nitrogen results correctly @@ -670,58 +702,59 @@ def test_harmonize_nitrogen(): Read from data/wqp_results3.txt. """ # actual1 = harmonize.harmonize_Nitrogen(NARROW_RESULTS3, units='mg/l') - actual = harmonize.harmonize(NARROW_RESULTS3, 'Nitrogen') + actual = harmonize.harmonize(NARROW_RESULTS3, "Nitrogen") # Test that the dataframe has expected type, size, cols, and rows assert isinstance(actual, pandas.core.frame.DataFrame) # Test type - assert actual.size == 16728 # Test size - assert 'Nitrogen' in actual.columns # Check for column - assert len(actual['Nitrogen'].dropna()) == 182 # Number of results + assert actual.size == 16728 # Test size + assert "Nitrogen" in actual.columns # Check for column + assert len(actual["Nitrogen"].dropna()) == 182 # Number of results # Confirm orginal data was not altered - orig_val_col = 'ResultMeasureValue' # Values + orig_val_col = "ResultMeasureValue" # Values assert actual[orig_val_col].equals(NARROW_RESULTS3[orig_val_col]) - orig_unit_col = 'ResultMeasure/MeasureUnitCode' # Units + orig_unit_col = "ResultMeasure/MeasureUnitCode" # Units assert actual[orig_unit_col].equals(NARROW_RESULTS3[orig_unit_col]) # Inspect specific result - where units are not converted - assert actual.iloc[55][orig_unit_col] == 'mg/l' # Confirm orig unit - expected_unit = 'milligram / liter' # Desired units - assert str(actual.iloc[55]['Nitrogen'].units) == expected_unit + assert actual.iloc[55][orig_unit_col] == "mg/l" # Confirm orig unit + expected_unit = "milligram / liter" # Desired units + assert str(actual.iloc[55]["Nitrogen"].units) == expected_unit expected_val = float(actual.iloc[55][orig_val_col]) # Original value - assert actual.iloc[55]['Nitrogen'].magnitude == expected_val # Unchanged + assert actual.iloc[55]["Nitrogen"].magnitude == expected_val # Unchanged # Inspect specific result - where units converted - assert actual.iloc[245][orig_unit_col] == 'g/m**3' # Confirm orig unit - assert str(actual.iloc[245]['Nitrogen'].units) == expected_unit - assert actual.iloc[245][orig_val_col] == '1' # Confirm original measure - assert actual.iloc[245]['Nitrogen'].magnitude == 1.0000000000000002 + assert actual.iloc[245][orig_unit_col] == "g/m**3" # Confirm orig unit + assert str(actual.iloc[245]["Nitrogen"].units) == expected_unit + assert actual.iloc[245][orig_val_col] == "1" # Confirm original measure + assert actual.iloc[245]["Nitrogen"].magnitude == 1.0000000000000002 # Inspect specific result - where units missing - assert str(actual.iloc[211][orig_unit_col]) == 'nan' # Confirm missing + assert str(actual.iloc[211][orig_unit_col]) == "nan" # Confirm missing # Confirm expected flag - for missing/infered units - expected_flag = 'ResultMeasure/MeasureUnitCode: MISSING UNITS, mg/l assumed' - actual_flags = actual.iloc[211]['QA_flag'].split('; ') + expected_flag = "ResultMeasure/MeasureUnitCode: MISSING UNITS, mg/l assumed" + actual_flags = actual.iloc[211]["QA_flag"].split("; ") assert actual_flags[1] == expected_flag # Check value unchagned for missing units # TODO: values would stay the same (no conversion), but this example is nan # Inspect specific result - where value missing - assert str(actual.iloc[211][orig_val_col]) == 'nan' # Confirm missing + assert str(actual.iloc[211][orig_val_col]) == "nan" # Confirm missing # Confirm expected flag - for missing/infered values - expected_flag = 'ResultMeasureValue: missing (NaN) result' - assert actual.iloc[211]['QA_flag'].split('; ')[0] == expected_flag + expected_flag = "ResultMeasureValue: missing (NaN) result" + assert actual.iloc[211]["QA_flag"].split("; ")[0] == expected_flag # Inspect specific result - un-usable non-numeric values - assert actual.iloc[240][orig_val_col] == 'Not reported' + assert actual.iloc[240][orig_val_col] == "Not reported" # Confirm expected flag - for un-usable value expected_flag = 'ResultMeasureValue: "Not reported" result cannot be used' - assert actual.iloc[240]['QA_flag'].split('; ')[0] == expected_flag + assert actual.iloc[240]["QA_flag"].split("; ")[0] == expected_flag # TODO: add test case where 'g/kg' # TODO: add test case where 'cm3/g @STP' # TODO: add test case where 'cm3/g STP' - + # check sample fraction, everything went to total mixed forms - assert len(actual['Nitrogen'].dropna()) == 182, "Fraction issue" - fract_col = 'TOTAL NITROGEN_ MIXED FORMS' + assert len(actual["Nitrogen"].dropna()) == 182, "Fraction issue" + fract_col = "TOTAL NITROGEN_ MIXED FORMS" assert len(actual[fract_col].dropna()) == 182, "Fraction issue" -#@pytest.mark.skip(reason="no change") + +# @pytest.mark.skip(reason="no change") def test_harmonize_conductivity(): """ Test function standardizes Conductivity results correctly @@ -731,52 +764,52 @@ def test_harmonize_conductivity(): NARROW_RESULTS3 : pandas.DataFrame Read from data/wqp_results3.txt. """ - #actual1 = harmonize.harmonize_Conductivity(NARROW_RESULTS3, units='uS/cm') - actual = harmonize.harmonize(NARROW_RESULTS3, 'Conductivity') + # actual1 = harmonize.harmonize_Conductivity(NARROW_RESULTS3, units='uS/cm') + actual = harmonize.harmonize(NARROW_RESULTS3, "Conductivity") # Test that the dataframe has expected type, size, cols, and rows assert isinstance(actual, pandas.core.frame.DataFrame) # Test type assert actual.size == 16236 # Test size - assert 'Conductivity' in actual.columns # Check for column - assert len(actual['Conductivity'].dropna()) == 59 # Number of results + assert "Conductivity" in actual.columns # Check for column + assert len(actual["Conductivity"].dropna()) == 59 # Number of results # Confirm orginal data was not altered - orig_val_col = 'ResultMeasureValue' # Values + orig_val_col = "ResultMeasureValue" # Values assert actual[orig_val_col].equals(NARROW_RESULTS3[orig_val_col]) - orig_unit_col = 'ResultMeasure/MeasureUnitCode' # Units + orig_unit_col = "ResultMeasure/MeasureUnitCode" # Units assert actual[orig_unit_col].equals(NARROW_RESULTS3[orig_unit_col]) # Inspect specific result - where units are not converted - assert actual.iloc[79][orig_unit_col] == 'uS/cm' # Confirm orig unit - expected_unit = 'microsiemens / centimeter' # Desired units - assert str(actual.iloc[79]['Conductivity'].units) == expected_unit + assert actual.iloc[79][orig_unit_col] == "uS/cm" # Confirm orig unit + expected_unit = "microsiemens / centimeter" # Desired units + assert str(actual.iloc[79]["Conductivity"].units) == expected_unit expected_val = float(actual.iloc[79][orig_val_col]) # Original value - assert actual.iloc[79]['Conductivity'].magnitude == expected_val # Unchanged + assert actual.iloc[79]["Conductivity"].magnitude == expected_val # Unchanged # Inspect specific result - where units converted - assert actual.iloc[244][orig_unit_col] == 'mS/cm' # Confirm orig unit - assert str(actual.iloc[244]['Conductivity'].units) == expected_unit - assert actual.iloc[244][orig_val_col] == '1' # Confirm original measure - assert actual.iloc[244]['Conductivity'].magnitude == 1000.0 + assert actual.iloc[244][orig_unit_col] == "mS/cm" # Confirm orig unit + assert str(actual.iloc[244]["Conductivity"].units) == expected_unit + assert actual.iloc[244][orig_val_col] == "1" # Confirm original measure + assert actual.iloc[244]["Conductivity"].magnitude == 1000.0 # Inspect specific result - where units missing - assert str(actual.iloc[241][orig_unit_col]) == 'nan' # Confirm missing + assert str(actual.iloc[241][orig_unit_col]) == "nan" # Confirm missing # Confirm expected flag - for missing/infered units - expected_flag = 'ResultMeasure/MeasureUnitCode: MISSING UNITS, uS/cm assumed' - actual_flags = actual.iloc[241]['QA_flag'] + expected_flag = "ResultMeasure/MeasureUnitCode: MISSING UNITS, uS/cm assumed" + actual_flags = actual.iloc[241]["QA_flag"] assert actual_flags == expected_flag # Check value unchagned for missing units expected_val = float(actual.iloc[241][orig_val_col]) # Original value - assert actual.iloc[241]['Conductivity'].magnitude == expected_val # Unchanged + assert actual.iloc[241]["Conductivity"].magnitude == expected_val # Unchanged # Inspect specific result - where value missing - assert str(actual.iloc[242][orig_val_col]) == 'nan' # Confirm missing + assert str(actual.iloc[242][orig_val_col]) == "nan" # Confirm missing # Confirm expected flag - for missing/infered values - expected_flag = 'ResultMeasureValue: missing (NaN) result' - assert actual.iloc[242]['QA_flag'].split('; ')[0] == expected_flag + expected_flag = "ResultMeasureValue: missing (NaN) result" + assert actual.iloc[242]["QA_flag"].split("; ")[0] == expected_flag # Inspect specific result - un-usable non-numeric values - assert actual.iloc[243][orig_val_col] == 'Not Reported' + assert actual.iloc[243][orig_val_col] == "Not Reported" # Confirm expected flag - for un-usable value expected_flag = 'ResultMeasureValue: "Not Reported" result cannot be used' - assert actual.iloc[243]['QA_flag'].split('; ')[0] == expected_flag + assert actual.iloc[243]["QA_flag"].split("; ")[0] == expected_flag -#@pytest.mark.skip(reason="no change") +# @pytest.mark.skip(reason="no change") def test_harmonize_carbon_organic(): """ Test function standardizes Organic carbon results correctly @@ -786,56 +819,56 @@ def test_harmonize_carbon_organic(): NARROW_RESULTS4 : pandas.DataFrame Read from data/wqp_results4.txt. """ - #actual1 = harmonize.harmonize_Carbon_organic(NARROW_RESULTS4, units='mg/l') - #actual2 = harmonize.harmonize_Carbon_organic(NARROW_RESULTS4, units='g/kg') - actual = harmonize.harmonize(NARROW_RESULTS4, 'Organic carbon') + # actual1 = harmonize.harmonize_Carbon_organic(NARROW_RESULTS4, units='mg/l') + # actual2 = harmonize.harmonize_Carbon_organic(NARROW_RESULTS4, units='g/kg') + actual = harmonize.harmonize(NARROW_RESULTS4, "Organic carbon") # Test that the dataframe has expected type, size, cols, and rows assert isinstance(actual, pandas.core.frame.DataFrame) # Test type assert actual.size == 6906695 # Test size - assert 'Carbon' in actual.columns # Check for column - assert len(actual['Carbon'].dropna()) == 30631 # Number of results + assert "Carbon" in actual.columns # Check for column + assert len(actual["Carbon"].dropna()) == 30631 # Number of results # Confirm orginal data was not altered - orig_val_col = 'ResultMeasureValue' # Values + orig_val_col = "ResultMeasureValue" # Values assert actual[orig_val_col].equals(NARROW_RESULTS4[orig_val_col]) - orig_unit_col = 'ResultMeasure/MeasureUnitCode' # Units + orig_unit_col = "ResultMeasure/MeasureUnitCode" # Units assert actual[orig_unit_col].equals(NARROW_RESULTS4[orig_unit_col]) # Inspect specific result - where units are not converted - assert actual.iloc[1][orig_unit_col] == 'mg/l' # Confirm orig unit - expected_unit = 'milligram / liter' # Desired units - assert str(actual.iloc[1]['Carbon'].units) == expected_unit + assert actual.iloc[1][orig_unit_col] == "mg/l" # Confirm orig unit + expected_unit = "milligram / liter" # Desired units + assert str(actual.iloc[1]["Carbon"].units) == expected_unit expected_val = float(actual.iloc[1][orig_val_col]) # Original value - assert actual.iloc[1]['Carbon'].magnitude == expected_val # Unchanged + assert actual.iloc[1]["Carbon"].magnitude == expected_val # Unchanged # Inspect specific result - where units converted - assert actual.iloc[355][orig_unit_col] == '%' # Confirm orig unit - assert str(actual.iloc[355]['Carbon'].units) == expected_unit - assert actual.iloc[355][orig_val_col] == '0.1' # Confirm original measure - assert actual.iloc[355]['Carbon'].magnitude == 1000.0 + assert actual.iloc[355][orig_unit_col] == "%" # Confirm orig unit + assert str(actual.iloc[355]["Carbon"].units) == expected_unit + assert actual.iloc[355][orig_val_col] == "0.1" # Confirm original measure + assert actual.iloc[355]["Carbon"].magnitude == 1000.0 # Confirm expected flag - for missing/infered units - expected_flag = 'ResultMeasure/MeasureUnitCode: MISSING UNITS, mg/l assumed' - actual_flags = actual.iloc[103082]['QA_flag'] + expected_flag = "ResultMeasure/MeasureUnitCode: MISSING UNITS, mg/l assumed" + actual_flags = actual.iloc[103082]["QA_flag"] assert actual_flags == expected_flag # Check value unchagned for missing units expected_val = float(actual.iloc[103082][orig_val_col]) # Original value - assert actual.iloc[103082]['Carbon'].magnitude == expected_val # Unchanged + assert actual.iloc[103082]["Carbon"].magnitude == expected_val # Unchanged # Inspect specific result - where value missing - assert str(actual.iloc[22044][orig_val_col]) == 'nan' # Confirm missing + assert str(actual.iloc[22044][orig_val_col]) == "nan" # Confirm missing # Confirm expected flag - for missing/infered values - expected_flag = 'ResultMeasureValue: missing (NaN) result' - assert actual.iloc[22044]['QA_flag'].split('; ')[0] == expected_flag + expected_flag = "ResultMeasureValue: missing (NaN) result" + assert actual.iloc[22044]["QA_flag"].split("; ")[0] == expected_flag # Inspect specific result - un-usable non-numeric values - assert actual.iloc[0][orig_val_col] == '*Non-detect' + assert actual.iloc[0][orig_val_col] == "*Non-detect" # Confirm expected flag - for un-usable value expected_flag = 'ResultMeasureValue: "*Non-detect" result cannot be used' - assert actual.iloc[0]['QA_flag'].split('; ')[0] == expected_flag + assert actual.iloc[0]["QA_flag"].split("; ")[0] == expected_flag # Moles test - assert actual.iloc[103084][orig_unit_col] == 'umol' # Confirm orig unit + assert actual.iloc[103084][orig_unit_col] == "umol" # Confirm orig unit float(actual.iloc[103084][orig_val_col]) # Confirm original value - assert str(actual.iloc[103084]['Carbon'].units) == expected_unit - assert actual.iloc[103084]['Carbon'].magnitude == 0.0477424 + assert str(actual.iloc[103084]["Carbon"].units) == expected_unit + assert actual.iloc[103084]["Carbon"].magnitude == 0.0477424 -#@pytest.mark.skip(reason="no change") +# @pytest.mark.skip(reason="no change") def test_harmonize_chlorophyll_a(): """ Test function standardizes Chlorophyll a results correctly @@ -845,50 +878,50 @@ def test_harmonize_chlorophyll_a(): NARROW_RESULTS4 : pandas.DataFrame Read from data/wqp_results4.txt. """ - actual = harmonize.harmonize(NARROW_RESULTS4, 'Chlorophyll a') + actual = harmonize.harmonize(NARROW_RESULTS4, "Chlorophyll a") # Test that the dataframe has expected type, size, cols, and rows assert isinstance(actual, pandas.core.frame.DataFrame) # Test type assert actual.size == 6803610 # Test size - assert 'Chlorophyll' in actual.columns # Check for column - assert len(actual['Chlorophyll'].dropna()) == 68201 # Number of results + assert "Chlorophyll" in actual.columns # Check for column + assert len(actual["Chlorophyll"].dropna()) == 68201 # Number of results # Confirm orginal data was not altered - orig_val_col = 'ResultMeasureValue' # Values + orig_val_col = "ResultMeasureValue" # Values assert actual[orig_val_col].equals(NARROW_RESULTS4[orig_val_col]) - orig_unit_col = 'ResultMeasure/MeasureUnitCode' # Units + orig_unit_col = "ResultMeasure/MeasureUnitCode" # Units assert actual[orig_unit_col].equals(NARROW_RESULTS4[orig_unit_col]) # Inspect specific result - where units are not converted - assert actual.iloc[47190][orig_unit_col] == 'mg/l' # Confirm orig unit - expected_unit = 'milligram / liter' # Desired units - assert str(actual.iloc[47190]['Chlorophyll'].units) == expected_unit + assert actual.iloc[47190][orig_unit_col] == "mg/l" # Confirm orig unit + expected_unit = "milligram / liter" # Desired units + assert str(actual.iloc[47190]["Chlorophyll"].units) == expected_unit expected_val = float(actual.iloc[47190][orig_val_col]) # Original value - assert actual.iloc[47190]['Chlorophyll'].magnitude == expected_val # Unchanged + assert actual.iloc[47190]["Chlorophyll"].magnitude == expected_val # Unchanged # Inspect specific result - where units converted - assert actual.iloc[345][orig_unit_col] == 'ug/l' # Confirm orig unit - assert str(actual.iloc[345]['Chlorophyll'].units) == expected_unit - assert actual.iloc[345][orig_val_col] == '2.28' # Confirm original measure - assert actual.iloc[345]['Chlorophyll'].magnitude == 0.00228 + assert actual.iloc[345][orig_unit_col] == "ug/l" # Confirm orig unit + assert str(actual.iloc[345]["Chlorophyll"].units) == expected_unit + assert actual.iloc[345][orig_val_col] == "2.28" # Confirm original measure + assert actual.iloc[345]["Chlorophyll"].magnitude == 0.00228 # Inspect specific result - where units missing - assert str(actual.iloc[12618][orig_unit_col]) == 'nan' # Confirm missing + assert str(actual.iloc[12618][orig_unit_col]) == "nan" # Confirm missing # Confirm expected flag - for missing/infered units - expected_flag = 'ResultMeasure/MeasureUnitCode: MISSING UNITS, mg/l assumed' - actual_flags = actual.iloc[12618]['QA_flag'] + expected_flag = "ResultMeasure/MeasureUnitCode: MISSING UNITS, mg/l assumed" + actual_flags = actual.iloc[12618]["QA_flag"] assert actual_flags == expected_flag # Check value unchagned for missing units expected_val = float(actual.iloc[12618][orig_val_col]) # Original value - assert actual.iloc[12618]['Chlorophyll'].magnitude == expected_val # Unchanged + assert actual.iloc[12618]["Chlorophyll"].magnitude == expected_val # Unchanged # Inspect specific result - where value missing - assert str(actual.iloc[947][orig_val_col]) == 'nan' # Confirm missing + assert str(actual.iloc[947][orig_val_col]) == "nan" # Confirm missing # Confirm expected flag - for missing/infered values - expected_flag = 'ResultMeasureValue: missing (NaN) result' - assert actual.iloc[947]['QA_flag'].split('; ')[0] == expected_flag + expected_flag = "ResultMeasureValue: missing (NaN) result" + assert actual.iloc[947]["QA_flag"].split("; ")[0] == expected_flag # Inspect specific result - un-usable non-numeric values assert actual.iloc[103081][orig_val_col] == "Not Reported" # Confirm expected flag - for un-usable value expected_flag = 'ResultMeasureValue: "Not Reported" result cannot be used' - assert actual.iloc[103081]['QA_flag'].split('; ')[0] == expected_flag + assert actual.iloc[103081]["QA_flag"].split("; ")[0] == expected_flag -#@pytest.mark.skip(reason="no change") +# @pytest.mark.skip(reason="no change") def test_harmonize_turbidity(): """ Test function standardizes Turbidity results correctly @@ -900,67 +933,69 @@ def test_harmonize_turbidity(): NARROW_RESULTS5 : pandas.DataFrame Read from data/wqp_results5.txt. """ - actual = harmonize.harmonize(NARROW_RESULTS5, 'Turbidity') + actual = harmonize.harmonize(NARROW_RESULTS5, "Turbidity") # Test that the dataframe has expected type, size, cols, and rows assert isinstance(actual, pandas.core.frame.DataFrame) # Test type assert actual.size == 8628100 # Test size - assert 'Turbidity' in actual.columns # Check for column - assert len(actual['Turbidity'].dropna()) == 131013 # Number of results + assert "Turbidity" in actual.columns # Check for column + assert len(actual["Turbidity"].dropna()) == 131013 # Number of results # Confirm orginal data was not altered - orig_val_col = 'ResultMeasureValue' # Values + orig_val_col = "ResultMeasureValue" # Values assert actual[orig_val_col].equals(NARROW_RESULTS5[orig_val_col]) - orig_unit_col = 'ResultMeasure/MeasureUnitCode' # Units + orig_unit_col = "ResultMeasure/MeasureUnitCode" # Units assert actual[orig_unit_col].equals(NARROW_RESULTS5[orig_unit_col]) # Inspect specific result - where units are not converted - assert actual.iloc[1][orig_unit_col] == 'NTU' # Confirm orig unit - expected_unit = 'Nephelometric_Turbidity_Units' # Desired units - assert str(actual.iloc[1]['Turbidity'].units) == expected_unit + assert actual.iloc[1][orig_unit_col] == "NTU" # Confirm orig unit + expected_unit = "Nephelometric_Turbidity_Units" # Desired units + assert str(actual.iloc[1]["Turbidity"].units) == expected_unit expected_val = float(actual.iloc[1][orig_val_col]) # Original value - assert actual.iloc[1]['Turbidity'].magnitude == expected_val # Unchanged + assert actual.iloc[1]["Turbidity"].magnitude == expected_val # Unchanged # Inspect specific result - where units converted - assert actual.iloc[58433][orig_unit_col] == 'cm' # Confirm orig unit - assert str(actual.iloc[58433]['Turbidity'].units) == expected_unit - assert actual.iloc[58433][orig_val_col] == '60' # Confirm original measure - assert actual.iloc[58433]['Turbidity'].magnitude == 8.17455929421168 #16.046015096322353 + assert actual.iloc[58433][orig_unit_col] == "cm" # Confirm orig unit + assert str(actual.iloc[58433]["Turbidity"].units) == expected_unit + assert actual.iloc[58433][orig_val_col] == "60" # Confirm original measure + assert ( + actual.iloc[58433]["Turbidity"].magnitude == 8.17455929421168 + ) # 16.046015096322353 # JTU -> NTU - assert actual.iloc[100158][orig_unit_col] == 'JTU' # Confirm orig unit - assert str(actual.iloc[100158]['Turbidity'].units) == expected_unit + assert actual.iloc[100158][orig_unit_col] == "JTU" # Confirm orig unit + assert str(actual.iloc[100158]["Turbidity"].units) == expected_unit assert actual.iloc[100158][orig_val_col] == 5.0 # Confirm original measure - assert actual.iloc[100158]['Turbidity'].magnitude == 95.0773 + assert actual.iloc[100158]["Turbidity"].magnitude == 95.0773 # mg/l SiO2 -> NTU - assert actual.iloc[126494][orig_unit_col] == 'mg/l SiO2' # Original unit - assert str(actual.iloc[126494]['Turbidity'].units) == expected_unit - assert actual.iloc[126494][orig_val_col] == '4.0' # Confirm original measure - assert actual.iloc[126494]['Turbidity'].magnitude == 30.378500000000003 + assert actual.iloc[126494][orig_unit_col] == "mg/l SiO2" # Original unit + assert str(actual.iloc[126494]["Turbidity"].units) == expected_unit + assert actual.iloc[126494][orig_val_col] == "4.0" # Confirm original measure + assert actual.iloc[126494]["Turbidity"].magnitude == 30.378500000000003 # NTRU == NTU - assert actual.iloc[124849][orig_unit_col] == 'NTRU' # Confirm orig unit - assert str(actual.iloc[124849]['Turbidity'].units) == expected_unit - assert actual.iloc[124849][orig_val_col] == '0.7' # Confirm original measure - assert actual.iloc[124849]['Turbidity'].magnitude == 0.7 + assert actual.iloc[124849][orig_unit_col] == "NTRU" # Confirm orig unit + assert str(actual.iloc[124849]["Turbidity"].units) == expected_unit + assert actual.iloc[124849][orig_val_col] == "0.7" # Confirm original measure + assert actual.iloc[124849]["Turbidity"].magnitude == 0.7 # Inspect specific result - where units missing - assert str(actual.iloc[132736][orig_unit_col]) == 'nan' # Confirm missing + assert str(actual.iloc[132736][orig_unit_col]) == "nan" # Confirm missing # Confirm expected flag - for missing/infered units - expected_flag = 'ResultMeasure/MeasureUnitCode: MISSING UNITS, NTU assumed' - actual_flags = actual.iloc[132736]['QA_flag'].split('; ') + expected_flag = "ResultMeasure/MeasureUnitCode: MISSING UNITS, NTU assumed" + actual_flags = actual.iloc[132736]["QA_flag"].split("; ") assert actual_flags[0] == expected_flag # Check value unchagned for missing units expected_val = float(actual.iloc[132736][orig_val_col]) # Original value - assert actual.iloc[132736]['Turbidity'].magnitude == expected_val # Unchanged + assert actual.iloc[132736]["Turbidity"].magnitude == expected_val # Unchanged # Inspect specific result - where value missing - assert str(actual.iloc[19988][orig_val_col]) == 'nan' # Confirm missing + assert str(actual.iloc[19988][orig_val_col]) == "nan" # Confirm missing # Confirm expected flag - for missing/infered values - expected_flag = 'ResultMeasureValue: missing (NaN) result' - assert actual.iloc[19988]['QA_flag'].split('; ')[0] == expected_flag + expected_flag = "ResultMeasureValue: missing (NaN) result" + assert actual.iloc[19988]["QA_flag"].split("; ")[0] == expected_flag # Inspect specific result - un-usable non-numeric values - assert actual.iloc[42][orig_val_col] == '*Not Reported' + assert actual.iloc[42][orig_val_col] == "*Not Reported" # Confirm expected flag - for un-usable value expected_flag = 'ResultMeasureValue: "*Not Reported" result cannot be used' - assert actual.iloc[42]['QA_flag'].split('; ')[0] == expected_flag + assert actual.iloc[42]["QA_flag"].split("; ")[0] == expected_flag -#@pytest.mark.skip(reason="no change") +# @pytest.mark.skip(reason="no change") def test_harmonize_sediment(): """ Test function standardizes Sediment results correctly @@ -977,53 +1012,51 @@ def test_harmonize_sediment(): NARROW_RESULTS5 : pandas.DataFrame Read from data/wqp_results5.txt. """ - actual = harmonize.harmonize(NARROW_RESULTS5, - char_val='Sediment', - units_out='g/kg') + actual = harmonize.harmonize(NARROW_RESULTS5, char_val="Sediment", units_out="g/kg") # Test that the dataframe has expected type, size, cols, and rows assert isinstance(actual, pandas.core.frame.DataFrame) # Test type assert actual.size == 8628100 # Test size - assert 'Sediment' in actual.columns # Check for column - assert len(actual['Sediment'].dropna()) == 37 # Number of results + assert "Sediment" in actual.columns # Check for column + assert len(actual["Sediment"].dropna()) == 37 # Number of results # Confirm orginal data was not altered - orig_val_col = 'ResultMeasureValue' # Values + orig_val_col = "ResultMeasureValue" # Values assert actual[orig_val_col].equals(NARROW_RESULTS5[orig_val_col]) - orig_unit_col = 'ResultMeasure/MeasureUnitCode' # Units + orig_unit_col = "ResultMeasure/MeasureUnitCode" # Units assert actual[orig_unit_col].equals(NARROW_RESULTS5[orig_unit_col]) # Inspect specific result - where units are not converted - assert actual.iloc[132737][orig_unit_col] == 'g/kg' # Confirm orig unit - expected_unit = 'gram / kilogram' # Desired units - assert str(actual.iloc[132737]['Sediment'].units) == expected_unit + assert actual.iloc[132737][orig_unit_col] == "g/kg" # Confirm orig unit + expected_unit = "gram / kilogram" # Desired units + assert str(actual.iloc[132737]["Sediment"].units) == expected_unit expected_val = float(actual.iloc[132737][orig_val_col]) # Original value - assert actual.iloc[132737]['Sediment'].magnitude == expected_val # Unchanged + assert actual.iloc[132737]["Sediment"].magnitude == expected_val # Unchanged # Inspect specific result - where units converted - assert actual.iloc[128909][orig_unit_col] == '%' # Confirm orig unit - assert str(actual.iloc[128909]['Sediment'].units) == expected_unit - assert actual.iloc[128909][orig_val_col] == '17' # Confirm original measure - assert actual.iloc[128909]['Sediment'].magnitude == 170.0 + assert actual.iloc[128909][orig_unit_col] == "%" # Confirm orig unit + assert str(actual.iloc[128909]["Sediment"].units) == expected_unit + assert actual.iloc[128909][orig_val_col] == "17" # Confirm original measure + assert actual.iloc[128909]["Sediment"].magnitude == 170.0 # Inspect specific result - where units missing - assert str(actual.iloc[132738][orig_unit_col]) == 'nan' # Confirm missing + assert str(actual.iloc[132738][orig_unit_col]) == "nan" # Confirm missing # Confirm expected flag - for missing/infered units - expected_flag = 'ResultMeasure/MeasureUnitCode: MISSING UNITS, g/kg assumed' - actual_flags = actual.iloc[132738]['QA_flag'].split('; ') + expected_flag = "ResultMeasure/MeasureUnitCode: MISSING UNITS, g/kg assumed" + actual_flags = actual.iloc[132738]["QA_flag"].split("; ") assert actual_flags[0] == expected_flag # Check value unchagned for missing units expected_val = float(actual.iloc[132738][orig_val_col]) # Original value - assert actual.iloc[132738]['Sediment'].magnitude == expected_val # Unchanged + assert actual.iloc[132738]["Sediment"].magnitude == expected_val # Unchanged # Inspect specific result - where value missing - assert str(actual.iloc[126342][orig_val_col]) == 'nan' # Confirm missing + assert str(actual.iloc[126342][orig_val_col]) == "nan" # Confirm missing # Confirm expected flag - for missing value - expected_flag = 'ResultMeasureValue: missing (NaN) result' - assert actual.iloc[126342]['QA_flag'].split('; ')[0] == expected_flag + expected_flag = "ResultMeasureValue: missing (NaN) result" + assert actual.iloc[126342]["QA_flag"].split("; ")[0] == expected_flag # Inspect specific result - un-usable non-numeric values - assert actual.iloc[132739][orig_val_col] == 'Not Reported' + assert actual.iloc[132739][orig_val_col] == "Not Reported" # Confirm expected flag - for un-usable value expected_flag = 'ResultMeasureValue: "Not Reported" result cannot be used' - assert actual.iloc[132739]['QA_flag'].split('; ')[0] == expected_flag + assert actual.iloc[132739]["QA_flag"].split("; ")[0] == expected_flag # TODO: add units mg/l -#@pytest.mark.skip(reason="not implemented") +# @pytest.mark.skip(reason="not implemented") def test_harmonize_phosphorus_plus(): """ Test function standardizes varied Phosphorus results correctly @@ -1034,7 +1067,8 @@ def test_harmonize_phosphorus_plus(): Read from data/wqp_results6.txt. """ -#@pytest.mark.skip(reason="not implemented") + +# @pytest.mark.skip(reason="not implemented") def test_harmonize_nitrogen_plus(): """ Test function standardizes varied Nitrogen results correctly @@ -1045,7 +1079,8 @@ def test_harmonize_nitrogen_plus(): Read from data/wqp_results6.txt. """ -#@pytest.mark.skip(reason="not implemented") + +# @pytest.mark.skip(reason="not implemented") def test_harmonize_sediment_plus(): """ Test function standardizes varied Sediment results correctly @@ -1057,7 +1092,7 @@ def test_harmonize_sediment_plus(): """ -#@pytest.mark.skip(reason="no change") +# @pytest.mark.skip(reason="no change") def test_harmonize_fecal_coliform(): """ Test function standardizes Fecal Coliform results correctly @@ -1067,50 +1102,50 @@ def test_harmonize_fecal_coliform(): NARROW_RESULTS7 : pandas.DataFrame Read from data/wqp_results7.txt. """ - actual = harmonize.harmonize(NARROW_RESULTS7, 'Fecal Coliform') + actual = harmonize.harmonize(NARROW_RESULTS7, "Fecal Coliform") # Test that the dataframe has expected type, size, cols, and rows assert isinstance(actual, pandas.core.frame.DataFrame) # Test type assert actual.size == 8778720 # Test size - assert 'Fecal_Coliform' in actual.columns # Check for column - assert len(actual['Fecal_Coliform'].dropna()) == 68264 # Number of results + assert "Fecal_Coliform" in actual.columns # Check for column + assert len(actual["Fecal_Coliform"].dropna()) == 68264 # Number of results # Confirm orginal data was not altered - orig_val_col = 'ResultMeasureValue' # Values + orig_val_col = "ResultMeasureValue" # Values assert actual[orig_val_col].equals(NARROW_RESULTS7[orig_val_col]) - orig_unit_col = 'ResultMeasure/MeasureUnitCode' # Units + orig_unit_col = "ResultMeasure/MeasureUnitCode" # Units assert actual[orig_unit_col].equals(NARROW_RESULTS7[orig_unit_col]) # Inspect specific result - where units are not converted - assert actual.iloc[3][orig_unit_col] == 'cfu/100ml' # Confirm orig unit - expected_unit = 'Colony_Forming_Units / milliliter' # Desired units - assert str(actual.iloc[3]['Fecal_Coliform'].units) == expected_unit + assert actual.iloc[3][orig_unit_col] == "cfu/100ml" # Confirm orig unit + expected_unit = "Colony_Forming_Units / milliliter" # Desired units + assert str(actual.iloc[3]["Fecal_Coliform"].units) == expected_unit expected_val = float(actual.iloc[3][orig_val_col]) # Original value - assert actual.iloc[3]['Fecal_Coliform'].magnitude == expected_val # Unchanged + assert actual.iloc[3]["Fecal_Coliform"].magnitude == expected_val # Unchanged # Inspect specific result - where units converted - assert actual.iloc[0][orig_unit_col] == '#/100ml' # Confirm orig unit - assert str(actual.iloc[0]['Fecal_Coliform'].units) == expected_unit - assert actual.iloc[0][orig_val_col] == '2' # Confirm original measure - assert actual.iloc[0]['Fecal_Coliform'].magnitude == 2.0 + assert actual.iloc[0][orig_unit_col] == "#/100ml" # Confirm orig unit + assert str(actual.iloc[0]["Fecal_Coliform"].units) == expected_unit + assert actual.iloc[0][orig_val_col] == "2" # Confirm original measure + assert actual.iloc[0]["Fecal_Coliform"].magnitude == 2.0 # Inspect specific result - where units missing - assert str(actual.iloc[1][orig_unit_col]) == 'nan' # Confirm missing + assert str(actual.iloc[1][orig_unit_col]) == "nan" # Confirm missing # Confirm expected flag - for missing/infered units - expected_flag = 'ResultMeasure/MeasureUnitCode: MISSING UNITS, CFU/(100ml) assumed' - actual_flags = actual.iloc[1]['QA_flag'].split('; ') + expected_flag = "ResultMeasure/MeasureUnitCode: MISSING UNITS, CFU/(100ml) assumed" + actual_flags = actual.iloc[1]["QA_flag"].split("; ") assert actual_flags[1] == expected_flag # Check value unchagned for missing units expected_val = float(actual.iloc[3][orig_val_col]) # Original value - assert actual.iloc[3]['Fecal_Coliform'].magnitude == expected_val # Unchanged + assert actual.iloc[3]["Fecal_Coliform"].magnitude == expected_val # Unchanged # Inspect specific result - where value missing - assert str(actual.iloc[1][orig_val_col]) == '*Non-detect' # Confirm missing + assert str(actual.iloc[1][orig_val_col]) == "*Non-detect" # Confirm missing # Confirm expected flag - for missing value expected_flag = 'ResultMeasureValue: "*Non-detect" result cannot be used; ResultMeasure/MeasureUnitCode: MISSING UNITS, CFU/(100ml) assumed' - assert actual.iloc[1]['QA_flag'] == expected_flag + assert actual.iloc[1]["QA_flag"] == expected_flag # Inspect specific result - un-usable non-numeric values - assert actual.iloc[75305][orig_val_col] == 'Not Reported' + assert actual.iloc[75305][orig_val_col] == "Not Reported" # Confirm expected flag - for un-usable value expected_flag = 'ResultMeasureValue: "Not Reported" result cannot be used' - assert actual.iloc[75305]['QA_flag'].split('; ')[0] == expected_flag + assert actual.iloc[75305]["QA_flag"].split("; ")[0] == expected_flag -#@pytest.mark.skip(reason="no change") +# @pytest.mark.skip(reason="no change") def test_harmonize_E_Coli(): """ Test function standardizes Escherichia Coliform (E. Coli) results correctly @@ -1120,59 +1155,59 @@ def test_harmonize_E_Coli(): NARROW_RESULTS7 : pandas.DataFrame Read from data/wqp_results7.txt. """ - actual = harmonize.harmonize(NARROW_RESULTS7, 'Escherichia coli') + actual = harmonize.harmonize(NARROW_RESULTS7, "Escherichia coli") # Test that the dataframe has expected type, size, cols, and rows assert isinstance(actual, pandas.core.frame.DataFrame) # Test type assert actual.size == 8778720 # Test size - assert 'E_coli' in actual.columns # Check for column - assert len(actual['E_coli'].dropna()) == 7205 # Number of results + assert "E_coli" in actual.columns # Check for column + assert len(actual["E_coli"].dropna()) == 7205 # Number of results # Confirm orginal data was not altered - orig_val_col = 'ResultMeasureValue' # Values + orig_val_col = "ResultMeasureValue" # Values assert actual[orig_val_col].equals(NARROW_RESULTS7[orig_val_col]) - orig_unit_col = 'ResultMeasure/MeasureUnitCode' # Units + orig_unit_col = "ResultMeasure/MeasureUnitCode" # Units assert actual[orig_unit_col].equals(NARROW_RESULTS7[orig_unit_col]) # Inspect specific result - where units are not converted - assert actual.iloc[59267][orig_unit_col] == 'cfu/100ml' # Confirm orig unit - expected_unit = 'Colony_Forming_Units / milliliter' # Desired units - assert str(actual.iloc[59267]['E_coli'].units) == expected_unit + assert actual.iloc[59267][orig_unit_col] == "cfu/100ml" # Confirm orig unit + expected_unit = "Colony_Forming_Units / milliliter" # Desired units + assert str(actual.iloc[59267]["E_coli"].units) == expected_unit expected_val = float(actual.iloc[59267][orig_val_col]) # Original value - assert actual.iloc[59267]['E_coli'].magnitude == expected_val # Unchanged + assert actual.iloc[59267]["E_coli"].magnitude == expected_val # Unchanged # Inspect specific result - where units converted - assert actual.iloc[28804][orig_unit_col] == 'MPN/100ml' # Confirm orig unit - assert str(actual.iloc[28804]['E_coli'].units) == expected_unit - assert actual.iloc[28804][orig_val_col] == '7.3' # Confirm original measure - assert actual.iloc[28804]['E_coli'].magnitude == 7.3 + assert actual.iloc[28804][orig_unit_col] == "MPN/100ml" # Confirm orig unit + assert str(actual.iloc[28804]["E_coli"].units) == expected_unit + assert actual.iloc[28804][orig_val_col] == "7.3" # Confirm original measure + assert actual.iloc[28804]["E_coli"].magnitude == 7.3 # Inspect specific result - where units missing - assert str(actual.iloc[108916][orig_unit_col]) == 'nan' # Confirm missing + assert str(actual.iloc[108916][orig_unit_col]) == "nan" # Confirm missing # Confirm expected flag - for missing/infered units - expected_flag = 'ResultMeasureValue: missing (NaN) result' - actual_flags = actual.iloc[108916]['QA_flag'].split('; ') + expected_flag = "ResultMeasureValue: missing (NaN) result" + actual_flags = actual.iloc[108916]["QA_flag"].split("; ") assert actual_flags[0] == expected_flag # Check value unchagned for missing units expected_val = float(actual.iloc[59267][orig_val_col]) # Original value - assert actual.iloc[59267]['E_coli'].magnitude == expected_val # Unchanged + assert actual.iloc[59267]["E_coli"].magnitude == expected_val # Unchanged # Inspect specific result - where value missing - assert str(actual.iloc[28805][orig_val_col]) == 'nan' # Confirm missing + assert str(actual.iloc[28805][orig_val_col]) == "nan" # Confirm missing # Confirm expected flag - for missing/infered values - expected_flag = 'ResultMeasureValue: missing (NaN) result' - assert actual.iloc[28805]['QA_flag'].split('; ')[0] == expected_flag + expected_flag = "ResultMeasureValue: missing (NaN) result" + assert actual.iloc[28805]["QA_flag"].split("; ")[0] == expected_flag # Inspect specific result - un-usable non-numeric values - assert actual.iloc[69168 ][orig_val_col] == '*Not Reported' + assert actual.iloc[69168][orig_val_col] == "*Not Reported" # Confirm expected flag - for un-usable value expected_flag = 'ResultMeasureValue: "*Not Reported" result cannot be used' - assert actual.iloc[69168 ]['QA_flag'].split('; ')[0] == expected_flag + assert actual.iloc[69168]["QA_flag"].split("; ")[0] == expected_flag -#@pytest.mark.skip(reason="no change") +# @pytest.mark.skip(reason="no change") def test_conductivity_to_PSU(harmonized_tables): - conductivity_series = harmonized_tables['Conductivity'].dropna() + conductivity_series = harmonized_tables["Conductivity"].dropna() # With wrapper it should have to be converted to string first conductivity_series_str = conductivity_series.apply(str) actual = conductivity_series_str.apply(convert.conductivity_to_PSU) # No loss of rows assert len(actual) == len(conductivity_series) # Check it is dimensionless - assert str(actual[0].units) == 'dimensionless' + assert str(actual[0].units) == "dimensionless" # Check conversion was accurate assert conductivity_series[0].magnitude == 111.0 assert actual[0].magnitude == 0.057 @@ -1180,43 +1215,44 @@ def test_conductivity_to_PSU(harmonized_tables): assert actual[244].magnitude == 0.493 -#@pytest.mark.skip(reason="no change") +# @pytest.mark.skip(reason="no change") def test_accept_methods(merged_tables): - actual = clean.methods_check(merged_tables, 'Phosphorus') + actual = clean.methods_check(merged_tables, "Phosphorus") actual.sort() # Order is inconsistent so it's sorted - expected = ['365.1', '365.3', '365.4', '4500-P-E', '4500-P-F'] + expected = ["365.1", "365.3", "365.4", "4500-P-E", "4500-P-F"] assert actual == expected -#@pytest.mark.skip(reason="no change") +# @pytest.mark.skip(reason="no change") def test_datetime(harmonized_tables): # Testit actual = clean.datetime(harmonized_tables) # Type for time field - assert isinstance(actual['Activity_datetime'][0], - pandas._libs.tslibs.timestamps.Timestamp) + assert isinstance( + actual["Activity_datetime"][0], pandas._libs.tslibs.timestamps.Timestamp + ) -#@pytest.mark.skip(reason="no change") +# @pytest.mark.skip(reason="no change") def test_split_col(harmonized_tables): # Testit with default QA actual_QA = wrangle.split_col(harmonized_tables) # Check for expected columns - assert 'QA_Nitrogen' in actual_QA.columns - assert 'QA_Conductivity' in actual_QA.columns - assert 'QA_flag' not in actual_QA.columns + assert "QA_Nitrogen" in actual_QA.columns + assert "QA_Conductivity" in actual_QA.columns + assert "QA_flag" not in actual_QA.columns # Testit with non-default column - col = 'ResultAnalyticalMethod/MethodIdentifier' - actual_methods = wrangle.split_col(harmonized_tables, col, 'MethodID') - assert 'MethodID_Nitrogen' in actual_methods.columns - assert 'MethodID_Conductivity' in actual_methods.columns + col = "ResultAnalyticalMethod/MethodIdentifier" + actual_methods = wrangle.split_col(harmonized_tables, col, "MethodID") + assert "MethodID_Nitrogen" in actual_methods.columns + assert "MethodID_Conductivity" in actual_methods.columns assert col not in actual_methods.columns # TODO: test when out_col is list (i.e., Phosphorus) -#@pytest.mark.skip(reason="no change") +# @pytest.mark.skip(reason="no change") def test_split_table(harmonized_tables): # Note: it will do datetime() as well actual_main, actual_chars = wrangle.split_table(harmonized_tables) @@ -1224,82 +1260,115 @@ def test_split_table(harmonized_tables): assert len(actual_main) == len(harmonized_tables) assert len(actual_chars) == len(harmonized_tables) # Check columns expected - expected = ['OrganizationIdentifier', 'OrganizationFormalName', - 'ActivityIdentifier', 'ProjectIdentifier', - 'MonitoringLocationIdentifier', - 'DetectionQuantitationLimitTypeName', - 'DetectionQuantitationLimitMeasure/MeasureValue', - 'DetectionQuantitationLimitMeasure/MeasureUnitCode', - 'ProviderName', 'QA_flag', 'Nitrogen', 'Speciation', - 'TOTAL NITROGEN_ MIXED FORMS', 'Conductivity', - 'Activity_datetime', 'Depth'] + expected = [ + "OrganizationIdentifier", + "OrganizationFormalName", + "ActivityIdentifier", + "ProjectIdentifier", + "MonitoringLocationIdentifier", + "DetectionQuantitationLimitTypeName", + "DetectionQuantitationLimitMeasure/MeasureValue", + "DetectionQuantitationLimitMeasure/MeasureUnitCode", + "ProviderName", + "QA_flag", + "Nitrogen", + "Speciation", + "TOTAL NITROGEN_ MIXED FORMS", + "Conductivity", + "Activity_datetime", + "Depth", + ] assert list(actual_main.columns) == expected - expected = ['ActivityStartDate', 'ActivityStartTime/Time', - 'ActivityStartTime/TimeZoneCode', - 'ResultDetectionConditionText', - 'MethodSpecificationName', 'CharacteristicName', - 'ResultSampleFractionText', 'ResultMeasureValue', - 'ResultMeasure/MeasureUnitCode', 'MeasureQualifierCode', - 'ResultStatusIdentifier', 'StatisticalBaseCode', - 'ResultValueTypeName', 'ResultWeightBasisText', - 'ResultTimeBasisText', 'ResultTemperatureBasisText', - 'ResultParticleSizeBasisText', 'PrecisionValue', - 'ResultCommentText', 'USGSPCode', - 'ResultDepthHeightMeasure/MeasureValue', - 'ResultDepthHeightMeasure/MeasureUnitCode', - 'ResultDepthAltitudeReferencePointText', - 'SubjectTaxonomicName', 'SampleTissueAnatomyName', - 'ResultAnalyticalMethod/MethodIdentifier', - 'ResultAnalyticalMethod/MethodIdentifierContext', - 'ResultAnalyticalMethod/MethodName', - 'MethodDescriptionText', 'LaboratoryName', - 'AnalysisStartDate', 'ResultLaboratoryCommentText', - 'ActivityTypeCode', 'ActivityMediaName', - 'ActivityMediaSubdivisionName', 'ActivityEndDate', - 'ActivityEndTime/Time', 'ActivityEndTime/TimeZoneCode', - 'ActivityDepthHeightMeasure/MeasureValue', - 'ActivityDepthHeightMeasure/MeasureUnitCode', - 'ActivityDepthAltitudeReferencePointText', - 'ActivityTopDepthHeightMeasure/MeasureValue', - 'ActivityTopDepthHeightMeasure/MeasureUnitCode', - 'ActivityBottomDepthHeightMeasure/MeasureValue', - 'ActivityBottomDepthHeightMeasure/MeasureUnitCode', - 'ActivityConductingOrganizationText', - 'ActivityCommentText', 'SampleAquifer', - 'HydrologicCondition', 'HydrologicEvent', - 'SampleCollectionMethod/MethodIdentifier', - 'SampleCollectionMethod/MethodIdentifierContext', - 'SampleCollectionMethod/MethodName', - 'SampleCollectionEquipmentName', 'PreparationStartDate', ] + expected = [ + "ActivityStartDate", + "ActivityStartTime/Time", + "ActivityStartTime/TimeZoneCode", + "ResultDetectionConditionText", + "MethodSpecificationName", + "CharacteristicName", + "ResultSampleFractionText", + "ResultMeasureValue", + "ResultMeasure/MeasureUnitCode", + "MeasureQualifierCode", + "ResultStatusIdentifier", + "StatisticalBaseCode", + "ResultValueTypeName", + "ResultWeightBasisText", + "ResultTimeBasisText", + "ResultTemperatureBasisText", + "ResultParticleSizeBasisText", + "PrecisionValue", + "ResultCommentText", + "USGSPCode", + "ResultDepthHeightMeasure/MeasureValue", + "ResultDepthHeightMeasure/MeasureUnitCode", + "ResultDepthAltitudeReferencePointText", + "SubjectTaxonomicName", + "SampleTissueAnatomyName", + "ResultAnalyticalMethod/MethodIdentifier", + "ResultAnalyticalMethod/MethodIdentifierContext", + "ResultAnalyticalMethod/MethodName", + "MethodDescriptionText", + "LaboratoryName", + "AnalysisStartDate", + "ResultLaboratoryCommentText", + "ActivityTypeCode", + "ActivityMediaName", + "ActivityMediaSubdivisionName", + "ActivityEndDate", + "ActivityEndTime/Time", + "ActivityEndTime/TimeZoneCode", + "ActivityDepthHeightMeasure/MeasureValue", + "ActivityDepthHeightMeasure/MeasureUnitCode", + "ActivityDepthAltitudeReferencePointText", + "ActivityTopDepthHeightMeasure/MeasureValue", + "ActivityTopDepthHeightMeasure/MeasureUnitCode", + "ActivityBottomDepthHeightMeasure/MeasureValue", + "ActivityBottomDepthHeightMeasure/MeasureUnitCode", + "ActivityConductingOrganizationText", + "ActivityCommentText", + "SampleAquifer", + "HydrologicCondition", + "HydrologicEvent", + "SampleCollectionMethod/MethodIdentifier", + "SampleCollectionMethod/MethodIdentifierContext", + "SampleCollectionMethod/MethodName", + "SampleCollectionEquipmentName", + "PreparationStartDate", + ] assert list(actual_chars.columns) == expected -#test viz + +# test viz def test_map_counts(test_harmonize_locations, test_harmonize_temperature): - actual = viz.map_counts(test_harmonize_temperature, - test_harmonize_locations, - 'Temperature') - assert len(actual['cnt']) == 21075 - assert sum(actual['cnt']) == 346210 + actual = viz.map_counts( + test_harmonize_temperature, test_harmonize_locations, "Temperature" + ) + assert len(actual["cnt"]) == 21075 + assert sum(actual["cnt"]) == 346210 + def test_map_measure(test_harmonize_locations, test_harmonize_temperature): - actual = viz.map_measure(test_harmonize_temperature, - test_harmonize_locations, - 'Temperature') - assert len(actual['mean']) == 21075 - assert sum(actual['mean']) == 523776.35504297394 + actual = viz.map_measure( + test_harmonize_temperature, test_harmonize_locations, "Temperature" + ) + assert len(actual["mean"]) == 21075 + assert sum(actual["mean"]) == 523776.35504297394 + def test_station_summary(test_harmonize_temperature): - actual = viz.station_summary(test_harmonize_temperature, 'Temperature') - assert list(actual.columns) == ['MonitoringLocationIdentifier', 'cnt', 'mean'] - assert len(actual['cnt']) == 21075 - assert sum(actual['cnt']) == 346210 - assert len(actual['mean']) == 21075 - assert sum(actual['mean']) == 523776.35504297394 + actual = viz.station_summary(test_harmonize_temperature, "Temperature") + assert list(actual.columns) == ["MonitoringLocationIdentifier", "cnt", "mean"] + assert len(actual["cnt"]) == 21075 + assert sum(actual["cnt"]) == 346210 + assert len(actual["mean"]) == 21075 + assert sum(actual["mean"]) == 523776.35504297394 + def test_print_report(test_harmonize_temperature, capsys): - viz.print_report(test_harmonize_temperature, - 'Temperature', - 'ResultMeasure/MeasureUnitCode') + viz.print_report( + test_harmonize_temperature, "Temperature", "ResultMeasure/MeasureUnitCode" + ) captured, err = capsys.readouterr() expected = "-Usable results-\ncount 346210.000000\n" expected += "mean 25.175700\nstd 143.175647\n" @@ -1309,4 +1378,4 @@ def test_print_report(test_harmonize_temperature, capsys): expected += "Unusable results: 13295\n" expected += "Usable results with inferred units: 0\n" expected += "Results outside threshold (0.0 to 884.2295835882991): 4\n" - assert captured == expected \ No newline at end of file + assert captured == expected diff --git a/harmonize_wq/wq_data.py b/harmonize_wq/wq_data.py index 44d5187..3e71a53 100644 --- a/harmonize_wq/wq_data.py +++ b/harmonize_wq/wq_data.py @@ -46,7 +46,7 @@ def units_dimension(series_in, units, ureg=None): >>> wq_data.units_dimension(unit_series, units='mg/l') ['g/kg'] """ - #TODO: this should be a method + # TODO: this should be a method if ureg is None: ureg = pint.UnitRegistry() dim_list = [] # List for units with mismatched dimensions @@ -59,7 +59,7 @@ def units_dimension(series_in, units, ureg=None): return dim_list -class WQCharData(): +class WQCharData: """Class for specific characteristic in Water Quality Portal results. Parameters @@ -121,16 +121,17 @@ def __init__(self, df_in, char_val): df_out = df_in.copy() # self.check_df(df) df_checks(df_out) - c_mask = df_out['CharacteristicName'] == char_val + c_mask = df_out["CharacteristicName"] == char_val self.c_mask = c_mask # Deal with units: set out = in - cols = {'unit_in': 'ResultMeasure/MeasureUnitCode', - 'unit_out': 'Units', - 'measure': 'ResultMeasureValue', - 'basis': 'Speciation', } + cols = { + "unit_in": "ResultMeasure/MeasureUnitCode", + "unit_out": "Units", + "measure": "ResultMeasureValue", + "basis": "Speciation", + } self.col = SimpleNamespace(**cols) - df_out.loc[c_mask, self.col.unit_out] = df_out.loc[c_mask, - self.col.unit_in] + df_out.loc[c_mask, self.col.unit_out] = df_out.loc[c_mask, self.col.unit_in] self.df = df_out # Deal with values: set out_col = in self.out_col = domains.out_col_lookup[char_val] @@ -148,14 +149,13 @@ def _coerce_measure(self): meas_col = self.col.measure # Coerce bad measures in series to NaN - meas_s = pandas.to_numeric(df_out.loc[c_mask, meas_col], - errors='coerce') + meas_s = pandas.to_numeric(df_out.loc[c_mask, meas_col], errors="coerce") # Create a list of the bad measures in the series bad_measures = [df_out.iloc[i][meas_col] for i in meas_s[meas_s.isna()].index] for bad_meas in pandas.unique(bad_measures): # Flag each unique bad measure one measure (not row) at a time if pandas.isna(bad_meas): - flag = f'{meas_col}: missing (NaN) result' + flag = f"{meas_col}: missing (NaN) result" cond = c_mask & (df_out[meas_col].isna()) else: flag = f'{meas_col}: "{bad_meas}" result cannot be used' @@ -184,7 +184,7 @@ def _infer_units(self, flag_col=None): The default None uses WQCharData.col.unit_out instead. """ # QA flag for missing units - flag = self._unit_qa_flag('MISSING', flag_col) + flag = self._unit_qa_flag("MISSING", flag_col) # Update mask for missing units units_mask = self.c_mask & self.df[self.col.unit_out].isna() self.df = add_qa_flag(self.df, units_mask, flag) # Assign flag @@ -192,7 +192,6 @@ def _infer_units(self, flag_col=None): self.df.loc[units_mask, self.col.unit_out] = self.units # Note: .fillna(self.units) is slightly faster but hits datatype issues - def _unit_qa_flag(self, trouble, flag_col=None): """Generate a QA_flag flag string for the units column. @@ -213,9 +212,9 @@ def _unit_qa_flag(self, trouble, flag_col=None): Flag to use in QA_flag column. """ if flag_col: - return f'{flag_col}: {trouble} UNITS, {self.units} assumed' + return f"{flag_col}: {trouble} UNITS, {self.units} assumed" # Else: Used when flag_col is None, typically the column being checked - return f'{self.col.unit_out}: {trouble} UNITS, {self.units} assumed' + return f"{self.col.unit_out}: {trouble} UNITS, {self.units} assumed" def _replace_in_col(self, col, old_val, new_val, mask=None): """Replace string throughout column, filter rows to skip by mask. @@ -245,9 +244,9 @@ def _replace_in_col(self, col, old_val, new_val, mask=None): df_in = self.df # Note: Timing is just as fast as long as df isn't copied # Timing for replace vs set unkown - mask_old = mask & (df_in[col]==old_val) - #str.replace did not work for short str to long str (over-replaces) - #df.loc[mask, col] = df.loc[mask, col].str.replace(old_val, new_val) + mask_old = mask & (df_in[col] == old_val) + # str.replace did not work for short str to long str (over-replaces) + # df.loc[mask, col] = df.loc[mask, col].str.replace(old_val, new_val) df_in.loc[mask_old, col] = new_val # This should be more explicit return df_in @@ -278,25 +277,25 @@ def _dimension_handling(self, unit, quant=None, ureg=None): ureg = pint.UnitRegistry() # Conversion to moles performed a level up from here (class method) - if ureg(units).check({'[length]': -3, '[mass]': 1}): + if ureg(units).check({"[length]": -3, "[mass]": 1}): # Convert to density, e.g., '%' -> 'mg/l' - if ureg(unit).check({'[substance]': 1}): + if ureg(unit).check({"[substance]": 1}): if quant: # Moles -> mg/l; dim = ' / l' - return {unit: quant + ' / l'}, [quant + ' / l'] + return {unit: quant + " / l"}, [quant + " / l"] raise ValueError("Pint Quantity required for moles conversions") # Else assume it is dimensionless (e.g. unit = 'g/kg') - return {unit: unit + ' * H2O'}, [] + return {unit: unit + " * H2O"}, [] if ureg(units).dimensionless: # Convert to dimensionless, e.g., 'mg/l' -> '%' - if ureg(unit).check({'[substance]': 1}): + if ureg(unit).check({"[substance]": 1}): if quant: # Moles -> g/kg; dim = ' / l / H2O' - return {unit: quant + ' / l / H2O'}, [quant + ' / l / H2O'] + return {unit: quant + " / l / H2O"}, [quant + " / l / H2O"] raise ValueError("Pint Quantity required for moles conversions") # Else assume it is density (e.g. unit = 'mg/l') - return {unit: unit + ' / H2O'}, [] - warn('WARNING: Unexpected dimensionality') + return {unit: unit + " / H2O"}, [] + warn("WARNING: Unexpected dimensionality") return {}, [] def check_units(self, flag_col=None): @@ -384,7 +383,7 @@ def check_units(self, flag_col=None): df_out.loc[u_mask, self.col.unit_out] = self.units # Replace w/ default self.df = df_out - def check_basis(self, basis_col='MethodSpecificationName'): + def check_basis(self, basis_col="MethodSpecificationName"): """Determine speciation (basis) for measure. Parameters @@ -451,8 +450,7 @@ def check_basis(self, basis_col='MethodSpecificationName'): df_checks(self.df, [basis_col]) # Basis from MethodSpecificationName - if basis_col == 'MethodSpecificationName': - + if basis_col == "MethodSpecificationName": # Add basis out column (i.e., 'Speciation') if it doesn't exist if self.col.basis not in self.df.columns: self.df[self.col.basis] = nan @@ -463,9 +461,9 @@ def check_basis(self, basis_col='MethodSpecificationName'): # Basis from unit try: basis_dict = basis.unit_basis_dict[self.out_col] - self.df[c_mask] = basis.basis_from_unit(self.df[c_mask], - basis_dict, - self.col.unit_out) + self.df[c_mask] = basis.basis_from_unit( + self.df[c_mask], basis_dict, self.col.unit_out + ) except KeyError: pass # Finish by filling any NAs with char_val based default @@ -478,14 +476,15 @@ def check_basis(self, basis_col='MethodSpecificationName'): self.df.loc[c_mask, col] = self.df.loc[c_mask, col].fillna(char_val) # Drop instances of 'as ' - self.df.loc[c_mask, col] = [bas[3:] - if bas.startswith('as ') else bas - for bas in self.df.loc[c_mask, col]] + self.df.loc[c_mask, col] = [ + bas[3:] if bas.startswith("as ") else bas + for bas in self.df.loc[c_mask, col] + ] else: - self.df[c_mask] = basis.update_result_basis(self.df[c_mask], - basis_col, - self.col.unit_out) + self.df[c_mask] = basis.update_result_basis( + self.df[c_mask], basis_col, self.col.unit_out + ) def update_ureg(self): """Update class unit registry to define units based on out_col.""" @@ -572,7 +571,7 @@ def measure_mask(self, column=None): return self.c_mask & self.df[column].notna() return self.c_mask & self.df[self.out_col].notna() - def convert_units(self, default_unit=None, errors='raise'): + def convert_units(self, default_unit=None, errors="raise"): """Update out-col to convert units. Update class out-col used to convert :class:`pandas.DataFrame`. from old @@ -622,11 +621,13 @@ def convert_units(self, default_unit=None, errors='raise'): df_out = self.df m_mask = self.measure_mask() - params = {'quantity_series': df_out.loc[m_mask, self.out_col], - 'unit_series': df_out.loc[m_mask, self.col.unit_out], - 'units': self.units, - 'ureg': self.ureg, - 'errors': errors} + params = { + "quantity_series": df_out.loc[m_mask, self.out_col], + "unit_series": df_out.loc[m_mask, self.col.unit_out], + "units": self.units, + "ureg": self.ureg, + "errors": errors, + } df_out.loc[m_mask, self.out_col] = convert_unit_series(**params) self.df = df_out @@ -681,13 +682,13 @@ def apply_conversion(self, convert_fun, unit, u_mask=None): unit = self.ureg.Quantity(unit) # Pint quantity object from unit old_vals = df_out.loc[u_mask, self.out_col] try: - new_quants = [convert_fun(x*unit) for x in old_vals] + new_quants = [convert_fun(x * unit) for x in old_vals] except ValueError: - #print(old_vals.iloc[0]*unit) + # print(old_vals.iloc[0]*unit) # string to avoid altered ureg issues - new_quants = [convert_fun(str(x*unit)) for x in old_vals] + new_quants = [convert_fun(str(x * unit)) for x in old_vals] # 1run=6505.62ms (may be slower) vs apply (5888.43ms) - #new_vals = old_vals.apply(lambda x: convert_fun(x*unit).magnitude) + # new_vals = old_vals.apply(lambda x: convert_fun(x*unit).magnitude) new_vals = [quant.magnitude for quant in new_quants] df_out.loc[u_mask, self.out_col] = new_vals df_out.loc[u_mask, self.col.unit_out] = str(new_quants[0].units) @@ -733,10 +734,9 @@ def dimensions_list(self, m_mask=None): """ if m_mask is None: m_mask = self.measure_mask() - return units_dimension(self.df.loc[m_mask, - self.col.unit_out], - self.units, - self.ureg) + return units_dimension( + self.df.loc[m_mask, self.col.unit_out], self.units, self.ureg + ) def replace_unit_str(self, old, new, mask=None): """Replace ALL instances of old with in WQCharData.col.unit_out column. @@ -841,8 +841,13 @@ def replace_unit_by_dict(self, val_dict, mask=None): for item in val_dict.items(): self._replace_in_col(col, item[0], item[1], mask) - def fraction(self, frac_dict=None, catch_all=None, suffix=None, - fract_col='ResultSampleFractionText'): + def fraction( + self, + frac_dict=None, + catch_all=None, + suffix=None, + fract_col="ResultSampleFractionText", + ): """Create columns for sample fractions using frac_dict to set names. Parameters @@ -921,7 +926,8 @@ def fraction(self, frac_dict=None, catch_all=None, suffix=None, 0 1.0 milligram / liter NaN 1 NaN 10.000000000000002 milligram / liter - Alternatively, the sample fraction lists from tada can be used, in this case they are added: + Alternatively, the sample fraction lists from tada can be used, in this case + they are added: >>> wq.fraction('TADA') >>> wq.df.columns @@ -942,25 +948,25 @@ def fraction(self, frac_dict=None, catch_all=None, suffix=None, fracs = list(set(self.df[c_mask][fract_col])) # List of fracs in data - if ' ' in fracs: - #TODO: new col instead of overwrite + if " " in fracs: + # TODO: new col instead of overwrite # Replace bad sample fraction w/ nan - self.df = self._replace_in_col(fract_col, ' ', nan, c_mask) - fracs.remove(' ') + self.df = self._replace_in_col(fract_col, " ", nan, c_mask) + fracs.remove(" ") df_out = self.df # Set var for easier referencing - char = list(set(df_out[self.c_mask]['CharacteristicName']))[0] + char = list(set(df_out[self.c_mask]["CharacteristicName"]))[0] # Deal with lack of args if suffix is None: suffix = self.out_col if catch_all is None: - catch_all = f'Other_{suffix}' + catch_all = f"Other_{suffix}" # Set up dict for what sample fraction to what col if frac_dict is None: frac_dict = {} - elif frac_dict=='TADA': + elif frac_dict == "TADA": # Get dictionary for updates from TADA (note keys are all caps) tada = domains.harmonize_TADA_dict()[char.upper()] frac_dict = {} @@ -969,40 +975,40 @@ def fraction(self, frac_dict=None, catch_all=None, suffix=None, frac_dict[key] = list(tada[key]) # Add their values frac_dict[key] += [x for v in tada[key].values() for x in v] - #else: dict was already provided + # else: dict was already provided if catch_all not in frac_dict.keys(): - frac_dict[catch_all] = ['', nan] + frac_dict[catch_all] = ["", nan] # Make sure catch_all exists if not isinstance(frac_dict[catch_all], list): frac_dict[catch_all] = [frac_dict[catch_all]] # First cut to make the keys work as column names for key in frac_dict: - frac_dict[key.replace(',', '_')] = frac_dict.pop(key) + frac_dict[key.replace(",", "_")] = frac_dict.pop(key) for key in frac_dict: if key == self.out_col: - #TODO: prevent it from over-writing any col + # TODO: prevent it from over-writing any col # If it is the same col name as the out_col add '_1' - frac_dict[key+'_1'] = frac_dict.pop(key) + frac_dict[key + "_1"] = frac_dict.pop(key) # Compare sample fractions against expected init_fracs = [x for v in frac_dict.values() for x in v] not_init = [frac for frac in fracs if frac not in init_fracs] - if len(not_init)>0: + if len(not_init) > 0: # TODO: when to add QA_flag? - smp = f'{char} sample fractions not in frac_dict' + smp = f"{char} sample fractions not in frac_dict" solution = f'expected domains, mapped to "{catch_all}"' - print(f'{len(not_init)} {smp}') + print(f"{len(not_init)} {smp}") # Compare against domains - all_fracs = list(domains.get_domain_dict('ResultSampleFraction')) + all_fracs = list(domains.get_domain_dict("ResultSampleFraction")) add_fracs = [frac for frac in not_init if frac in all_fracs] # Add new fractions to frac_dict mapped to catch_all - if len(add_fracs)>0: - print(f'{len(add_fracs)} {smp} found in {solution}') + if len(add_fracs) > 0: + print(f"{len(add_fracs)} {smp} found in {solution}") frac_dict[catch_all] += add_fracs bad_fracs = [frac for frac in not_init if frac not in all_fracs] - if len(bad_fracs)>0: - warn(f'{len(bad_fracs)} {smp} or {solution}') + if len(bad_fracs) > 0: + warn(f"{len(bad_fracs)} {smp} or {solution}") frac_dict[catch_all] += bad_fracs # Loop through dictionary making updates based on sample fraction @@ -1061,29 +1067,28 @@ def dimension_fixes(self): mol_list = [] # Empty list to append to # If converting to/from moles has extra steps - if self.ureg(self.units).check({'[substance]': 1}): + if self.ureg(self.units).check({"[substance]": 1}): # Convert everything to MOLES!!! # Must consider the different speciation for each - #TODO: This could be problematic given umol/l - warn('This feature is not available yet') + # TODO: This could be problematic given umol/l + warn("This feature is not available yet") return {}, [] for unit in self.dimensions_list(): - if self.ureg(unit).check({'[substance]': 1}): - mol_params = {'ureg': self.ureg, - 'Q_': self.ureg.Quantity(1, unit),} + if self.ureg(unit).check({"[substance]": 1}): + mol_params = { + "ureg": self.ureg, + "Q_": self.ureg.Quantity(1, unit), + } # Moles need to be further split by basis basis_lst = list(set(self.df.loc[self.c_mask, self.col.basis])) for speciation in basis_lst: - mol_params['basis'] = speciation + mol_params["basis"] = speciation quant = str(moles_to_mass(**mol_params)) - dim_tup = self._dimension_handling(unit, - quant, - self.ureg) + dim_tup = self._dimension_handling(unit, quant, self.ureg) dimension_dict.update(dim_tup[0]) - mol_list+= dim_tup[1] + mol_list += dim_tup[1] else: - dim_tup = self._dimension_handling(unit, - ureg = self.ureg) + dim_tup = self._dimension_handling(unit, ureg=self.ureg) dimension_dict.update(dim_tup[0]) return dimension_dict, mol_list