From 62292bdb7588ad64186053d5e7f779dc7ffe9a3c Mon Sep 17 00:00:00 2001 From: Bousquin Date: Mon, 5 Feb 2024 09:16:45 -0600 Subject: [PATCH] pylint --- harmonize_wq/basis.py | 11 +++++----- harmonize_wq/clean.py | 4 ++-- harmonize_wq/convert.py | 6 +++--- harmonize_wq/domains.py | 7 +++---- harmonize_wq/location.py | 8 +++----- harmonize_wq/visualize.py | 13 ++++++------ harmonize_wq/wq_data.py | 43 +++++++++++++++++---------------------- harmonize_wq/wrangle.py | 23 +++++++++++---------- 8 files changed, 53 insertions(+), 62 deletions(-) diff --git a/harmonize_wq/basis.py b/harmonize_wq/basis.py index 47d602f..1b1b85c 100644 --- a/harmonize_wq/basis.py +++ b/harmonize_wq/basis.py @@ -160,13 +160,12 @@ def basis_from_unit(df_in, basis_dict, unit_col='Units', basis_col='Speciation') mask = df[unit_col] == old_unit # Update mask if basis_col in df.columns: # Add flags anywhere the values are updated - flag1 = '{}: updated from '.format(basis_col) + flag1 = f'{basis_col}: updated from ' # List of unique basis values basis_list = list(set(df.loc[mask, basis_col].dropna())) # Loop over existing values in basis field for old_basis in basis_list: - flag = '{}{} to {} (units)'.format(flag1, old_basis, - base) + flag = f'{flag1}{old_basis} to {base} (units)' if old_basis != base: qa_mask = mask & (df[basis_col] == old_basis) warn(f'Mismatched {flag}', UserWarning) @@ -178,7 +177,7 @@ def basis_from_unit(df_in, basis_dict, unit_col='Units', basis_col='Speciation') return df -def basis_from_methodSpec(df_in): +def basis_from_method_spec(df_in): """Copy speciation from MethodSpecificationName to new 'Speciation' column. Parameters @@ -207,7 +206,7 @@ def basis_from_methodSpec(df_in): 1 Phosphorus NaN NWIS >>> from harmonize_wq import basis - >>> basis.basis_from_methodSpec(df) + >>> basis.basis_from_method_spec(df) CharacteristicName MethodSpecificationName ProviderName Speciation 0 Phosphorus as P NWIS as P 1 Phosphorus NaN NWIS NaN @@ -302,7 +301,7 @@ def update_result_basis(df_in, basis_col, unit_col): elif basis_col == 'ResultTimeBasisText': df_out = df_in.copy() else: - raise ValueError('{} not recognized basis column'.format(basis_col)) + raise ValueError(f'{basis_col} not recognized basis column') return df_out diff --git a/harmonize_wq/clean.py b/harmonize_wq/clean.py index cf3be3e..8fa549c 100644 --- a/harmonize_wq/clean.py +++ b/harmonize_wq/clean.py @@ -146,7 +146,7 @@ def check_precision(df_in, col, limit=3): df_out = df_in.copy() # Create T/F mask based on len of everything after the decimal c_mask = [len(str(x).split('.')[1]) < limit for x in df_out[col]] - flag = '{}: Imprecise: lessthan{}decimaldigits'.format(col, limit) + flag = f'{col}: Imprecise: lessthan{limit}decimaldigits' df_out = harmonize.add_qa_flag(df_out, c_mask, flag) # Assign flags return df_out @@ -221,7 +221,7 @@ def wet_dry_checks(df_in, mask=None): 'ResultSampleFractionText', 'ResultWeightBasisText']) # QA - Sample Media, fix assigned 'Water' that are actually 'Sediment' - qa_flag = '{}: Water changed to Sediment'.format(media_col) + qa_flag = f'{media_col}: Water changed to Sediment' # Create mask for bad data media_mask = ((df_out['ResultSampleFractionText'] == 'Bed Sediment') & (df_out['ResultWeightBasisText'] == 'Dry') & diff --git a/harmonize_wq/convert.py b/harmonize_wq/convert.py index 413a147..784e9c9 100644 --- a/harmonize_wq/convert.py +++ b/harmonize_wq/convert.py @@ -3,8 +3,8 @@ Contains several unit conversion functions not in :mod:`pint`. """ -import pint import math +import pint from harmonize_wq import domains @@ -560,7 +560,7 @@ def DO_concentration(val, >>> from harmonize_wq import convert >>> convert.DO_concentration(input_DO) 6995.603308586222 - """ + """ p, t = pressure, temperature if p == 1 & (t == 25): cP = 8.262332418 @@ -582,7 +582,7 @@ def _DO_concentration_eq(p, t): cStar = math.exp(7.7117 - 1.31403 * math.log(t + 45.93)) numerator = (1-Pwv/p)*(1-(standard*p)) denominator = (1-Pwv)*(1-standard) - + return cStar*p*(numerator/denominator) diff --git a/harmonize_wq/domains.py b/harmonize_wq/domains.py index 850f4d9..bf75b90 100644 --- a/harmonize_wq/domains.py +++ b/harmonize_wq/domains.py @@ -6,7 +6,6 @@ list. """ import requests -import pint import pandas @@ -109,11 +108,11 @@ def get_domain_dict(table, cols=None): cols = ['Name', 'Description'] if not table.endswith('_CSV'): table += '_CSV' - url = '{}{}.zip'.format(BASE_URL, table) + url = f'{BASE_URL}{table}.zip' # Very limited url handling if requests.get(url).status_code != 200: status_code = requests.get(url).status_code - print("{} web service response {}".format(url, status_code)) + print(f"{url} web service response {status_code}") df = pandas.read_csv(url, usecols=cols) return dict(df.values) @@ -155,7 +154,7 @@ def harmonize_TADA_dict(): # Replace old smaple fraction dict with new using keys full_dict[k_char][k_target] = new_target - return full_dict + return full_dict def re_case(word, domain_list): diff --git a/harmonize_wq/location.py b/harmonize_wq/location.py index e58fc72..d75b6a7 100644 --- a/harmonize_wq/location.py +++ b/harmonize_wq/location.py @@ -4,7 +4,7 @@ from shapely.geometry import shape import geopandas import pandas -import dataretrieval.wqp as wqp +from dataretrieval import wqp from harmonize_wq import harmonize from harmonize_wq import domains from harmonize_wq import wrangle @@ -71,13 +71,11 @@ def infer_CRS(df_in, df_out = df_in.copy() if bad_crs_val: # QA flag for bad CRS based on bad_crs_val - flag = '{}: Bad datum {}, EPSG:{} assumed'.format(crs_col, - bad_crs_val, - out_EPSG) + flag = f'{crs_col}: Bad datum {bad_crs_val}, EPSG:{out_EPSG} assumed' c_mask = df_out[crs_col] == bad_crs_val # Mask for bad CRS value else: # QA flag for missing CRS - flag = '{}: MISSING datum, EPSG:{} assumed'.format(crs_col, out_EPSG) + flag = f'{crs_col}: MISSING datum, EPSG:{out_EPSG} assumed' c_mask = df_out[crs_col].isna() # Mask for missing units df_out = harmonize.add_qa_flag(df_out, c_mask, flag) # Assign flag df_out.loc[c_mask, out_col] = out_EPSG # Update with infered unit diff --git a/harmonize_wq/visualize.py b/harmonize_wq/visualize.py index 34cba37..fdbbf7d 100644 --- a/harmonize_wq/visualize.py +++ b/harmonize_wq/visualize.py @@ -1,8 +1,8 @@ # -*- coding: utf-8 -*- """Functions to help visualize data.""" +from math import sqrt import pandas import geopandas -from math import sqrt from harmonize_wq import wrangle @@ -40,11 +40,11 @@ def print_report(results_in, out_col, unit_col_in, threshold=None): # Series with just magnitude results_s = pandas.Series([x.magnitude for x in results]) # Number of usable results - print('-Usable results-\n{}'.format(results_s.describe())) + print(f'-Usable results-\n{results_s.describe()}') # Number measures unused - print('Unusable results: {}'.format(len(results_in)-len(results))) + print(f'Unusable results: {len(results_in)-len(results)}') # Number of infered result units - print('Usable results with inferred units: {}'.format(len(inferred))) + print(f'Usable results with inferred units: {len(inferred)}') # Results outside thresholds if not threshold: # TODO: Default mean +/-1 standard deviation works here but generally 6 @@ -53,9 +53,8 @@ def print_report(results_in, out_col, unit_col_in, threshold=None): inside = results_s[(results_s <= threshold['max']) & (results_s >= threshold['min'])] diff = len(results) - len(inside) - print('Results outside threshold ({} to {}): {}'.format(threshold['min'], - threshold['max'], - diff)) + threshold_range = f"{threshold['min']} to {threshold['max']}" + print(f'Results outside threshold ({threshold_range}): {diff}') # Graphic representation of stats inside.hist(bins=int(sqrt(inside.count()))) diff --git a/harmonize_wq/wq_data.py b/harmonize_wq/wq_data.py index 7f211a3..ffba069 100644 --- a/harmonize_wq/wq_data.py +++ b/harmonize_wq/wq_data.py @@ -8,7 +8,7 @@ from harmonize_wq import domains from harmonize_wq import basis from harmonize_wq import convert -from harmonize_wq import harmonize +from harmonize_wq import harmonize class WQCharData(): """Class for specific characteristic in Water Quality Portal results. @@ -106,11 +106,10 @@ def _coerce_measure(self): for bad_meas in pandas.unique(bad_measures): # Flag each unique bad measure one measure (not row) at a time if pandas.isna(bad_meas): - flag = '{}: missing (NaN) result'.format(meas_col) + flag = f'{meas_col}: missing (NaN) result' cond = c_mask & (df_out[meas_col].isna()) else: - flag = '{}: "{}" result cannot be used'.format(meas_col, - bad_meas) + flag = f'{meas_col}: "{bad_meas}" result cannot be used' cond = c_mask & (df_out[meas_col] == bad_meas) # Flag bad measures df_out = harmonize.add_qa_flag(df_out, cond, flag) @@ -149,7 +148,7 @@ def _unit_qa_flag(self, trouble, flag_col=None): """Generate a QA_flag flag string for the units column. If unit_col is a copy flag_col can specify the original column name for - the flag. + the flag. The default units, self.units replaces the problem unit. Parameters ---------- @@ -164,13 +163,11 @@ def _unit_qa_flag(self, trouble, flag_col=None): string Flag to use in QA_flag column. """ - unit = self.units # The default unit that replaced the problem unit if flag_col: - return '{}: {} UNITS, {} assumed'.format(flag_col, trouble, unit) + return f'{flag_col}: {trouble} UNITS, {self.units} assumed' # Else: Used when flag_col is None, typically the column being checked - unit_col = self.col.unit_out - return '{}: {} UNITS, {} assumed'.format(unit_col, trouble, unit) - + return f'{self.col.unit_out}: {trouble} UNITS, {self.units} assumed' + def _replace_in_col(self, col, old_val, new_val, mask=None): """Replace string throughout column, filter rows to skip by mask. @@ -203,7 +200,7 @@ def _replace_in_col(self, col, old_val, new_val, mask=None): #str.replace did not work for short str to long str (over-replaces) #df.loc[mask, col] = df.loc[mask, col].str.replace(old_val, new_val) df_in.loc[mask_old, col] = new_val # This should be more explicit - + return df_in def _dimension_handling(self, unit, quant=None, ureg=None): @@ -230,7 +227,7 @@ def _dimension_handling(self, unit, quant=None, ureg=None): units = self.units if ureg is None: ureg = pint.UnitRegistry() - + # Conversion to moles performed a level up from here (class method) if ureg(units).check({'[length]': -3, '[mass]': 1}): # Convert to density, e.g., '%' -> 'mg/l' @@ -328,7 +325,7 @@ def check_units(self, flag_col=None): except pint.UndefinedUnitError: # WARNING: Does not catch '%' or bad units in ureg (eg deg F) # If bad, flag and replace - problem = "'{}' UNDEFINED UNIT for {}".format(unit, self.out_col) + problem = f"'{unit}' UNDEFINED UNIT for {self.out_col}" warn("WARNING: " + problem) flag = self._unit_qa_flag(problem, flag_col) # New mask for bad units @@ -406,13 +403,13 @@ def check_basis(self, basis_col='MethodSpecificationName'): # Basis from MethodSpecificationName if basis_col == 'MethodSpecificationName': - + # Add basis out column (i.e., 'Speciation') if it doesn't exist if self.col.basis not in self.df.columns: self.df[self.col.basis] = nan - + # Mask to characteristic - self.df[c_mask] = basis.basis_from_methodSpec(self.df[c_mask]) + self.df[c_mask] = basis.basis_from_method_spec(self.df[c_mask]) # Basis from unit try: @@ -824,7 +821,7 @@ def fraction(self, frac_dict=None, suffix=None, if suffix is None: suffix = self.out_col - catch_all = 'Other_{}'.format(suffix) + catch_all = f'Other_{suffix}' if frac_dict is None: frac_dict = {catch_all: ''} else: @@ -859,7 +856,7 @@ def fraction(self, frac_dict=None, suffix=None, harmonize_fract = harmonize_dict[char.upper()] # Loop through dictionary making updates to sample fraction for fract_set in harmonize_fract.values(): - for row in fract_set.items(): + for row in fract_set.items(): fract_mask = df_out[c_mask][fract_col].isin(row[1]) # Mask by values df_out[c_mask][fract_mask][fract_col] = row[0] # Update to key # Compare df_out againt self.df to add QA flag if changed @@ -869,20 +866,18 @@ def fraction(self, frac_dict=None, suffix=None, # TODO: LEFT OFF ABOVE IS STILL EMPTY self.df = df_out - + # Make column for any unexpected Sample Fraction values, loudly for s_f in set(df_out[c_mask][fract_col].dropna()): if s_f not in samp_fract_set: - char = '{}_{}'.format(s_f.replace(' ', '_'), suffix) + char = f"{s_f.replace(' ', '_')}_{suffix}" frac_dict[char] = s_f - prob = '"{}" column for {}, may be error'.format(char, s_f) - warn('Warning: ' + prob) + warn(f'Warning: "{char}" column for {s_f}, may be error') # TODO: add QA_flag # Test we didn't skip any SampleFraction samp_fract_set = sorted({x for v in frac_dict.values() for x in v}) for s_f in set(df_out[c_mask][fract_col].dropna()): - assert s_f in samp_fract_set, '{} check in {}'.format(s_f, - fract_col) + assert s_f in samp_fract_set, f'{s_f} check in {fract_col}' # Create out columns for each sample fraction for frac in frac_dict.items(): col = frac[0] # New column name diff --git a/harmonize_wq/wrangle.py b/harmonize_wq/wrangle.py index 1d8d445..bead6bf 100644 --- a/harmonize_wq/wrangle.py +++ b/harmonize_wq/wrangle.py @@ -5,7 +5,7 @@ from harmonize_wq import domains from harmonize_wq import harmonize from harmonize_wq.clean import datetime, harmonize_depth -import dataretrieval.wqp as wqp +from dataretrieval import wqp def split_table(df_in): @@ -169,13 +169,14 @@ def collapse_results(df_in, cols=None): df = df.drop_duplicates() # TODO: use date instead of datetime if na? (date_idx) - idx_cols = ['MonitoringLocationIdentifier', + if not cols: + cols = ['MonitoringLocationIdentifier', 'Activity_datetime', 'ActivityIdentifier', 'OrganizationIdentifier'] - df_indexed = df.groupby(by=idx_cols, dropna=False).first() + df_indexed = df.groupby(by=cols, dropna=False).first() # TODO: warn about multi-lines with values (only returns first) - problems = df.groupby(by=idx_cols, dropna=False).first(min_count=2) + problems = df.groupby(by=cols, dropna=False).first(min_count=2) problems = problems.dropna(axis=1, how='all') return df_indexed @@ -561,14 +562,14 @@ def merge_tables(df1, df2, df2_cols='all', merge_cols='activity'): # Check columns in both tables shared = [x for x in list(df1.columns) if x in col2_list] for col in merge_cols: - assert col in shared, '{} not in both DataFrames'.format(col) + assert col in shared, f'{col} not in both DataFrames' # Columns to add from df2 if df2_cols == 'all': # All columns not in df1 df2_cols = [x for x in col2_list if x not in list(df1.columns)] else: for col in df2_cols: - assert col in col2_list, '{} not in DataFrame'.format(col) + assert col in col2_list, f'{col} not in DataFrame' # Merge activity columns to narrow results df2 = df2[merge_cols + df2_cols] # Limit df2 to columns we want @@ -640,15 +641,15 @@ def get_bounding_box(shp, idx=None): shp = as_gdf(shp) if idx is None: - bBox = shp.total_bounds + bbox = shp.total_bounds else: xmin = shp.bounds['minx'][idx] xmax = shp.bounds['maxx'][idx] ymin = shp.bounds['miny'][idx] ymax = shp.bounds['maxy'][idx] - bBox = [xmin, ymin, xmax, ymax] + bbox = [xmin, ymin, xmax, ymax] - return ','.join(map(str, bBox)) + return ','.join(map(str, bbox)) def clip_stations(stations, aoi): @@ -756,10 +757,10 @@ def to_simple_shape(gdf, out_shp): cols = gdf.columns # List of current column names names_dict = domains.stations_rename() # Dict of column names to update # Rename non-results columns that are too long for shp field names - renaming_list = [col for col in cols if col in names_dict.keys()] + renaming_list = [col for col in cols if col in names_dict] renaming_dict = {old_col: names_dict[old_col] for old_col in renaming_list} # Identify possible results columns before renaming columns - possible_results = [col for col in cols if col not in names_dict.keys()] + possible_results = [col for col in cols if col not in names_dict] gdf = gdf.rename(columns=renaming_dict) # Rename columns # TODO: old_field should be assigned to alias if output driver allows # field_map1...