Skip to content

Commit

Permalink
pylint
Browse files Browse the repository at this point in the history
  • Loading branch information
jbousquin committed Feb 5, 2024
1 parent e66c573 commit 62292bd
Show file tree
Hide file tree
Showing 8 changed files with 53 additions and 62 deletions.
11 changes: 5 additions & 6 deletions harmonize_wq/basis.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,13 +160,12 @@ def basis_from_unit(df_in, basis_dict, unit_col='Units', basis_col='Speciation')
mask = df[unit_col] == old_unit # Update mask
if basis_col in df.columns:
# Add flags anywhere the values are updated
flag1 = '{}: updated from '.format(basis_col)
flag1 = f'{basis_col}: updated from '
# List of unique basis values
basis_list = list(set(df.loc[mask, basis_col].dropna()))
# Loop over existing values in basis field
for old_basis in basis_list:
flag = '{}{} to {} (units)'.format(flag1, old_basis,
base)
flag = f'{flag1}{old_basis} to {base} (units)'
if old_basis != base:
qa_mask = mask & (df[basis_col] == old_basis)
warn(f'Mismatched {flag}', UserWarning)
Expand All @@ -178,7 +177,7 @@ def basis_from_unit(df_in, basis_dict, unit_col='Units', basis_col='Speciation')
return df


def basis_from_methodSpec(df_in):
def basis_from_method_spec(df_in):
"""Copy speciation from MethodSpecificationName to new 'Speciation' column.
Parameters
Expand Down Expand Up @@ -207,7 +206,7 @@ def basis_from_methodSpec(df_in):
1 Phosphorus NaN NWIS
>>> from harmonize_wq import basis
>>> basis.basis_from_methodSpec(df)
>>> basis.basis_from_method_spec(df)
CharacteristicName MethodSpecificationName ProviderName Speciation
0 Phosphorus as P NWIS as P
1 Phosphorus NaN NWIS NaN
Expand Down Expand Up @@ -302,7 +301,7 @@ def update_result_basis(df_in, basis_col, unit_col):
elif basis_col == 'ResultTimeBasisText':
df_out = df_in.copy()
else:
raise ValueError('{} not recognized basis column'.format(basis_col))
raise ValueError(f'{basis_col} not recognized basis column')

return df_out

Expand Down
4 changes: 2 additions & 2 deletions harmonize_wq/clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def check_precision(df_in, col, limit=3):
df_out = df_in.copy()
# Create T/F mask based on len of everything after the decimal
c_mask = [len(str(x).split('.')[1]) < limit for x in df_out[col]]
flag = '{}: Imprecise: lessthan{}decimaldigits'.format(col, limit)
flag = f'{col}: Imprecise: lessthan{limit}decimaldigits'
df_out = harmonize.add_qa_flag(df_out, c_mask, flag) # Assign flags
return df_out

Expand Down Expand Up @@ -221,7 +221,7 @@ def wet_dry_checks(df_in, mask=None):
'ResultSampleFractionText',
'ResultWeightBasisText'])
# QA - Sample Media, fix assigned 'Water' that are actually 'Sediment'
qa_flag = '{}: Water changed to Sediment'.format(media_col)
qa_flag = f'{media_col}: Water changed to Sediment'
# Create mask for bad data
media_mask = ((df_out['ResultSampleFractionText'] == 'Bed Sediment') &
(df_out['ResultWeightBasisText'] == 'Dry') &
Expand Down
6 changes: 3 additions & 3 deletions harmonize_wq/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
Contains several unit conversion functions not in :mod:`pint`.
"""
import pint
import math
import pint
from harmonize_wq import domains


Expand Down Expand Up @@ -560,7 +560,7 @@ def DO_concentration(val,
>>> from harmonize_wq import convert
>>> convert.DO_concentration(input_DO)
6995.603308586222
"""
"""
p, t = pressure, temperature
if p == 1 & (t == 25):
cP = 8.262332418
Expand All @@ -582,7 +582,7 @@ def _DO_concentration_eq(p, t):
cStar = math.exp(7.7117 - 1.31403 * math.log(t + 45.93))
numerator = (1-Pwv/p)*(1-(standard*p))
denominator = (1-Pwv)*(1-standard)

return cStar*p*(numerator/denominator)


Expand Down
7 changes: 3 additions & 4 deletions harmonize_wq/domains.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
list.
"""
import requests
import pint
import pandas


Expand Down Expand Up @@ -109,11 +108,11 @@ def get_domain_dict(table, cols=None):
cols = ['Name', 'Description']
if not table.endswith('_CSV'):
table += '_CSV'
url = '{}{}.zip'.format(BASE_URL, table)
url = f'{BASE_URL}{table}.zip'
# Very limited url handling
if requests.get(url).status_code != 200:
status_code = requests.get(url).status_code
print("{} web service response {}".format(url, status_code))
print(f"{url} web service response {status_code}")
df = pandas.read_csv(url, usecols=cols)
return dict(df.values)

Expand Down Expand Up @@ -155,7 +154,7 @@ def harmonize_TADA_dict():
# Replace old sample fraction dict with new using keys
full_dict[k_char][k_target] = new_target

return full_dict
return full_dict


def re_case(word, domain_list):
Expand Down
8 changes: 3 additions & 5 deletions harmonize_wq/location.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from shapely.geometry import shape
import geopandas
import pandas
import dataretrieval.wqp as wqp
from dataretrieval import wqp
from harmonize_wq import harmonize
from harmonize_wq import domains
from harmonize_wq import wrangle
Expand Down Expand Up @@ -71,13 +71,11 @@ def infer_CRS(df_in,
df_out = df_in.copy()
if bad_crs_val:
# QA flag for bad CRS based on bad_crs_val
flag = '{}: Bad datum {}, EPSG:{} assumed'.format(crs_col,
bad_crs_val,
out_EPSG)
flag = f'{crs_col}: Bad datum {bad_crs_val}, EPSG:{out_EPSG} assumed'
c_mask = df_out[crs_col] == bad_crs_val # Mask for bad CRS value
else:
# QA flag for missing CRS
flag = '{}: MISSING datum, EPSG:{} assumed'.format(crs_col, out_EPSG)
flag = f'{crs_col}: MISSING datum, EPSG:{out_EPSG} assumed'
c_mask = df_out[crs_col].isna() # Mask for missing units
df_out = harmonize.add_qa_flag(df_out, c_mask, flag) # Assign flag
df_out.loc[c_mask, out_col] = out_EPSG # Update with infered unit
Expand Down
13 changes: 6 additions & 7 deletions harmonize_wq/visualize.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# -*- coding: utf-8 -*-
"""Functions to help visualize data."""
from math import sqrt
import pandas
import geopandas
from math import sqrt
from harmonize_wq import wrangle


Expand Down Expand Up @@ -40,11 +40,11 @@ def print_report(results_in, out_col, unit_col_in, threshold=None):
# Series with just magnitude
results_s = pandas.Series([x.magnitude for x in results])
# Number of usable results
print('-Usable results-\n{}'.format(results_s.describe()))
print(f'-Usable results-\n{results_s.describe()}')
# Number measures unused
print('Unusable results: {}'.format(len(results_in)-len(results)))
print(f'Unusable results: {len(results_in)-len(results)}')
# Number of inferred result units
print('Usable results with inferred units: {}'.format(len(inferred)))
print(f'Usable results with inferred units: {len(inferred)}')
# Results outside thresholds
if not threshold:
# TODO: Default mean +/-1 standard deviation works here but generally 6
Expand All @@ -53,9 +53,8 @@ def print_report(results_in, out_col, unit_col_in, threshold=None):
inside = results_s[(results_s <= threshold['max']) &
(results_s >= threshold['min'])]
diff = len(results) - len(inside)
print('Results outside threshold ({} to {}): {}'.format(threshold['min'],
threshold['max'],
diff))
threshold_range = f"{threshold['min']} to {threshold['max']}"
print(f'Results outside threshold ({threshold_range}): {diff}')

# Graphic representation of stats
inside.hist(bins=int(sqrt(inside.count())))
Expand Down
43 changes: 19 additions & 24 deletions harmonize_wq/wq_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from harmonize_wq import domains
from harmonize_wq import basis
from harmonize_wq import convert
from harmonize_wq import harmonize
from harmonize_wq import harmonize

class WQCharData():
"""Class for specific characteristic in Water Quality Portal results.
Expand Down Expand Up @@ -106,11 +106,10 @@ def _coerce_measure(self):
for bad_meas in pandas.unique(bad_measures):
# Flag each unique bad measure one measure (not row) at a time
if pandas.isna(bad_meas):
flag = '{}: missing (NaN) result'.format(meas_col)
flag = f'{meas_col}: missing (NaN) result'
cond = c_mask & (df_out[meas_col].isna())
else:
flag = '{}: "{}" result cannot be used'.format(meas_col,
bad_meas)
flag = f'{meas_col}: "{bad_meas}" result cannot be used'
cond = c_mask & (df_out[meas_col] == bad_meas)
# Flag bad measures
df_out = harmonize.add_qa_flag(df_out, cond, flag)
Expand Down Expand Up @@ -149,7 +148,7 @@ def _unit_qa_flag(self, trouble, flag_col=None):
"""Generate a QA_flag flag string for the units column.
If unit_col is a copy flag_col can specify the original column name for
the flag.
the flag. The default units, self.units replaces the problem unit.
Parameters
----------
Expand All @@ -164,13 +163,11 @@ def _unit_qa_flag(self, trouble, flag_col=None):
string
Flag to use in QA_flag column.
"""
unit = self.units # The default unit that replaced the problem unit
if flag_col:
return '{}: {} UNITS, {} assumed'.format(flag_col, trouble, unit)
return f'{flag_col}: {trouble} UNITS, {self.units} assumed'
# Else: Used when flag_col is None, typically the column being checked
unit_col = self.col.unit_out
return '{}: {} UNITS, {} assumed'.format(unit_col, trouble, unit)

return f'{self.col.unit_out}: {trouble} UNITS, {self.units} assumed'

def _replace_in_col(self, col, old_val, new_val, mask=None):
"""Replace string throughout column, filter rows to skip by mask.
Expand Down Expand Up @@ -203,7 +200,7 @@ def _replace_in_col(self, col, old_val, new_val, mask=None):
#str.replace did not work for short str to long str (over-replaces)
#df.loc[mask, col] = df.loc[mask, col].str.replace(old_val, new_val)
df_in.loc[mask_old, col] = new_val # This should be more explicit

return df_in

def _dimension_handling(self, unit, quant=None, ureg=None):
Expand All @@ -230,7 +227,7 @@ def _dimension_handling(self, unit, quant=None, ureg=None):
units = self.units
if ureg is None:
ureg = pint.UnitRegistry()

# Conversion to moles performed a level up from here (class method)
if ureg(units).check({'[length]': -3, '[mass]': 1}):
# Convert to density, e.g., '%' -> 'mg/l'
Expand Down Expand Up @@ -328,7 +325,7 @@ def check_units(self, flag_col=None):
except pint.UndefinedUnitError:
# WARNING: Does not catch '%' or bad units in ureg (eg deg F)
# If bad, flag and replace
problem = "'{}' UNDEFINED UNIT for {}".format(unit, self.out_col)
problem = f"'{unit}' UNDEFINED UNIT for {self.out_col}"
warn("WARNING: " + problem)
flag = self._unit_qa_flag(problem, flag_col)
# New mask for bad units
Expand Down Expand Up @@ -406,13 +403,13 @@ def check_basis(self, basis_col='MethodSpecificationName'):

# Basis from MethodSpecificationName
if basis_col == 'MethodSpecificationName':

# Add basis out column (i.e., 'Speciation') if it doesn't exist
if self.col.basis not in self.df.columns:
self.df[self.col.basis] = nan

# Mask to characteristic
self.df[c_mask] = basis.basis_from_methodSpec(self.df[c_mask])
self.df[c_mask] = basis.basis_from_method_spec(self.df[c_mask])

# Basis from unit
try:
Expand Down Expand Up @@ -824,7 +821,7 @@ def fraction(self, frac_dict=None, suffix=None,
if suffix is None:
suffix = self.out_col

catch_all = 'Other_{}'.format(suffix)
catch_all = f'Other_{suffix}'
if frac_dict is None:
frac_dict = {catch_all: ''}
else:
Expand Down Expand Up @@ -859,7 +856,7 @@ def fraction(self, frac_dict=None, suffix=None,
harmonize_fract = harmonize_dict[char.upper()]
# Loop through dictionary making updates to sample fraction
for fract_set in harmonize_fract.values():
for row in fract_set.items():
for row in fract_set.items():
fract_mask = df_out[c_mask][fract_col].isin(row[1]) # Mask by values
df_out[c_mask][fract_mask][fract_col] = row[0] # Update to key
# Compare df_out against self.df to add QA flag if changed
Expand All @@ -869,20 +866,18 @@ def fraction(self, frac_dict=None, suffix=None,
# TODO: LEFT OFF ABOVE IS STILL EMPTY

self.df = df_out

# Make column for any unexpected Sample Fraction values, loudly
for s_f in set(df_out[c_mask][fract_col].dropna()):
if s_f not in samp_fract_set:
char = '{}_{}'.format(s_f.replace(' ', '_'), suffix)
char = f"{s_f.replace(' ', '_')}_{suffix}"
frac_dict[char] = s_f
prob = '"{}" column for {}, may be error'.format(char, s_f)
warn('Warning: ' + prob)
warn(f'Warning: "{char}" column for {s_f}, may be error')
# TODO: add QA_flag
# Test we didn't skip any SampleFraction
samp_fract_set = sorted({x for v in frac_dict.values() for x in v})
for s_f in set(df_out[c_mask][fract_col].dropna()):
assert s_f in samp_fract_set, '{} check in {}'.format(s_f,
fract_col)
assert s_f in samp_fract_set, f'{s_f} check in {fract_col}'
# Create out columns for each sample fraction
for frac in frac_dict.items():
col = frac[0] # New column name
Expand Down
23 changes: 12 additions & 11 deletions harmonize_wq/wrangle.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from harmonize_wq import domains
from harmonize_wq import harmonize
from harmonize_wq.clean import datetime, harmonize_depth
import dataretrieval.wqp as wqp
from dataretrieval import wqp


def split_table(df_in):
Expand Down Expand Up @@ -169,13 +169,14 @@ def collapse_results(df_in, cols=None):
df = df.drop_duplicates()

# TODO: use date instead of datetime if na? (date_idx)
idx_cols = ['MonitoringLocationIdentifier',
if not cols:
cols = ['MonitoringLocationIdentifier',
'Activity_datetime',
'ActivityIdentifier',
'OrganizationIdentifier']
df_indexed = df.groupby(by=idx_cols, dropna=False).first()
df_indexed = df.groupby(by=cols, dropna=False).first()
# TODO: warn about multi-lines with values (only returns first)
problems = df.groupby(by=idx_cols, dropna=False).first(min_count=2)
problems = df.groupby(by=cols, dropna=False).first(min_count=2)
problems = problems.dropna(axis=1, how='all')
return df_indexed

Expand Down Expand Up @@ -561,14 +562,14 @@ def merge_tables(df1, df2, df2_cols='all', merge_cols='activity'):
# Check columns in both tables
shared = [x for x in list(df1.columns) if x in col2_list]
for col in merge_cols:
assert col in shared, '{} not in both DataFrames'.format(col)
assert col in shared, f'{col} not in both DataFrames'
# Columns to add from df2
if df2_cols == 'all':
# All columns not in df1
df2_cols = [x for x in col2_list if x not in list(df1.columns)]
else:
for col in df2_cols:
assert col in col2_list, '{} not in DataFrame'.format(col)
assert col in col2_list, f'{col} not in DataFrame'

# Merge activity columns to narrow results
df2 = df2[merge_cols + df2_cols] # Limit df2 to columns we want
Expand Down Expand Up @@ -640,15 +641,15 @@ def get_bounding_box(shp, idx=None):
shp = as_gdf(shp)

if idx is None:
bBox = shp.total_bounds
bbox = shp.total_bounds
else:
xmin = shp.bounds['minx'][idx]
xmax = shp.bounds['maxx'][idx]
ymin = shp.bounds['miny'][idx]
ymax = shp.bounds['maxy'][idx]
bBox = [xmin, ymin, xmax, ymax]
bbox = [xmin, ymin, xmax, ymax]

return ','.join(map(str, bBox))
return ','.join(map(str, bbox))


def clip_stations(stations, aoi):
Expand Down Expand Up @@ -756,10 +757,10 @@ def to_simple_shape(gdf, out_shp):
cols = gdf.columns # List of current column names
names_dict = domains.stations_rename() # Dict of column names to update
# Rename non-results columns that are too long for shp field names
renaming_list = [col for col in cols if col in names_dict.keys()]
renaming_list = [col for col in cols if col in names_dict]
renaming_dict = {old_col: names_dict[old_col] for old_col in renaming_list}
# Identify possible results columns before renaming columns
possible_results = [col for col in cols if col not in names_dict.keys()]
possible_results = [col for col in cols if col not in names_dict]
gdf = gdf.rename(columns=renaming_dict) # Rename columns
# TODO: old_field should be assigned to alias if output driver allows
# field_map1...
Expand Down

0 comments on commit 62292bd

Please sign in to comment.