Skip to content

Commit

Permalink
Fix up docs for Fraction
Browse files Browse the repository at this point in the history
  • Loading branch information
jbousquin committed Feb 5, 2024
1 parent 48fa750 commit 9434fca
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 8 deletions.
8 changes: 7 additions & 1 deletion harmonize_wq/harmonize.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,8 +467,14 @@ def harmonize_all(df_in, errors='raise'):
>>> df1.shape
(359505, 35)
When running the function there may be read outs or warnings, as things are
encountered such as unexpected nutrient sample fractions:
>>> from harmonize_wq import harmonize
>>> df_result_all = harmonize.harmonize_all(df1)
2 Phosphorus sample fractions not in frac_dict
1 Phosphorus sample fractions not in frac_dict found in expected domains, mapped to "Other_Phosphorus"
>>> df_result_all
OrganizationIdentifier ... Other_Phosphorus
0 21FLHILL_WQX ... NaN
Expand Down Expand Up @@ -654,7 +660,7 @@ def harmonize(df_in, char_val, units_out=None, errors='raise',
if out_col=='Phosphorus':
frac_dict = {'TP_Phosphorus': ['Total'],
'TDP_Phosphorus': ['Dissolved'],
'Other_Phosphorus': [''],}
'Other_Phosphorus': ['', nan],}
else:
frac_dict = 'TADA'
frac_dict = wqp.fraction(frac_dict) # Run sample fraction on WQP
Expand Down
78 changes: 71 additions & 7 deletions harmonize_wq/wq_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -817,11 +817,78 @@ def fraction(self, frac_dict=None, catch_all=None, suffix=None,
Examples
--------
Not fully implemented with TADA table yet.
Build pandas DataFrame to use as input:
>>> from pandas import DataFrame
>>> df = DataFrame({'CharacteristicName': ['Phosphorus', 'Phosphorus',],
... 'ResultMeasure/MeasureUnitCode': ['mg/l', 'mg/kg',],
... 'ResultMeasureValue': ['1.0', '10',],
... 'ResultSampleFractionText': ['Dissolved', ''],
... })
>>> df
CharacteristicName ... ResultSampleFractionText
0 Phosphorus ... Dissolved
1 Phosphorus ...
<BLANKLINE>
[2 rows x 4 columns]
Build WQ Characteristic Data class from pandas DataFrame:
>>> from harmonize_wq import wq_data
>>> wq = wq_data.WQCharData(df, 'Phosphorus')
Go through required checks and conversions
>>> wq.check_units()
>>> dimension_dict, mol_list = wq.dimension_fixes()
>>> wq.replace_unit_by_dict(dimension_dict, wq.measure_mask())
>>> wq.moles_convert(mol_list)
>>> wq.convert_units()
>>> wq.df.columns
Index(['CharacteristicName', 'ResultMeasure/MeasureUnitCode',
'ResultMeasureValue', 'ResultSampleFractionText', 'Units', 'Phosphorus',
'QA_flag'],
dtype='object')
>>> wq.df['Phosphorus']
0 1.0 milligram / liter
1 10.000000000000002 milligram / liter
Name: Phosphorus, dtype: object
These results may have differen, non-comprable sample fractions. First,
split results using a provided frac_dict (as used in harmonize()):
>>> from numpy import nan
>>> frac_dict = {'TP_Phosphorus': ['Total'],
'TDP_Phosphorus': ['Dissolved'],
'Other_Phosphorus': ['', nan],}
>>> wq.fraction(frac_dict)
>>> wq.df.columns
Index(['CharacteristicName', 'ResultMeasure/MeasureUnitCode',
'ResultMeasureValue', 'ResultSampleFractionText', 'Units', 'Phosphorus',
'QA_flag', 'TDP_Phosphorus', 'Other_Phosphorus'],
dtype='object')
>>> wq.df[['TDP_Phosphorus', 'Other_Phosphorus']]
TDP_Phosphorus Other_Phosphorus
0 1.0 milligram / liter NaN
1 NaN 10.000000000000002 milligram / liter
Alternatively, the sample fraction lists from tada can be used, in this case they are added:
>>> wq.fraction('TADA')
>>> wq.df.columns
Index(['CharacteristicName', 'ResultMeasure/MeasureUnitCode',
'ResultMeasureValue', 'ResultSampleFractionText', 'Units', 'Phosphorus',
'QA_flag', 'TDP_Phosphorus', 'Other_Phosphorus',
'TOTAL PHOSPHORUS_ MIXED FORMS'],
dtype='object')
>>> wq.df[['TOTAL PHOSPHORUS_ MIXED FORMS', 'Other_Phosphorus']]
TOTAL PHOSPHORUS_ MIXED FORMS Other_Phosphorus
0 1.0 milligram / liter NaN
1 NaN 10.000000000000002 milligram / liter
"""
# Check for sample fraction column
harmonize.df_checks(self.df, [fract_col])

c_mask = self.c_mask

fracs = list(set(self.df[c_mask][fract_col])) # List of fracs in data
Expand All @@ -831,10 +898,9 @@ def fraction(self, frac_dict=None, catch_all=None, suffix=None,
# Replace bad sample fraction w/ nan
self.df = self._replace_in_col(fract_col, ' ', nan, c_mask)
fracs.remove(' ')

df_out = self.df # Set var for easier referencing
char = list(set(df_out[self.c_mask]['CharacteristicName']))[0]


# Deal with lack of args
if suffix is None:
Expand All @@ -857,7 +923,7 @@ def fraction(self, frac_dict=None, catch_all=None, suffix=None,
#else: dict was already provided
if catch_all not in frac_dict.keys():
frac_dict[catch_all] = ['', nan]
# Make sure catch_all exists
# Make sure catch_all exists
if not isinstance(frac_dict[catch_all], list):
frac_dict[catch_all] = [frac_dict[catch_all]]

Expand Down Expand Up @@ -900,8 +966,6 @@ def fraction(self, frac_dict=None, catch_all=None, suffix=None,

self.df = df_out

return frac_dict

def dimension_fixes(self):
"""
Input/output for dimension handling.
Expand Down

0 comments on commit 9434fca

Please sign in to comment.