diff --git a/contributing.rst b/contributing.rst index 6e8c647..6f02d89 100644 --- a/contributing.rst +++ b/contributing.rst @@ -38,6 +38,7 @@ and submit the changes using a pull request against the **main** branch. - If you are submitting new code, add tests (see below) and documentation. - Write "Closes #" in the PR description or a comment, as described in the `GitHub docs`_. +- Classes, methods, functions, etc. should have docstrings. - Check tests and resolve any issues. In any case, feel free to use the `issue tracker`_ to discuss ideas for new features or improvements. @@ -49,6 +50,16 @@ There might be multiple reasons for this but these are some of the most common: - Your new code does not work for other operating systems or Python versions. - The documentation is not being built properly or the examples in the docs are not working. +Development environment setup +----------------------------- + +- pip install the latest development version of the package from `GitHub <https://github.com/USEPA/harmonize-wq>`_ +- Install the requirements for the development environment by pip installing the additional requirements-dev.txt file. + +Docs are built using Sphinx. +Tests are run using pytest. + +There are workflows using GitHub Actions for both docs and tests to help avoid 'it worked on my machine' type development issues. .. _`issue tracker`: https://github.com/USEPA/harmonize-wq/issues .. 
_`GitHub docs`: https://help.github.com/articles/closing-issues-via-commit-messages/ diff --git a/harmonize_wq/__init__.py b/harmonize_wq/__init__.py index c49cce6..3fec437 100644 --- a/harmonize_wq/__init__.py +++ b/harmonize_wq/__init__.py @@ -1,9 +1,5 @@ from harmonize_wq import harmonize - -try: - from importlib.metadata import version, PackageNotFoundError -except ImportError: - from importlib_metadata import version, PackageNotFoundError +from importlib.metadata import version, PackageNotFoundError try: __version__ = version('harmonize_wq') diff --git a/harmonize_wq/basis.py b/harmonize_wq/basis.py index a7074fd..03926d1 100644 --- a/harmonize_wq/basis.py +++ b/harmonize_wq/basis.py @@ -1,97 +1,89 @@ # -*- coding: utf-8 -*- """Functions to process characteristic basis or return basis dictionary.""" + +import numpy from warnings import warn -from numpy import nan from harmonize_wq.clean import add_qa_flag -def unit_basis_dict(out_col): - """Characteristic specific basis dictionary to define basis from units. - - The out_col is often derived from :attr:`WQCharData.char_val`. The desired - basis can be used as a key to subset result. - - Parameters - ---------- - out_col : str - Column name where results are written. - - Returns - ------- - dict - Dictionary with logic for determining basis from units string and - standard :mod:`pint` units to replace those with. - The structure is {Basis: {standard units: [unit strings with basis]}}. 
- Examples - -------- - Get dictionary for Phosphorus and subset for 'as P': - - >>> from harmonize_wq import basis - >>> basis.unit_basis_dict('Phosphorus')['as P'] - {'mg/l': ['mg/l as P', 'mg/l P'], 'mg/kg': ['mg/kg as P', 'mg/kg P']} - """ - dictionary = {'Phosphorus': {'as P': {'mg/l': ['mg/l as P', 'mg/l P'], - 'mg/kg': ['mg/kg as P', 'mg/kg P']}, - 'as PO4': {'mg/l': ['mg/l as PO4', - 'mg/l PO4'], - 'mg/kg': ['mg/kg as PO4', - 'mg/kg PO4']}}, - 'Nitrogen': {'as N': {'mg/l': ['mg/l as N', 'mg/l N']}}, - 'Carbon': {}, - } - return dictionary[out_col] - - -def basis_conversion(): - """Get dictionary of conversion factors to convert basis/speciation. - - For example, this is used to convert 'as PO4' to 'as P'. - - Returns - ------- - dict - Dictionary with structure {basis: conversion factor} - - See Also - -------- - :func:`convert.moles_to_mass` +"""Characteristic specific basis dictionary to define basis from units. + +The out_col is often derived from :attr:`WQCharData.char_val`. The desired +basis can be used as a key to subset result. + +Parameters +---------- +out_col : str + Column name where results are written. + +Returns +------- + dict + Dictionary with logic for determining basis from units string and + standard :mod:`pint` units to replace those with. + The structure is {Basis: {standard units: [unit strings with basis]}}. + +Examples +-------- +Get dictionary for Phosphorus and subset for 'as P': + +>>> from harmonize_wq import basis +>>> basis.unit_basis_dict['Phosphorus']['as P'] +{'mg/l': ['mg/l as P', 'mg/l P'], 'mg/kg': ['mg/kg as P', 'mg/kg P']} +""" +unit_basis_dict = { + "Phosphorus": { + "as P": {"mg/l": ["mg/l as P", "mg/l P"], "mg/kg": ["mg/kg as P", "mg/kg P"]}, + "as PO4": { + "mg/l": ["mg/l as PO4", "mg/l PO4"], + "mg/kg": ["mg/kg as PO4", "mg/kg PO4"], + }, + }, + "Nitrogen": {"as N": {"mg/l": ["mg/l as N", "mg/l N"]}}, + "Carbon": {}, +} + +"""basis.basis_conversion: Get dictionary of conversion factors to convert basis/speciation. 
+ +basis.basis_conversion. For example, this is used to convert 'as PO4' to 'as P'. + +Returns +------- +dict + Dictionary with structure {basis: conversion factor} + +See Also +-------- +:func:`convert.moles_to_mass` + +`Best Practices for Submitting Nutrient Data to the Water Quality eXchange +`_ +""" +basis_conversion = { + "NH3": 0.822, + "NH4": 0.776, + "NO2": 0.304, + "NO3": 0.225, + "PO4": 0.326, +} + +"""basis.stp_dict: Get standard temperature and pressure to define basis from units. + +Notes +----- + This needs to be updated to include pressure or needs to be renamed. - `Best Practices for Submitting Nutrient Data to the Water Quality eXchange - `_ - """ - return {'NH3': 0.822, - 'NH4': 0.776, - 'NO2': 0.304, - 'NO3': 0.225, - 'PO4': 0.326} +Returns +------- +dict + Dictionary with {'standard temp' : {'units': [values to replace]}}. +""" +stp_dict = {"@25C": {"mg/mL": ["mg/mL @25C"]}} -def stp_dict(): - """Get standard temperature and pressure to define basis from units. - - Notes - ----- - This needs to be updated to include pressure or needs to be renamed. - - Returns - ------- - dict - Dictionary with {'standard temp' : {'units': [values to replace]}}. - - Examples - -------- - Get dictionary for taking temperature basis our of units: - - >>> from harmonize_wq import basis - >>> basis.stp_dict() - {'@25C': {'mg/mL': ['mg/mL @25C']}} - """ - return {'@25C': {'mg/mL': ['mg/mL @25C']}} - - -def basis_from_unit(df_in, basis_dict, unit_col='Units', basis_col='Speciation'): +def basis_from_unit(df_in, basis_dict, unit_col="Units", basis_col="Speciation"): """Move basis from units to basis column in :class:`pandas.DataFrame`. - + Move basis information from units in unit_col column to basis in basis_col column based on basis_dict. If basis_col does not exist in df_in it will be created. The unit_col column is updated in place. 
To maintain data @@ -119,7 +111,7 @@ def basis_from_unit(df_in, basis_dict, unit_col='Units', basis_col='Speciation') Examples -------- Build pandas DataFrame for example: - + >>> from pandas import DataFrame >>> df = DataFrame({'CharacteristicName': ['Phosphorus', 'Phosphorus',], ... 'ResultMeasure/MeasureUnitCode': ['mg/l as P', 'mg/kg as P'], @@ -131,16 +123,16 @@ def basis_from_unit(df_in, basis_dict, unit_col='Units', basis_col='Speciation') 1 Phosphorus mg/kg as P mg/kg as P >>> from harmonize_wq import basis - >>> basis_dict = basis.unit_basis_dict('Phosphorus') + >>> basis_dict = basis.unit_basis_dict['Phosphorus'] >>> unit_col = 'Units' >>> basis.basis_from_unit(df, basis_dict, unit_col) CharacteristicName ResultMeasure/MeasureUnitCode Units Speciation 0 Phosphorus mg/l as P mg/l as P 1 Phosphorus mg/kg as P mg/kg as P - - If an existing basis_col value is different, a warning is issued when it is + + If an existing basis_col value is different, a warning is issued when it is updated and a QA_flag is assigned: - + >>> from numpy import nan >>> df['Speciation'] = [nan, 'as PO4'] >>> df_speciation_change = basis.basis_from_unit(df, basis_dict, unit_col) @@ -161,7 +153,7 @@ def basis_from_unit(df_in, basis_dict, unit_col='Units', basis_col='Speciation') # Add flags anywhere the values are updated flag1 = f'{basis_col}: updated from ' # List of unique basis values - basis_list = list(set(df.loc[mask, basis_col].dropna())) + basis_list = df.loc[mask, basis_col].dropna().unique() # Loop over existing values in basis field for old_basis in basis_list: flag = f'{flag1}{old_basis} to {base} (units)' @@ -178,7 +170,7 @@ def basis_from_unit(df_in, basis_dict, unit_col='Units', basis_col='Speciation') def basis_from_method_spec(df_in): """Copy speciation from MethodSpecificationName to new 'Speciation' column. 
- + Parameters ---------- df_in : pandas.DataFrame @@ -192,7 +184,7 @@ def basis_from_method_spec(df_in): Examples -------- Build pandas DataFrame for example: - + >>> from pandas import DataFrame >>> from numpy import nan >>> df = DataFrame({'CharacteristicName': ['Phosphorus', 'Phosphorus',], @@ -204,7 +196,7 @@ def basis_from_method_spec(df_in): 0 Phosphorus as P NWIS 1 Phosphorus NaN NWIS - >>> from harmonize_wq import basis + >>> from harmonize_wq import basis >>> basis.basis_from_method_spec(df) CharacteristicName MethodSpecificationName ProviderName Speciation 0 Phosphorus as P NWIS as P @@ -221,23 +213,23 @@ def basis_from_method_spec(df_in): mask = df[old_col] == base df = set_basis(df, mask, base) # Remove basis from MethodSpecificationName - #TODO: why update old field? - #df[old_col] = [nan if x == base else x for x in df[old_col]] + # TODO: why update old field? + # df[old_col] = [nan if x == base else x for x in df[old_col]] # Test we didn't miss any methodSpec - #assert set(df[old_col].dropna()) == set(), (set(df[old_col].dropna())) + # assert set(df[old_col].dropna()) == set(), (set(df[old_col].dropna())) return df def update_result_basis(df_in, basis_col, unit_col): """Move basis from unit_col column to basis_col column. - + This is usually used in place of basis_from_unit when the basis_col is not 'ResultMeasure/MeasureUnitCode' (i.e., not speciation). - + Notes ----- - Rather than creating many new empty columns this function currently overwrites the original + Rather than creating many new empty columns this function currently overwrites the original basis_col values. The original values are noted in the QA_flag. 
Parameters @@ -258,7 +250,7 @@ def update_result_basis(df_in, basis_col, unit_col): Examples -------- Build pandas DataFrame for example: - + >>> from pandas import DataFrame >>> from numpy import nan >>> df = DataFrame({'CharacteristicName': ['Salinity', 'Salinity',], @@ -269,8 +261,8 @@ def update_result_basis(df_in, basis_col, unit_col): CharacteristicName ResultTemperatureBasisText Units 0 Salinity 25 deg C mg/mL @25C 1 Salinity NaN mg/mL @25C - - >>> from harmonize_wq import basis + + >>> from harmonize_wq import basis >>> df_temp_basis = basis.update_result_basis(df, ... 'ResultTemperatureBasisText', ... 'Units') @@ -290,7 +282,7 @@ def update_result_basis(df_in, basis_col, unit_col): # Basis from unit if basis_col == 'ResultTemperatureBasisText': - df_out = basis_from_unit(df_in.copy(), stp_dict(), unit_col, basis_col) + df_out = basis_from_unit(df_in.copy(), stp_dict, unit_col, basis_col) # NOTE: in the test case 25 deg C -> @25C elif basis_col == 'ResultParticleSizeBasisText': # NOTE: These are normally 'less than x mm', no errors so far to fix @@ -328,7 +320,7 @@ def set_basis(df_in, mask, basis, basis_col='Speciation'): Examples -------- Build pandas DataFrame for example: - + >>> from pandas import DataFrame >>> df = DataFrame({'CharacteristicName': ['Phosphorus', ... 
'Phosphorus', @@ -339,12 +331,12 @@ def set_basis(df_in, mask, basis, basis_col='Speciation'): CharacteristicName MethodSpecificationName 0 Phosphorus as P 1 Phosphorus as PO4 - 2 Salinity - + 2 Salinity + Build mask for example: >>> mask = df['CharacteristicName']=='Phosphorus' - + >>> from harmonize_wq import basis >>> basis.set_basis(df, mask, basis='as P') CharacteristicName MethodSpecificationName Speciation @@ -353,9 +345,8 @@ def set_basis(df_in, mask, basis, basis_col='Speciation'): 2 Salinity NaN """ df_out = df_in.copy() - # Add Basis column if it doesn't exist if basis_col not in df_out.columns: - df_out[basis_col] = nan - # Populate Basis column where expected value with basis + df_out[basis_col] = numpy.nan + # Otherwise don't mess with existing values that are not part of mask df_out.loc[mask, basis_col] = basis return df_out diff --git a/harmonize_wq/clean.py b/harmonize_wq/clean.py index 52ac28e..4007527 100644 --- a/harmonize_wq/clean.py +++ b/harmonize_wq/clean.py @@ -221,7 +221,7 @@ def methods_check(df_in, char_val, methods=None): """ if methods is None: - methods = accepted_methods() + methods = accepted_methods method_col = 'ResultAnalyticalMethod/MethodIdentifier' df2 = df_in.copy() # TODO: check df for method_col diff --git a/harmonize_wq/domains.py b/harmonize_wq/domains.py index 0415e0c..85a258b 100644 --- a/harmonize_wq/domains.py +++ b/harmonize_wq/domains.py @@ -137,7 +137,7 @@ def harmonize_TADA_dict(): df = pandas.read_csv(csv) # Read csv url to DataFrame full_dict = {} # Setup results dict # Loop over one unique characteristicName at a time - for char in list(set(df['TADA.CharacteristicName'].to_list())): + for char in df['TADA.CharacteristicName'].unique(): sub_df = df[df['TADA.CharacteristicName']==char] # Mask by char full_dict[char] = char_tbl_TADA(sub_df, char) # Build dictionary @@ -178,7 +178,7 @@ def re_case(word, domain_list): domain_list_upper = [x.upper() for x in domain_list] try: idx = domain_list_upper.index(word) - 
except: + except ValueError: return word return domain_list[idx] @@ -211,15 +211,15 @@ def char_tbl_TADA(df, char): # loop over new chars, getting {new_fract: [old fracts]} new_char_dict = {} - for new_char in list(set(sub_df[cols[0]])): + for new_char in sub_df[cols[0]].unique(): new_char_df = sub_df[sub_df[cols[0]]==new_char] # Mask by new_char new_fract_dict = {} - for new_fract in list(set(new_char_df[cols[2]])): + for new_fract in new_char_df[cols[2]].unique(): # TODO: {nan: []}? Doesn't break but needs handling later # Mask by new_fract new_fract_df = new_char_df[new_char_df[cols[2]]==new_fract] # Add a list of possible old_fract for new_fract key - new_fract_dict[new_fract] = list(set(new_fract_df[cols[1]])) + new_fract_dict[new_fract] = new_fract_df[cols[1]].unique() new_char_dict[new_char] = new_fract_dict return new_char_dict @@ -292,43 +292,43 @@ def registry_adds_list(out_col): return ureg_adds[out_col] -def out_col_lookup(): - """Get {CharacteristicName: out_column_name}. - - This is often subset and used to write results to a new column from the - 'CharacteristicName' column. +"""Get {CharacteristicName: out_column_name}. - Returns - ------- - dict - {WQP CharacteristicName:Column Name}. +This is often subset and used to write results to a new column from the +'CharacteristicName' column. - Examples - -------- - The function returns the full dictionary {CharacteristicName: out_column_name}. - It can be subset by a 'CharactisticName' column value to get the name of - the column for results: - - >>> domains.out_col_lookup()['Escherichia coli'] - 'E_coli' - """ - # TODO: something special for phosphorus? Currently return suffix. 
- # 'Phosphorus' -> ['TP_Phosphorus', 'TDP_Phosphorus', 'Other_Phosphorus'] - return {'Depth, Secchi disk depth': 'Secchi', - 'Dissolved oxygen (DO)': 'DO', - 'Temperature, water': 'Temperature', - 'Salinity': 'Salinity', - 'pH': 'pH', - 'Nitrogen': 'Nitrogen', - 'Conductivity': 'Conductivity', - 'Organic carbon': 'Carbon', - 'Chlorophyll a': 'Chlorophyll', - 'Turbidity': 'Turbidity', - 'Sediment': 'Sediment', - 'Fecal Coliform': 'Fecal_Coliform', - 'Escherichia coli': 'E_coli', - 'Phosphorus': 'Phosphorus', - } +Returns +------- +dict + {WQP CharacteristicName:Column Name}. + +Examples +-------- +The function returns the full dictionary {CharacteristicName: out_column_name}. +It can be subset by a 'CharacteristicName' column value to get the name of +the column for results: + +>>> domains.out_col_lookup['Escherichia coli'] +'E_coli' +""" +# TODO: something special for phosphorus? Currently return suffix. +# 'Phosphorus' -> ['TP_Phosphorus', 'TDP_Phosphorus', 'Other_Phosphorus'] +out_col_lookup = { + "Depth, Secchi disk depth": "Secchi", + "Dissolved oxygen (DO)": "DO", + "Temperature, water": "Temperature", + "Salinity": "Salinity", + "pH": "pH", + "Nitrogen": "Nitrogen", + "Conductivity": "Conductivity", + "Organic carbon": "Carbon", + "Chlorophyll a": "Chlorophyll", + "Turbidity": "Turbidity", + "Sediment": "Sediment", + "Fecal Coliform": "Fecal_Coliform", + "Escherichia coli": "E_coli", + "Phosphorus": "Phosphorus", + } def characteristic_cols(category=None): @@ -503,452 +503,462 @@ def characteristic_cols(category=None): return col_list -def xy_datum(): - """Get dictionary of expected horizontal datums. +"""Get dictionary of expected horizontal datums. - The structure has {key as expected string: value as {"Description": string - and "EPSG": integer (4-digit code)}. +The structure has {key as expected string: value as {"Description": string +and "EPSG": integer (4-digit code)}. 
- Notes - ----- - source WQP: HorizontalCoordinateReferenceSystemDatum_CSV.zip - - Anything not in dict will be NaN, and non-integer EPSG will be missing: - "OTHER": {"Description": 'Other', "EPSG": nan}, - "UNKWN": {"Description": 'Unknown', "EPSG": nan} +Notes +----- +source WQP: HorizontalCoordinateReferenceSystemDatum_CSV.zip - Returns - ------- - dict - Dictionary where exhaustive: - {HorizontalCoordinateReferenceSystemDatumName: {Description:str, - EPSG:int}} +Anything not in dict will be NaN, and non-integer EPSG will be missing: +"OTHER": {"Description": 'Other', "EPSG": nan}, +"UNKWN": {"Description": 'Unknown', "EPSG": nan} - Examples - -------- - Running the function returns the full dictionary with {abbreviation: - {'Description':values, 'EPSG':values}}. The abbreviation key can be used to - get the EPSG code: - - >>> domains.xy_datum()['NAD83'] - {'Description': 'North American Datum 1983', 'EPSG': 4269} - >>> domains.xy_datum()['NAD83']['EPSG'] - 4269 - """ - return {"NAD27": {"Description": 'North American Datum 1927', - "EPSG": 4267}, - "NAD83": {"Description": 'North American Datum 1983', - "EPSG": 4269}, - "AMSMA": {"Description": 'American Samoa Datum', - "EPSG": 4169}, - "ASTRO": {"Description": 'Midway Astro 1961', - "EPSG": 4727}, - "GUAM": {"Description": 'Guam 1963', - "EPSG": 4675}, - "JHNSN": {"Description": 'Johnson Island 1961', - "EPSG": 4725}, - "OLDHI": {"Description": 'Old Hawaiian Datum', - "EPSG": 4135}, - "PR": {"Description": 'Puerto Rico Datum', - "EPSG": 6139}, - "SGEOR": {"Description": 'St. George Island Datum', - "EPSG": 4138}, - "SLAWR": {"Description": 'St. Lawrence Island Datum', - "EPSG": 4136}, - "SPAUL": {"Description": 'St. 
Paul Island Datum', - "EPSG": 4137}, - "WAKE": {"Description": 'Wake-Eniwetok 1960', - "EPSG": 6732}, - "WGS72": {"Description": 'World Geodetic System 1972', - "EPSG": 6322}, - "WGS84": {"Description": 'World Geodetic System 1984', - "EPSG": 4326}, - "HARN": {"Description": 'High Accuracy Reference Network for NAD83', - "EPSG": 4152}, - } - - -def stations_rename(): -# Default field mapping writes full name to alias but a short name to field - """Get shortened column names for shapefile (.shp) fields. - - ESRI places a length restriction on shapefile (.shp) field names. This - returns a dictionary with the original water quality portal field name (as - key) and shortened column name for writing as .shp. We suggest using the - longer original name as the field alias when writing as .shp. +Returns +------- +dict + Dictionary where exhaustive: + {HorizontalCoordinateReferenceSystemDatumName: {Description:str, + EPSG:int}} - Returns - ------- - field_mapping : dict - Dictionary where key = WQP field name and value = short name for .shp. 
- - Examples - -------- - Although running the function returns the full dictionary of Key:Value - pairs, here we show how the current name can be used as a key to get the - new name: - - >>> domains.stations_rename()['OrganizationIdentifier'] - 'org_ID' - """ - return {'OrganizationIdentifier': 'org_ID', - 'OrganizationFormalName': 'org_name', - 'MonitoringLocationIdentifier': 'loc_ID', - 'MonitoringLocationName': 'loc_name', - 'MonitoringLocationTypeName': 'loc_type', - 'MonitoringLocationDescriptionText': 'loc_desc', - 'HUCEightDigitCode': 'HUC08_code', - 'DrainageAreaMeasure/MeasureValue': 'DA_val', - 'DrainageAreaMeasure/MeasureUnitCode': 'DA_unit', - 'ContributingDrainageAreaMeasure/MeasureValue': 'CA_val', - 'ContributingDrainageAreaMeasure/MeasureUnitCode': 'CA_unit', - 'LatitudeMeasure': 'Latitude', - 'LongitudeMeasure': 'Longitude', - 'SourceMapScaleNumeric': 'SRC_Scale', - 'HorizontalAccuracyMeasure/MeasureValue': 'xy_acc', - 'HorizontalAccuracyMeasure/MeasureUnitCode': 'xy_accUnit', - 'HorizontalCollectionMethodName': 'xy_method', - 'HorizontalCoordinateReferenceSystemDatumName': 'xy_datum', - 'VerticalMeasure/MeasureValue': 'z', - 'VerticalMeasure/MeasureUnitCode': 'z_unit', - 'VerticalAccuracyMeasure/MeasureValue': 'z_acc', - 'VerticalAccuracyMeasure/MeasureUnitCode': 'z_accUnit', - 'VerticalCollectionMethodName': 'z_method', - 'VerticalCoordinateReferenceSystemDatumName': 'z_datum', - 'CountryCode': 'country', - 'StateCode': 'state', - 'CountyCode': 'county', - 'AquiferName': 'aquifer', - 'FormationTypeText': 'form_type', - 'AquiferTypeName': 'aquiferType', - 'ConstructionDateText': 'constrDate', - 'WellDepthMeasure/MeasureValue': 'well_depth', - 'WellDepthMeasure/MeasureUnitCode': 'well_unit', - 'WellHoleDepthMeasure/MeasureValue': 'wellhole', - 'WellHoleDepthMeasure/MeasureUnitCode': 'wellHole_unit', - 'ProviderName': 'provider', - 'ActivityIdentifier': 'activity_ID', - 'ResultIdentifier': 'result_ID', - } - - -def accepted_methods(): - """Get 
accepted methods for each characteristic. - - Notes - ----- - Source should be in 'ResultAnalyticalMethod/MethodIdentifierContext' - column. This is not fully implemented. +Examples +-------- +Running the function returns the full dictionary with {abbreviation: +{'Description':values, 'EPSG':values}}. The abbreviation key can be used to +get the EPSG code: - Returns - ------- - dict - Dictionary where key is characteristic column name and value is list of - dictionaries each with Source and Method keys. +>>> domains.xy_datum['NAD83'] +{'Description': 'North American Datum 1983', 'EPSG': 4269} +>>> domains.xy_datum['NAD83']['EPSG'] +4269 +""" +xy_datum = { + "NAD27": {"Description": "North American Datum 1927", "EPSG": 4267}, + "NAD83": {"Description": "North American Datum 1983", "EPSG": 4269}, + "AMSMA": {"Description": "American Samoa Datum", "EPSG": 4169}, + "ASTRO": {"Description": "Midway Astro 1961", "EPSG": 4727}, + "GUAM": {"Description": "Guam 1963", "EPSG": 4675}, + "JHNSN": {"Description": "Johnson Island 1961", "EPSG": 4725}, + "OLDHI": {"Description": "Old Hawaiian Datum", "EPSG": 4135}, + "PR": {"Description": "Puerto Rico Datum", "EPSG": 6139}, + "SGEOR": {"Description": "St. George Island Datum", "EPSG": 4138}, + "SLAWR": {"Description": "St. Lawrence Island Datum", "EPSG": 4136}, + "SPAUL": {"Description": "St. 
Paul Island Datum", "EPSG": 4137}, + "WAKE": {"Description": "Wake-Eniwetok 1960", "EPSG": 6732}, + "WGS72": {"Description": "World Geodetic System 1972", "EPSG": 6322}, + "WGS84": {"Description": "World Geodetic System 1984", "EPSG": 4326}, + "HARN": { + "Description": "High Accuracy Reference Network for NAD83", + "EPSG": 4152, + }, + } - """ - return {'Secchi': [{'Source': 'APHA', 'Method': '2320-B'}, - {'Source': 'ASTM', 'Method': 'D1889'}, - {'Source': 'USEPA', 'Method': 'NRSA09 W QUAL (BOAT)'}, - {'Source': 'USEPA', 'Method': '841-B-11-003'},], - 'DO': [{'Source': 'USEPA', 'Method': '360.2',}, - {'Source': 'USEPA', 'Method': '130.1',}, - {'Source': 'APHA', 'Method': '4500-O-G',}, - {'Source': 'USEPA', 'Method': '160.3',}, - {'Source': 'AOAC', 'Method': '973.45',}, - {'Source': 'USDOI/USGS', 'Method': 'I-1576-78',}, - {'Source': 'USDOI/USGS', 'Method': 'NFM 6.2.1-LUM',}, - {'Source': 'ASTM', 'Method': 'D888(B)',}, - {'Source': 'HACH', 'Method': '8157',}, - {'Source': 'HACH', 'Method': '10360',}, - {'Source': 'ASTM', 'Method': 'D3858',}, - {'Source': 'ASTM', 'Method': 'D888(C)',}, - {'Source': 'APHA', 'Method': ' 4500-O-C',}, - {'Source': 'USEPA', 'Method': '1002-8-2009',}, - {'Source': 'APHA', 'Method': '2550',}, - {'Source': 'USEPA', 'Method': '360.1',}, - {'Source': 'USEPA', 'Method': '841-B-11-003',}, - {'Source': 'ASTM', 'Method': 'D888-12',}, - {'Source': 'YSI', 'Method': 'EXO WQ SONDE',},], - 'Temperature': [{'Source': 'USEPA', 'Method': '170.1'}, - {'Source': 'USEPA', 'Method': '130.1'}, - {'Source': 'USEPA', 'Method': '841-B-11-003'}, - {'Source': 'APHA', 'Method': '2550'}, - {'Source': 'YSI', 'Method': 'EXO WQ SONDE'}, - {'Source': 'APHA', 'Method': '2550 B'},], - 'Salinity': [{'Source': 'YSI', 'Method': 'EXO WQ SONDE'}, - {'Source': 'HACH', 'Method': '8160'}, - {'Source': 'APHA', 'Method': '2520-B'}, - {'Source': 'APHA', 'Method': '2130'}, - {'Source': 'APHA', 'Method': '3.2-B'}, - {'Source': 'APHA', 'Method': '2520-C'},], - 'pH': [{'Source': 'ASTM', 
'Method': 'D1293(B)'}, - {'Source': 'YSI', 'Method': 'EXO WQ SONDE'}, - {'Source': 'USEPA', 'Method': '360.2'}, - {'Source': 'USEPA', 'Method': '130.1'}, - {'Source': 'USDOI/USGS', 'Method': 'I1586'}, - {'Source': 'USDOI/USGS', 'Method': 'I-2587-85'}, - {'Source': 'APHA', 'Method': '3.2-B'}, - {'Source': 'HACH', 'Method': '8219'}, - {'Source': 'AOAC', 'Method': '973.41'}, - {'Source': 'APHA', 'Method': '4500-H'}, - {'Source': 'APHA', 'Method': '2320'}, - {'Source': 'USEPA', 'Method': '150.2'}, - {'Source': 'USEPA', 'Method': '150.1'}, - {'Source': 'USDOI/USGS', 'Method': 'I-1586-85'}, - {'Source': 'USEPA', 'Method': '9040B'}, - {'Source': 'HACH', 'Method': '8156'}, - {'Source': 'ASTM', 'Method': 'D1293(A)'}, - {'Source': 'APHA', 'Method': '4500-H+B'},], - 'Nitrogen': [{'Source': 'USEPA', 'Method': '353.1'}, - {'Source': 'USEPA', 'Method': '353.2'}, - {'Source': 'USEPA', 'Method': '353.2_M'}, - {'Source': 'USEPA', 'Method': '353.3'}, - {'Source': 'USEPA', 'Method': '6020'}, - {'Source': 'USEPA', 'Method': '200.7'}, - {'Source': 'USEPA', 'Method': '8321'}, - {'Source': 'USEPA', 'Method': '365.1'}, - {'Source': 'USEPA', 'Method': '365.3'}, - {'Source': 'USEPA', 'Method': '300'}, - {'Source': 'USEPA', 'Method': '300(A)'}, - {'Source': 'USEPA', 'Method': '350.1'}, - {'Source': 'USEPA', 'Method': '350.3'}, - {'Source': 'USEPA', 'Method': '351.1'}, - {'Source': 'USEPA', 'Method': '351.2'}, - {'Source': 'USEPA', 'Method': '351.3 (TITRATION)'}, - {'Source': 'USEPA', 'Method': '440'}, - {'Source': 'USEPA', 'Method': '440(W)'}, - {'Source': 'USEPA', 'Method': '440(S)'}, - {'Source': 'AOAC', 'Method': '973.48'}, - {'Source': 'USDOI/USGS', 'Method': 'I-4650-03'}, - {'Source': 'USDOI/USGS', 'Method': 'I-2650-03'}, - {'Source': 'USDOI/USGS', 'Method': 'I-4540-85'}, - {'Source': 'ASTM', 'Method': 'D8083-16'}, - {'Source': 'ASTM', 'Method': 'D5176'}, - {'Source': 'ASTM', 'Method': 'D888(B)'}, - {'Source': 'ASTM', 'Method': 'D3590(B)'}, - {'Source': 'HACH', 'Method': '10208'}, - 
{'Source': 'HACH', 'Method': '10071'}, - {'Source': 'HACH', 'Method': '10072'}, - {'Source': 'HACH', 'Method': '10242'}, - {'Source': 'USDOE/ASD', 'Method': 'MS100'}, - {'Source': 'LACHAT', 'Method': '31-107-04-3-A'}, - {'Source': 'LACHAT', 'Method': '31-107-04-4-A'}, - {'Source': 'BL', 'Method': '818-87T'}, - {'Source': 'APHA_SM20ED', 'Method': '4500-N-C'}, - {'Source': 'APHA_SM21ED', 'Method': '4500-N-B'}, - {'Source': 'APHA', 'Method': '4500-N D'}, - {'Source': 'APHA', 'Method': '4500-N'}, - {'Source': 'APHA', 'Method': '4500-NOR(C)'}, - {'Source': 'APHA', 'Method': '4500-NH3 B'}, - {'Source': 'APHA', 'Method': '4500-NH3 D'}, - {'Source': 'APHA', 'Method': '4500-NH3(G)'}, - {'Source': 'APHA', 'Method': '4500-NH3(H)'}, - {'Source': 'APHA', 'Method': '4500-NO3(C)'}, - {'Source': 'APHA', 'Method': '4500-NO3(B)'}, - {'Source': 'APHA', 'Method': '4500-NO3(E)'}, - {'Source': 'APHA', 'Method': '4500-NO3(I)'}, - {'Source': 'APHA', 'Method': '4500-NO3(F)'}, - {'Source': 'APHA', 'Method': '4500-NOR(B)'}, - {'Source': 'APHA', 'Method': '4500-NORGB'}, - {'Source': 'APHA', 'Method': '4500-NORG D'}, - {'Source': 'APHA', 'Method': '4500-CL(E)'}, - {'Source': 'APHA', 'Method': '5310-B'}, - {'Source': 'APHA', 'Method': '4500-P-J'}, - {'Source': 'APHA', 'Method': '4500-N-C'},], - 'Conductivity': [{'Source': 'ASTM', 'Method': 'D1125(A)'}, - {'Source': 'APHA', 'Method': '2510'}, - {'Source': 'USEPA', 'Method': '9050A'}, - {'Source': 'USEPA', 'Method': '360.2'}, - {'Source': 'USEPA', 'Method': '130.1'}, - {'Source': 'USEPA', 'Method': '9050'}, - {'Source': 'APHA', 'Method': '2510B'}, - {'Source': 'APHA', 'Method': '2550'}, - {'Source': 'HACH', 'Method': '8160'}, - {'Source': 'USEPA', 'Method': '120.1'}, - {'Source': 'USEPA', 'Method': '841-B-11-003'}, - {'Source': 'YSI', 'Method': 'EXO WQ SONDE'},], - 'Carbon': [{'Source': 'USEPA', 'Method': '9060'}, - {'Source': 'APHA_SM20ED', 'Method': '5310-B'}, - {'Source': 'APHA', 'Method': '5310C'}, - {'Source': 'APHA', 'Method': '5310-C'}, - 
{'Source': 'USEPA', 'Method': '9060A'}, - {'Source': 'AOAC', 'Method': '973.47'}, - {'Source': 'USDOI/USGS', 'Method': 'O-1122-92'}, - {'Source': 'USDOI/USGS', 'Method': 'O3100'}, - {'Source': 'APHA', 'Method': '5310-D'}, - {'Source': 'APHA (2011)', 'Method': '5310-C'}, - {'Source': 'USEPA', 'Method': '415.1'}, - {'Source': 'USEPA', 'Method': '415.3'}, - {'Source': 'USEPA', 'Method': '502.1'}, - {'Source': 'APHA', 'Method': '9222B'}, - {'Source': 'USEPA', 'Method': '415.2'}, - {'Source': 'APHA', 'Method': '5310-B'}, - {'Source': 'APHA', 'Method': '4500-H+B'},], - 'Chlorophyll': [{'Source': 'YSI', 'Method': 'EXO WQ SONDE'}, - {'Source': 'USEPA', 'Method': '446'}, - {'Source': 'USEPA', 'Method': '170.1'}, - {'Source': 'USEPA', 'Method': '445'}, - {'Source': 'APHA', 'Method': '10200H(3)'}, - {'Source': 'APHA', 'Method': '10200-H'}, - {'Source': 'USEPA', 'Method': '353.2'}, - {'Source': 'USEPA', 'Method': '447'}, - {'Source': 'APHA', 'Method': '10200H(2)'}, - {'Source': 'APHA', 'Method': '9222B'}, - {'Source': 'APHA', 'Method': '5310-C'},], - 'Turbidity': [{'Source': 'USEPA', 'Method': '160.2_M'}, - {'Source': 'USDOI/USGS', 'Method': 'I3860'}, - {'Source': 'USEPA', 'Method': '180.1'}, - {'Source': 'USEPA', 'Method': '360.2'}, - {'Source': 'USEPA', 'Method': '130.1'}, - {'Source': 'APHA', 'Method': '2130'}, - {'Source': 'APHA', 'Method': '2310 B'}, - {'Source': 'APHA', 'Method': '2130-B'}, - {'Source': 'HACH', 'Method': '8195'}, - {'Source': 'LECK MITCHELL', 'Method': 'M5331'}, - {'Source': 'ASTM', 'Method': 'D1889'},], - 'Sediment': [], - 'Fecal_Coliform': [{'Source': 'IDEXX', 'Method': 'COLILERT-18'}, - {'Source': 'APHA_SM22ED', 'Method': '9222D'}, - {'Source': 'APHA', 'Method': '9221-E'}, - {'Source': 'AOAC', 'Method': '978.23'}, - {'Source': 'NIOSH', 'Method': '600'}, - {'Source': 'HACH', 'Method': '8001(A2)'}, - {'Source': 'HACH', 'Method': '8074(A)'}, - {'Source': 'APHA', 'Method': '9230-D'}, - {'Source': 'USEPA', 'Method': '1103.1'}, - {'Source': 'APHA', 
'Method': '9222D'}, - {'Source': 'APHA', 'Method': '9222A'}, - {'Source': 'APHA', 'Method': '3.2-B'}, - {'Source': 'APHA', 'Method': '10200-G'}, - {'Source': 'APHA', 'Method': '9222-E'}, - {'Source': 'APHA', 'Method': '9221-B'},], - 'E_coli': [{'Source': 'APHA', 'Method': '9221A-B-C-F'}, - {'Source': 'IDEXX', 'Method': 'COLILERT/2000'}, - {'Source': 'MICROLOGY LABS', 'Method': 'EASYGEL'}, - {'Source': 'IDEXX', 'Method': 'COLILERT'}, - {'Source': 'IDEXX', 'Method': 'COLISURE'}, - {'Source': 'USEPA', 'Method': '360.2'}, - {'Source': 'APHA_SM22ED', 'Method': '9223-B'}, - {'Source': 'IDEXX', 'Method': 'COLILERT-18'}, - {'Source': 'IDEXX', 'Method': 'COLILERT-182000'}, - {'Source': 'USEPA', 'Method': '130.1'}, - {'Source': 'USEPA', 'Method': '1103.1 (MODIFIED)'}, - {'Source': 'MICROLOGY LABS', 'Method': 'COLISCAN'}, - {'Source': 'APHA', 'Method': '9222D'}, - {'Source': 'APHA', 'Method': '9213-D'}, - {'Source': 'HACH', 'Method': '10029'}, - {'Source': 'APHA', 'Method': '9222G'}, - {'Source': 'CDC', - 'Method':'CDC - E. coli and Shigella'}, - {'Source': 'CDC', 'Method': 'E. 
COLI AND SHIGELLA'}, - {'Source': 'USEPA', 'Method': '1603'}, - {'Source': 'APHA', 'Method': '9213D'}, - {'Source': 'USEPA', 'Method': '1103.1'}, - {'Source': 'USEPA', 'Method': '1604'}, - {'Source': 'APHA', 'Method': '9223-B'}, - {'Source': 'APHA', 'Method': '9223-B-04'}, - {'Source': 'APHA', 'Method': '9222B,G'}, - {'Source': 'USEPA', 'Method': '600-R-00-013'}, - {'Source': 'APHA', 'Method': '9221-F'}, - {'Source': 'USDOI/USGS', 'Method': '10029'}, - {'Source': 'NIOSH', 'Method': '1604'}, - {'Source': 'APHA', 'Method': '"9222B G"'}, - {'Source': 'APHA', 'Method': '9223B'}, - {'Source': 'MODIFIED COLITAG', - 'Method': 'ATP D05-0035'}, - {'Source': 'ASTM', 'Method': 'D5392'}, - {'Source': 'HACH', 'Method': '10018'}, - {'Source': 'USEPA', 'Method': '1600'},], - 'Phosphorus': [{'Source': 'APHA', 'Method': '3125'}, - {'Source': 'APHA', 'Method': '4500-P-C'}, - {'Source': 'USEPA', 'Method': 'IO-3.3'}, - {'Source': 'USEPA', 'Method': '200.7_M'}, - {'Source': 'USEPA', 'Method': '200.9'}, - {'Source': 'USEPA', 'Method': '200.7(S)'}, - {'Source': 'LACHAT', 'Method': '10-115-01-1-F'}, - {'Source': 'APHA_SM21ED', 'Method': '4500-P-G'}, - {'Source': 'USEPA', 'Method': '351.3(C)'}, - {'Source': 'LACHAT', 'Method': '10-115-01-4-B'}, - {'Source': 'USEPA', 'Method': '365.2'}, - {'Source': 'ASA(2ND ED.)', 'Method': '24-5.4'}, - {'Source': 'USEPA', 'Method': '300.1'}, - {'Source': 'USEPA', 'Method': '365_M'}, - {'Source': 'USEPA', 'Method': '365.1'}, - {'Source': 'APHA', 'Method': '4500-NH3(C)'}, - {'Source': 'USEPA', 'Method': '300'}, - {'Source': 'APHA', 'Method': '4500-NO2(B)'}, - {'Source': 'APHA', 'Method': '4500-P-H'}, - {'Source': 'USEPA', 'Method': '300(A)'}, - {'Source': 'USEPA', 'Method': '350.1'}, - {'Source': 'USEPA', 'Method': '200.7(W)'}, - {'Source': 'USEPA', 'Method': '351.2'}, - {'Source': 'USEPA', 'Method': '365.3'}, - {'Source': 'USDOI/USGS', 'Method': 'I2600(W)'}, - {'Source': 'USDOI/USGS', 'Method': 'I2601'}, - {'Source': 'APHA', 'Method': '4500-P B'}, - 
{'Source': 'USEPA', 'Method': '6010B'}, - {'Source': 'USEPA', 'Method': 'ICP-AES'}, - {'Source': 'USDOI/USGS', 'Method': 'I-4610-91'}, - {'Source': 'APHA', 'Method': '3030 E'}, - {'Source': 'APHA', 'Method': '10200-F'}, - {'Source': 'ASTM', 'Method': 'D3977'}, - {'Source': 'USDOI/USGS', 'Method': 'I-4650-03'}, - {'Source': 'USEPA', 'Method': '440(S)'}, - {'Source': 'USEPA', 'Method': '200.8(W)'}, - {'Source': 'USDOI/USGS', 'Method': 'I1602'}, - {'Source': 'APHA', 'Method': '4500-P-E'}, - {'Source': 'USDOI/USGS', 'Method': 'I-2650-03'}, - {'Source': 'APHA', 'Method': '4500-NOR(C)'}, - {'Source': 'APHA', 'Method': '4500-P'}, - {'Source': 'ASTM', 'Method': 'D888(B)'}, - {'Source': 'ASTM', 'Method': 'D515(A)'}, - {'Source': 'HACH', 'Method': '10210'}, - {'Source': 'HACH', 'Method': '8190'}, - {'Source': 'HACH', 'Method': '10242'}, - {'Source': 'USDOE/ASD', 'Method': 'MS100'}, - {'Source': 'USEPA', 'Method': '6010A'}, - {'Source': 'APHA', 'Method': '4500-F-E'}, - {'Source': 'USEPA', 'Method': '200.7'}, - {'Source': 'APHA', 'Method': '2540-D'}, - {'Source': 'APHA', 'Method': '4500-P-F'}, - {'Source': 'USEPA', 'Method': '8321'}, - {'Source': 'USEPA', 'Method': '200.15'}, - {'Source': 'USEPA', 'Method': '353.2'}, - {'Source': 'USEPA', 'Method': '6020A'}, - {'Source': 'USDOI/USGS', 'Method': 'I-1601-85'}, - {'Source': 'USEPA', 'Method': '200.2'}, - {'Source': 'USDOI/USGS', 'Method': 'I-4600-85'}, - {'Source': 'USDOI/USGS', 'Method': 'I-4607'}, - {'Source': 'USDOI/USGS', 'Method': 'I-4602'}, - {'Source': 'APHA (1999)', 'Method': '4500-P-E'}, - {'Source': 'APHA', 'Method': '4500-H'}, - {'Source': 'USEPA', 'Method': '6010C'}, - {'Source': 'USEPA', 'Method': '365.4'}, - {'Source': 'USDOI/USGS', 'Method': 'I6600'}, - {'Source': 'USEPA', 'Method': '200.8'}, - {'Source': 'USEPA', 'Method': '351.1'}, - {'Source': 'HACH', 'Method': '10209'}, - {'Source': 'USEPA ', 'Method': '6020'}, - {'Source': 'ASTM', 'Method': 'D515(B)'}, - {'Source': 'USEPA', 'Method': '624'}, - {'Source': 
'APHA', 'Method': '2340B'}, - {'Source': 'APHA', 'Method': '9222B'}, - {'Source': 'USEPA', 'Method': '440'}, - {'Source': 'APHA', 'Method': '2540-C'}, - {'Source': 'USEPA', 'Method': '353.2_M'}, - {'Source': 'APHA', 'Method': '4500-P-J'}, - {'Source': 'APHA', 'Method': '9223-B'}, - {'Source': 'APHA', 'Method': '4500-P-I'}, - {'Source': 'USEPA', 'Method': '610'}, - {'Source': 'APHA', 'Method': '4500-N-C'}, - {'Source': 'APHA', 'Method': '4500-P-D'}, - {'Source': 'APHA', 'Method': '4500-P E'}, - {'Source': 'APHA', 'Method': '4500-P F'}, - {'Source': 'USDOI/USGS', 'Method': 'I-2610-91'}, - {'Source': 'USDOI/USGS', 'Method': 'I-2607'}, - {'Source': 'USDOI/USGS', 'Method': 'I-2606'}, - {'Source': 'USDOI/USGS', 'Method': 'I-2601-90'}, - {'Source': 'USDOI/USGS', 'Method': 'I-6600-88'}, - {'Source': 'ASTM', 'Method': 'D515'},], - } +# Default field mapping writes full name to alias but a short name to field +"""Get shortened column names for shapefile (.shp) fields. + +ESRI places a length restriction on shapefile (.shp) field names. This +returns a dictionary with the original water quality portal field name (as +key) and shortened column name for writing as .shp. We suggest using the +longer original name as the field alias when writing as .shp. + +Returns +------- +field_mapping : dict + Dictionary where key = WQP field name and value = short name for .shp. 
+ +Examples +-------- +Although the dictionary holds the full set of Key:Value +pairs, here we show how the current name can be used as a key to get the +new name: + +>>> domains.stations_rename['OrganizationIdentifier'] +'org_ID' +""" +stations_rename = { + "OrganizationIdentifier": "org_ID", + "OrganizationFormalName": "org_name", + "MonitoringLocationIdentifier": "loc_ID", + "MonitoringLocationName": "loc_name", + "MonitoringLocationTypeName": "loc_type", + "MonitoringLocationDescriptionText": "loc_desc", + "HUCEightDigitCode": "HUC08_code", + "DrainageAreaMeasure/MeasureValue": "DA_val", + "DrainageAreaMeasure/MeasureUnitCode": "DA_unit", + "ContributingDrainageAreaMeasure/MeasureValue": "CA_val", + "ContributingDrainageAreaMeasure/MeasureUnitCode": "CA_unit", + "LatitudeMeasure": "Latitude", + "LongitudeMeasure": "Longitude", + "SourceMapScaleNumeric": "SRC_Scale", + "HorizontalAccuracyMeasure/MeasureValue": "xy_acc", + "HorizontalAccuracyMeasure/MeasureUnitCode": "xy_accUnit", + "HorizontalCollectionMethodName": "xy_method", + "HorizontalCoordinateReferenceSystemDatumName": "xy_datum", + "VerticalMeasure/MeasureValue": "z", + "VerticalMeasure/MeasureUnitCode": "z_unit", + "VerticalAccuracyMeasure/MeasureValue": "z_acc", + "VerticalAccuracyMeasure/MeasureUnitCode": "z_accUnit", + "VerticalCollectionMethodName": "z_method", + "VerticalCoordinateReferenceSystemDatumName": "z_datum", + "CountryCode": "country", + "StateCode": "state", + "CountyCode": "county", + "AquiferName": "aquifer", + "FormationTypeText": "form_type", + "AquiferTypeName": "aquiferType", + "ConstructionDateText": "constrDate", + "WellDepthMeasure/MeasureValue": "well_depth", + "WellDepthMeasure/MeasureUnitCode": "well_unit", + "WellHoleDepthMeasure/MeasureValue": "wellhole", + "WellHoleDepthMeasure/MeasureUnitCode": "wellHole_unit", + "ProviderName": "provider", + "ActivityIdentifier": "activity_ID", + "ResultIdentifier": "result_ID", + } + +"""Get accepted methods for each 
characteristic. + +Notes +----- +Source should be in 'ResultAnalyticalMethod/MethodIdentifierContext' +column. This is not fully implemented. + +Returns +------- +dict + Dictionary where key is characteristic column name and value is list of + dictionaries each with Source and Method keys. + +""" +accepted_methods = { + "Secchi": [ + {"Source": "APHA", "Method": "2320-B"}, + {"Source": "ASTM", "Method": "D1889"}, + {"Source": "USEPA", "Method": "NRSA09 W QUAL (BOAT)"}, + {"Source": "USEPA", "Method": "841-B-11-003"}, + ], + "DO": [ + {"Source": "USEPA", "Method": "360.2"}, + {"Source": "USEPA", "Method": "130.1"}, + {"Source": "APHA", "Method": "4500-O-G"}, + {"Source": "USEPA", "Method": "160.3"}, + {"Source": "AOAC", "Method": "973.45"}, + {"Source": "USDOI/USGS", "Method": "I-1576-78"}, + {"Source": "USDOI/USGS", "Method": "NFM 6.2.1-LUM"}, + {"Source": "ASTM", "Method": "D888(B)"}, + {"Source": "HACH", "Method": "8157"}, + {"Source": "HACH", "Method": "10360"}, + {"Source": "ASTM", "Method": "D3858"}, + {"Source": "ASTM", "Method": "D888(C)"}, + {"Source": "APHA", "Method": " 4500-O-C"}, + {"Source": "USEPA", "Method": "1002-8-2009"}, + {"Source": "APHA", "Method": "2550"}, + {"Source": "USEPA", "Method": "360.1"}, + {"Source": "USEPA", "Method": "841-B-11-003"}, + {"Source": "ASTM", "Method": "D888-12"}, + {"Source": "YSI", "Method": "EXO WQ SONDE"}, + ], + "Temperature": [ + {"Source": "USEPA", "Method": "170.1"}, + {"Source": "USEPA", "Method": "130.1"}, + {"Source": "USEPA", "Method": "841-B-11-003"}, + {"Source": "APHA", "Method": "2550"}, + {"Source": "YSI", "Method": "EXO WQ SONDE"}, + {"Source": "APHA", "Method": "2550 B"}, + ], + "Salinity": [ + {"Source": "YSI", "Method": "EXO WQ SONDE"}, + {"Source": "HACH", "Method": "8160"}, + {"Source": "APHA", "Method": "2520-B"}, + {"Source": "APHA", "Method": "2130"}, + {"Source": "APHA", "Method": "3.2-B"}, + {"Source": "APHA", "Method": "2520-C"}, + ], + "pH": [ + {"Source": "ASTM", "Method": "D1293(B)"}, + 
{"Source": "YSI", "Method": "EXO WQ SONDE"}, + {"Source": "USEPA", "Method": "360.2"}, + {"Source": "USEPA", "Method": "130.1"}, + {"Source": "USDOI/USGS", "Method": "I1586"}, + {"Source": "USDOI/USGS", "Method": "I-2587-85"}, + {"Source": "APHA", "Method": "3.2-B"}, + {"Source": "HACH", "Method": "8219"}, + {"Source": "AOAC", "Method": "973.41"}, + {"Source": "APHA", "Method": "4500-H"}, + {"Source": "APHA", "Method": "2320"}, + {"Source": "USEPA", "Method": "150.2"}, + {"Source": "USEPA", "Method": "150.1"}, + {"Source": "USDOI/USGS", "Method": "I-1586-85"}, + {"Source": "USEPA", "Method": "9040B"}, + {"Source": "HACH", "Method": "8156"}, + {"Source": "ASTM", "Method": "D1293(A)"}, + {"Source": "APHA", "Method": "4500-H+B"}, + ], + "Nitrogen": [ + {"Source": "USEPA", "Method": "353.1"}, + {"Source": "USEPA", "Method": "353.2"}, + {"Source": "USEPA", "Method": "353.2_M"}, + {"Source": "USEPA", "Method": "353.3"}, + {"Source": "USEPA", "Method": "6020"}, + {"Source": "USEPA", "Method": "200.7"}, + {"Source": "USEPA", "Method": "8321"}, + {"Source": "USEPA", "Method": "365.1"}, + {"Source": "USEPA", "Method": "365.3"}, + {"Source": "USEPA", "Method": "300"}, + {"Source": "USEPA", "Method": "300(A)"}, + {"Source": "USEPA", "Method": "350.1"}, + {"Source": "USEPA", "Method": "350.3"}, + {"Source": "USEPA", "Method": "351.1"}, + {"Source": "USEPA", "Method": "351.2"}, + {"Source": "USEPA", "Method": "351.3 (TITRATION)"}, + {"Source": "USEPA", "Method": "440"}, + {"Source": "USEPA", "Method": "440(W)"}, + {"Source": "USEPA", "Method": "440(S)"}, + {"Source": "AOAC", "Method": "973.48"}, + {"Source": "USDOI/USGS", "Method": "I-4650-03"}, + {"Source": "USDOI/USGS", "Method": "I-2650-03"}, + {"Source": "USDOI/USGS", "Method": "I-4540-85"}, + {"Source": "ASTM", "Method": "D8083-16"}, + {"Source": "ASTM", "Method": "D5176"}, + {"Source": "ASTM", "Method": "D888(B)"}, + {"Source": "ASTM", "Method": "D3590(B)"}, + {"Source": "HACH", "Method": "10208"}, + {"Source": "HACH", 
"Method": "10071"}, + {"Source": "HACH", "Method": "10072"}, + {"Source": "HACH", "Method": "10242"}, + {"Source": "USDOE/ASD", "Method": "MS100"}, + {"Source": "LACHAT", "Method": "31-107-04-3-A"}, + {"Source": "LACHAT", "Method": "31-107-04-4-A"}, + {"Source": "BL", "Method": "818-87T"}, + {"Source": "APHA_SM20ED", "Method": "4500-N-C"}, + {"Source": "APHA_SM21ED", "Method": "4500-N-B"}, + {"Source": "APHA", "Method": "4500-N D"}, + {"Source": "APHA", "Method": "4500-N"}, + {"Source": "APHA", "Method": "4500-NOR(C)"}, + {"Source": "APHA", "Method": "4500-NH3 B"}, + {"Source": "APHA", "Method": "4500-NH3 D"}, + {"Source": "APHA", "Method": "4500-NH3(G)"}, + {"Source": "APHA", "Method": "4500-NH3(H)"}, + {"Source": "APHA", "Method": "4500-NO3(C)"}, + {"Source": "APHA", "Method": "4500-NO3(B)"}, + {"Source": "APHA", "Method": "4500-NO3(E)"}, + {"Source": "APHA", "Method": "4500-NO3(I)"}, + {"Source": "APHA", "Method": "4500-NO3(F)"}, + {"Source": "APHA", "Method": "4500-NOR(B)"}, + {"Source": "APHA", "Method": "4500-NORGB"}, + {"Source": "APHA", "Method": "4500-NORG D"}, + {"Source": "APHA", "Method": "4500-CL(E)"}, + {"Source": "APHA", "Method": "5310-B"}, + {"Source": "APHA", "Method": "4500-P-J"}, + {"Source": "APHA", "Method": "4500-N-C"}, + ], + "Conductivity": [ + {"Source": "ASTM", "Method": "D1125(A)"}, + {"Source": "APHA", "Method": "2510"}, + {"Source": "USEPA", "Method": "9050A"}, + {"Source": "USEPA", "Method": "360.2"}, + {"Source": "USEPA", "Method": "130.1"}, + {"Source": "USEPA", "Method": "9050"}, + {"Source": "APHA", "Method": "2510B"}, + {"Source": "APHA", "Method": "2550"}, + {"Source": "HACH", "Method": "8160"}, + {"Source": "USEPA", "Method": "120.1"}, + {"Source": "USEPA", "Method": "841-B-11-003"}, + {"Source": "YSI", "Method": "EXO WQ SONDE"}, + ], + "Carbon": [ + {"Source": "USEPA", "Method": "9060"}, + {"Source": "APHA_SM20ED", "Method": "5310-B"}, + {"Source": "APHA", "Method": "5310C"}, + {"Source": "APHA", "Method": "5310-C"}, + 
{"Source": "USEPA", "Method": "9060A"}, + {"Source": "AOAC", "Method": "973.47"}, + {"Source": "USDOI/USGS", "Method": "O-1122-92"}, + {"Source": "USDOI/USGS", "Method": "O3100"}, + {"Source": "APHA", "Method": "5310-D"}, + {"Source": "APHA (2011)", "Method": "5310-C"}, + {"Source": "USEPA", "Method": "415.1"}, + {"Source": "USEPA", "Method": "415.3"}, + {"Source": "USEPA", "Method": "502.1"}, + {"Source": "APHA", "Method": "9222B"}, + {"Source": "USEPA", "Method": "415.2"}, + {"Source": "APHA", "Method": "5310-B"}, + {"Source": "APHA", "Method": "4500-H+B"}, + ], + "Chlorophyll": [ + {"Source": "YSI", "Method": "EXO WQ SONDE"}, + {"Source": "USEPA", "Method": "446"}, + {"Source": "USEPA", "Method": "170.1"}, + {"Source": "USEPA", "Method": "445"}, + {"Source": "APHA", "Method": "10200H(3)"}, + {"Source": "APHA", "Method": "10200-H"}, + {"Source": "USEPA", "Method": "353.2"}, + {"Source": "USEPA", "Method": "447"}, + {"Source": "APHA", "Method": "10200H(2)"}, + {"Source": "APHA", "Method": "9222B"}, + {"Source": "APHA", "Method": "5310-C"}, + ], + "Turbidity": [ + {"Source": "USEPA", "Method": "160.2_M"}, + {"Source": "USDOI/USGS", "Method": "I3860"}, + {"Source": "USEPA", "Method": "180.1"}, + {"Source": "USEPA", "Method": "360.2"}, + {"Source": "USEPA", "Method": "130.1"}, + {"Source": "APHA", "Method": "2130"}, + {"Source": "APHA", "Method": "2310 B"}, + {"Source": "APHA", "Method": "2130-B"}, + {"Source": "HACH", "Method": "8195"}, + {"Source": "LECK MITCHELL", "Method": "M5331"}, + {"Source": "ASTM", "Method": "D1889"}, + ], + "Sediment": [], + "Fecal_Coliform": [ + {"Source": "IDEXX", "Method": "COLILERT-18"}, + {"Source": "APHA_SM22ED", "Method": "9222D"}, + {"Source": "APHA", "Method": "9221-E"}, + {"Source": "AOAC", "Method": "978.23"}, + {"Source": "NIOSH", "Method": "600"}, + {"Source": "HACH", "Method": "8001(A2)"}, + {"Source": "HACH", "Method": "8074(A)"}, + {"Source": "APHA", "Method": "9230-D"}, + {"Source": "USEPA", "Method": "1103.1"}, + 
{"Source": "APHA", "Method": "9222D"}, + {"Source": "APHA", "Method": "9222A"}, + {"Source": "APHA", "Method": "3.2-B"}, + {"Source": "APHA", "Method": "10200-G"}, + {"Source": "APHA", "Method": "9222-E"}, + {"Source": "APHA", "Method": "9221-B"}, + ], + "E_coli": [ + {"Source": "APHA", "Method": "9221A-B-C-F"}, + {"Source": "IDEXX", "Method": "COLILERT/2000"}, + {"Source": "MICROLOGY LABS", "Method": "EASYGEL"}, + {"Source": "IDEXX", "Method": "COLILERT"}, + {"Source": "IDEXX", "Method": "COLISURE"}, + {"Source": "USEPA", "Method": "360.2"}, + {"Source": "APHA_SM22ED", "Method": "9223-B"}, + {"Source": "IDEXX", "Method": "COLILERT-18"}, + {"Source": "IDEXX", "Method": "COLILERT-182000"}, + {"Source": "USEPA", "Method": "130.1"}, + {"Source": "USEPA", "Method": "1103.1 (MODIFIED)"}, + {"Source": "MICROLOGY LABS", "Method": "COLISCAN"}, + {"Source": "APHA", "Method": "9222D"}, + {"Source": "APHA", "Method": "9213-D"}, + {"Source": "HACH", "Method": "10029"}, + {"Source": "APHA", "Method": "9222G"}, + {"Source": "CDC", "Method": "CDC - E. coli and Shigella"}, + {"Source": "CDC", "Method": "E. 
COLI AND SHIGELLA"}, + {"Source": "USEPA", "Method": "1603"}, + {"Source": "APHA", "Method": "9213D"}, + {"Source": "USEPA", "Method": "1103.1"}, + {"Source": "USEPA", "Method": "1604"}, + {"Source": "APHA", "Method": "9223-B"}, + {"Source": "APHA", "Method": "9223-B-04"}, + {"Source": "APHA", "Method": "9222B,G"}, + {"Source": "USEPA", "Method": "600-R-00-013"}, + {"Source": "APHA", "Method": "9221-F"}, + {"Source": "USDOI/USGS", "Method": "10029"}, + {"Source": "NIOSH", "Method": "1604"}, + {"Source": "APHA", "Method": '"9222B G"'}, + {"Source": "APHA", "Method": "9223B"}, + {"Source": "MODIFIED COLITAG", "Method": "ATP D05-0035"}, + {"Source": "ASTM", "Method": "D5392"}, + {"Source": "HACH", "Method": "10018"}, + {"Source": "USEPA", "Method": "1600"}, + ], + "Phosphorus": [ + {"Source": "APHA", "Method": "3125"}, + {"Source": "APHA", "Method": "4500-P-C"}, + {"Source": "USEPA", "Method": "IO-3.3"}, + {"Source": "USEPA", "Method": "200.7_M"}, + {"Source": "USEPA", "Method": "200.9"}, + {"Source": "USEPA", "Method": "200.7(S)"}, + {"Source": "LACHAT", "Method": "10-115-01-1-F"}, + {"Source": "APHA_SM21ED", "Method": "4500-P-G"}, + {"Source": "USEPA", "Method": "351.3(C)"}, + {"Source": "LACHAT", "Method": "10-115-01-4-B"}, + {"Source": "USEPA", "Method": "365.2"}, + {"Source": "ASA(2ND ED.)", "Method": "24-5.4"}, + {"Source": "USEPA", "Method": "300.1"}, + {"Source": "USEPA", "Method": "365_M"}, + {"Source": "USEPA", "Method": "365.1"}, + {"Source": "APHA", "Method": "4500-NH3(C)"}, + {"Source": "USEPA", "Method": "300"}, + {"Source": "APHA", "Method": "4500-NO2(B)"}, + {"Source": "APHA", "Method": "4500-P-H"}, + {"Source": "USEPA", "Method": "300(A)"}, + {"Source": "USEPA", "Method": "350.1"}, + {"Source": "USEPA", "Method": "200.7(W)"}, + {"Source": "USEPA", "Method": "351.2"}, + {"Source": "USEPA", "Method": "365.3"}, + {"Source": "USDOI/USGS", "Method": "I2600(W)"}, + {"Source": "USDOI/USGS", "Method": "I2601"}, + {"Source": "APHA", "Method": "4500-P B"}, + 
{"Source": "USEPA", "Method": "6010B"}, + {"Source": "USEPA", "Method": "ICP-AES"}, + {"Source": "USDOI/USGS", "Method": "I-4610-91"}, + {"Source": "APHA", "Method": "3030 E"}, + {"Source": "APHA", "Method": "10200-F"}, + {"Source": "ASTM", "Method": "D3977"}, + {"Source": "USDOI/USGS", "Method": "I-4650-03"}, + {"Source": "USEPA", "Method": "440(S)"}, + {"Source": "USEPA", "Method": "200.8(W)"}, + {"Source": "USDOI/USGS", "Method": "I1602"}, + {"Source": "APHA", "Method": "4500-P-E"}, + {"Source": "USDOI/USGS", "Method": "I-2650-03"}, + {"Source": "APHA", "Method": "4500-NOR(C)"}, + {"Source": "APHA", "Method": "4500-P"}, + {"Source": "ASTM", "Method": "D888(B)"}, + {"Source": "ASTM", "Method": "D515(A)"}, + {"Source": "HACH", "Method": "10210"}, + {"Source": "HACH", "Method": "8190"}, + {"Source": "HACH", "Method": "10242"}, + {"Source": "USDOE/ASD", "Method": "MS100"}, + {"Source": "USEPA", "Method": "6010A"}, + {"Source": "APHA", "Method": "4500-F-E"}, + {"Source": "USEPA", "Method": "200.7"}, + {"Source": "APHA", "Method": "2540-D"}, + {"Source": "APHA", "Method": "4500-P-F"}, + {"Source": "USEPA", "Method": "8321"}, + {"Source": "USEPA", "Method": "200.15"}, + {"Source": "USEPA", "Method": "353.2"}, + {"Source": "USEPA", "Method": "6020A"}, + {"Source": "USDOI/USGS", "Method": "I-1601-85"}, + {"Source": "USEPA", "Method": "200.2"}, + {"Source": "USDOI/USGS", "Method": "I-4600-85"}, + {"Source": "USDOI/USGS", "Method": "I-4607"}, + {"Source": "USDOI/USGS", "Method": "I-4602"}, + {"Source": "APHA (1999)", "Method": "4500-P-E"}, + {"Source": "APHA", "Method": "4500-H"}, + {"Source": "USEPA", "Method": "6010C"}, + {"Source": "USEPA", "Method": "365.4"}, + {"Source": "USDOI/USGS", "Method": "I6600"}, + {"Source": "USEPA", "Method": "200.8"}, + {"Source": "USEPA", "Method": "351.1"}, + {"Source": "HACH", "Method": "10209"}, + {"Source": "USEPA ", "Method": "6020"}, + {"Source": "ASTM", "Method": "D515(B)"}, + {"Source": "USEPA", "Method": "624"}, + {"Source": 
"APHA", "Method": "2340B"}, + {"Source": "APHA", "Method": "9222B"}, + {"Source": "USEPA", "Method": "440"}, + {"Source": "APHA", "Method": "2540-C"}, + {"Source": "USEPA", "Method": "353.2_M"}, + {"Source": "APHA", "Method": "4500-P-J"}, + {"Source": "APHA", "Method": "9223-B"}, + {"Source": "APHA", "Method": "4500-P-I"}, + {"Source": "USEPA", "Method": "610"}, + {"Source": "APHA", "Method": "4500-N-C"}, + {"Source": "APHA", "Method": "4500-P-D"}, + {"Source": "APHA", "Method": "4500-P E"}, + {"Source": "APHA", "Method": "4500-P F"}, + {"Source": "USDOI/USGS", "Method": "I-2610-91"}, + {"Source": "USDOI/USGS", "Method": "I-2607"}, + {"Source": "USDOI/USGS", "Method": "I-2606"}, + {"Source": "USDOI/USGS", "Method": "I-2601-90"}, + {"Source": "USDOI/USGS", "Method": "I-6600-88"}, + {"Source": "ASTM", "Method": "D515"}, + ], + } \ No newline at end of file diff --git a/harmonize_wq/location.py b/harmonize_wq/location.py index 791735c..bc545a8 100644 --- a/harmonize_wq/location.py +++ b/harmonize_wq/location.py @@ -169,8 +169,8 @@ def harmonize_locations(df_in, out_EPSG=4326, df2['geom_orig'] = list(zip(df2[lon_col], df2[lat_col])) # Create/populate EPSG column - crs_mask = df2[crs_col].isin(xy_datum().keys()) # w/ known datum - df2.loc[crs_mask, 'EPSG'] = [xy_datum()[crs]['EPSG'] for crs + crs_mask = df2[crs_col].isin(xy_datum.keys()) # w/ known datum + df2.loc[crs_mask, 'EPSG'] = [xy_datum[crs]['EPSG'] for crs in df2.loc[crs_mask, crs_col]] # Fix/flag missing diff --git a/harmonize_wq/wq_data.py b/harmonize_wq/wq_data.py index 7488507..4d1cd25 100644 --- a/harmonize_wq/wq_data.py +++ b/harmonize_wq/wq_data.py @@ -134,7 +134,7 @@ def __init__(self, df_in, char_val): self.col.unit_in] self.df = df_out # Deal with values: set out_col = in - self.out_col = domains.out_col_lookup()[char_val] + self.out_col = domains.out_col_lookup[char_val] self._coerce_measure() self.ureg = pint.UnitRegistry() # Add standard unit registry self.units = domains.OUT_UNITS[self.out_col] @@ 
-463,7 +463,7 @@ def check_basis(self, basis_col='MethodSpecificationName'): # Basis from unit try: - basis_dict = basis.unit_basis_dict(self.out_col) + basis_dict = basis.unit_basis_dict[self.out_col] self.df[c_mask] = basis.basis_from_unit(self.df[c_mask], basis_dict, self.col.unit_out) @@ -473,7 +473,7 @@ def check_basis(self, basis_col='MethodSpecificationName'): col = self.col.basis # Get built-in char_val based on out_col attribute - char_keys, char_vals = zip(*domains.out_col_lookup().items()) + char_keys, char_vals = zip(*domains.out_col_lookup.items()) char_val = list(char_keys)[list(char_vals).index(self.out_col)] self.df.loc[c_mask, col] = self.df.loc[c_mask, col].fillna(char_val) diff --git a/harmonize_wq/wrangle.py b/harmonize_wq/wrangle.py index b84dced..799308f 100644 --- a/harmonize_wq/wrangle.py +++ b/harmonize_wq/wrangle.py @@ -87,13 +87,13 @@ def split_col(df_in, result_col='QA_flag', col_prefix='QA'): char_list = list(set(df_out['CharacteristicName'])) # TODO: try/catch on key error - col_list = [domains.out_col_lookup()[char_name] for char_name in char_list] + col_list = [domains.out_col_lookup[char_name] for char_name in char_list] # TODO: generalize to multi-characteristics other than phosphorus char = 'Phosphorus' if char in char_list: i = char_list.index(char) - suffix = '_' + domains.out_col_lookup()[char] + suffix = '_' + domains.out_col_lookup[char] col_list[i] = [col for col in df_out.columns if col.endswith(suffix)] # Drop rows where result na @@ -754,7 +754,7 @@ def to_simple_shape(gdf, out_shp): >>> wrangle.to_simple_shape(gdf, 'dataframe.shp') """ cols = gdf.columns # List of current column names - names_dict = domains.stations_rename() # Dict of column names to update + names_dict = domains.stations_rename # Dict of column names to update # Rename non-results columns that are too long for shp field names renaming_list = [col for col in cols if col in names_dict] renaming_dict = {old_col: names_dict[old_col] for old_col in 
renaming_list} @@ -766,7 +766,7 @@ def to_simple_shape(gdf, out_shp): # Results columns need to be str not pint (.astype(str)) # Narrow based on out_col lookup dictionary - results_cols = [col for col in possible_results if col in domains.out_col_lookup().values()] + results_cols = [col for col in possible_results if col in domains.out_col_lookup.values()] # TODO: check based on suffix: e.g. Phosphorus # Rename each column w/ units and write results as str for col in results_cols: diff --git a/pyproject.toml b/pyproject.toml index 6f5b0f6..0af85cc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,9 @@ dependencies = [ "geopandas", "pint", "dataretrieval", + "pandas", + "numpy", + "requests", ] [project.optional-dependencies] diff --git a/requirements.txt b/requirements.txt index b017201..6755148 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,5 @@ pint>=0.18 # May work on earlier versions but this is what it was teste dataretrieval>=1.*, <1.0.5 # 1.0.5 will break datetime handling, <1.0 doesn't have required functions descartes>=1.1.0 # May be required for mapping in some jupyter notebooks mapclassify>=2.4.0 # May be required for mapping in some jupyter notebooks +requests +numpy