From 9f095258062d5ee5c8db243b3cf24d29ae9afdce Mon Sep 17 00:00:00 2001 From: Elise Hinman <121896266+ehinman@users.noreply.github.com> Date: Tue, 17 Sep 2024 16:38:48 -0500 Subject: [PATCH] First crack at changing the url for one function (#152) * Add basic support for WQX3 profiles --------- Co-authored-by: thodson Co-authored-by: Timothy Hodson <34148978+thodson-usgs@users.noreply.github.com> --- dataretrieval/wqp.py | 489 ++++++++++++++++++++++++------------ tests/data/wqp3_results.txt | 6 + tests/wqp_test.py | 84 ++++--- 3 files changed, 382 insertions(+), 197 deletions(-) create mode 100644 tests/data/wqp3_results.txt diff --git a/dataretrieval/wqp.py b/dataretrieval/wqp.py index cad5e736..b006a0ec 100644 --- a/dataretrieval/wqp.py +++ b/dataretrieval/wqp.py @@ -8,6 +8,9 @@ - implement other services like Organization, Activity, etc. """ +from __future__ import annotations +from typing import TYPE_CHECKING + import warnings from io import StringIO @@ -15,64 +18,90 @@ from .utils import BaseMetadata, query - -def get_results(ssl_check=True, **kwargs): +if TYPE_CHECKING: + from pandas import DataFrame + + +result_profiles_wqx3 = ['basicPhysChem', 'fullPhysChem', 'narrow'] +result_profiles_legacy = ['resultPhysChem', 'biological', 'narrowResult'] +activity_profiles_legacy = ['activityAll'] +services_wqx3 = ['Activity', 'Result', 'Station'] +services_legacy = [ + 'Activity', + 'ActivityMetric', + 'BiologicalMetric', + 'Organization', + 'Project', + 'ProjectMonitoringLocationWeighting', + 'Result', + 'ResultDetectionQuantitationLimit', + 'Station', + ] + + +def get_results( + ssl_check=True, + legacy=True, + **kwargs, +) -> tuple[DataFrame, WQP_Metadata]: """Query the WQP for results. Any WQP API parameter can be passed as a keyword argument to this function. 
More information about the API can be found at: https://www.waterqualitydata.us/#advanced=true + or the beta version of the WQX3.0 API at: + https://www.waterqualitydata.us/beta/#mimeType=csv&providers=NWIS&providers=STORET or the Swagger documentation at: https://www.waterqualitydata.us/data/swagger-ui/index.html?docExpansion=none&url=/data/v3/api-docs#/ Parameters ---------- - ssl_check: bool - If True, check the SSL certificate. Default is True. If False, SSL - certificate is not checked. - siteid: string - Concatenate an agency code, a hyphen ("-"), and a site-identification - number. - statecode: string - Concatenate 'US', a colon (":"), and a FIPS numeric code - (Example: Illinois is US:17) - countycode: string - A FIPS county code - huc: string - One or more eight-digit hydrologic units, delimited by semicolons. - bBox: string - Bounding box (Example: bBox=-92.8,44.2,-88.9,46.0) - lat: string - Latitude for radial search, expressed in decimal degrees, WGS84 - long: string - Longitude for radial search - within: string - Distance for a radial search, expressed in decimal miles - pCode: string - One or more five-digit USGS parameter codes, separated by semicolons. + ssl_check : bool, optional + Check the SSL certificate. + legacy : bool, optional + Return the legacy WQX data profile. Default is True. + dataProfile : string, optional + Specifies the data fields returned by the query. + WQX3.0 profiles include 'fullPhysChem', 'narrow', and 'basicPhysChem'. + Legacy profiles include 'resultPhysChem','biological', and + 'narrowResult'. Default is 'fullPhysChem'. + siteid : string + Monitoring location identified by agency code, a hyphen, and + identification number (Example: "USGS-05586100"). + statecode : string + US state FIPS code (Example: Illinois is "US:17"). + countycode : string + US county FIPS code. + huc : string + Eight-digit hydrologic unit (HUC), delimited by semicolons. 
+ bBox : string + Search bounding box (Example: bBox=-92.8,44.2,-88.9,46.0) + lat : string + Radial-search central latitude in WGS84 decimal degrees. + long : string + Radial-search central longitude in WGS84 decimal degrees. + within : string + Radial-search distance in decimal miles. + pCode : string + Five-digit USGS parameter code, delimited by semicolons. NWIS only. - startDateLo: string + startDateLo : string Date of earliest desired data-collection activity, expressed as 'MM-DD-YYYY' - startDateHi: string + startDateHi : string Date of last desired data-collection activity, expressed as 'MM-DD-YYYY' - characteristicName: string + characteristicName : string One or more case-sensitive characteristic names, separated by - semicolons. (See https://www.waterqualitydata.us/public_srsnames/ - for available characteristic names) - mimeType: string - String specifying the output format which is 'csv' by default but can - be 'geojson' - zip: string - Parameter to stream compressed data, if 'yes', or uncompressed data - if 'no'. Default is 'no'. + semicolons (https://www.waterqualitydata.us/public_srsnames/). + mimeType : string + Output format. Only 'csv' is supported at this time. Returns ------- - df: ``pandas.DataFrame`` + df : ``pandas.DataFrame`` Formatted data returned from the API query. - md: :obj:`dataretrieval.utils.Metadata` + md : :obj:`dataretrieval.utils.Metadata` Custom ``dataretrieval`` metadata object pertaining to the query. Examples @@ -87,38 +116,74 @@ def get_results(ssl_check=True, **kwargs): >>> # Get results within a bounding box >>> df, md = dataretrieval.wqp.get_results(bBox='-92.8,44.2,-88.9,46.0') + >>> # Get results using a new WQX3.0 profile + >>> df, md = dataretrieval.wqp.get_results( + ... legacy=False, siteid='UTAHDWQ_WQX-4993795', dataProfile='narrow' + ... 
) + """ - _warn_v3_profiles_outage() - kwargs = _alter_kwargs(kwargs) - response = query(wqp_url('Result'), kwargs, delimiter=';', ssl_check=ssl_check) + kwargs = _check_kwargs(kwargs) - df = pd.read_csv(StringIO(response.text), delimiter=',') + if legacy is True: + if "dataProfile" in kwargs: + if kwargs["dataProfile"] not in result_profiles_legacy: + raise TypeError( + f"dataProfile {kwargs['dataProfile']} is not a legacy profile.", + f"Valid options are {result_profiles_legacy}.", + ) + + url = wqp_url("Result") + + else: + if 'dataProfile' in kwargs: + if kwargs['dataProfile'] not in result_profiles_wqx3: + raise TypeError( + f"dataProfile {kwargs['dataProfile']} is not a valid WQX3.0" + f"profile. Valid options are {result_profiles_wqx3}.", + ) + else: + kwargs["dataProfile"] = "fullPhysChem" + + url = wqx3_url('Result') + + response = query(url, kwargs, delimiter=";", ssl_check=ssl_check) + + df = pd.read_csv(StringIO(response.text), delimiter=",") return df, WQP_Metadata(response) -def what_sites(ssl_check=True, **kwargs): +def what_sites( + ssl_check=True, + legacy=True, + **kwargs, +) -> tuple[DataFrame, WQP_Metadata]: """Search WQP for sites within a region with specific data. Any WQP API parameter can be passed as a keyword argument to this function. More information about the API can be found at: https://www.waterqualitydata.us/#advanced=true + or the beta version of the WQX3.0 API at: + https://www.waterqualitydata.us/beta/#mimeType=csv&providers=NWIS&providers=STORET or the Swagger documentation at: https://www.waterqualitydata.us/data/swagger-ui/index.html?docExpansion=none&url=/data/v3/api-docs#/ Parameters ---------- - ssl_check: bool - If True, check the SSL certificate. Default is True. If False, SSL - certificate is not checked. - **kwargs: optional + ssl_check : bool, optional + Check the SSL certificate. Default is True. 
+ legacy : bool, optional + If True, returns the legacy WQX data profile and warns the user of + the issues associated with it. If False, returns the new WQX3.0 + profile, if available. Defaults to True. + **kwargs : optional Accepts the same parameters as :obj:`dataretrieval.wqp.get_results` Returns ------- - df: ``pandas.DataFrame`` + df : ``pandas.DataFrame`` Formatted data returned from the API query. - md: :obj:`dataretrieval.utils.Metadata` + md : :obj:`dataretrieval.utils.Metadata` Custom metadata object pertaining to the query. Examples @@ -131,11 +196,14 @@ def what_sites(ssl_check=True, **kwargs): ... ) """ - _warn_v3_profiles_outage() - kwargs = _alter_kwargs(kwargs) + kwargs = _check_kwargs(kwargs) + + if legacy is True: + url = wqp_url('Station') + else: + url = wqx3_url('Station') - url = wqp_url('Station') response = query(url, payload=kwargs, delimiter=';', ssl_check=ssl_check) df = pd.read_csv(StringIO(response.text), delimiter=',') @@ -143,28 +211,35 @@ def what_sites(ssl_check=True, **kwargs): return df, WQP_Metadata(response) -def what_organizations(ssl_check=True, **kwargs): +def what_organizations( + ssl_check=True, + legacy=True, + **kwargs, +) -> tuple[DataFrame, WQP_Metadata]: """Search WQP for organizations within a region with specific data. Any WQP API parameter can be passed as a keyword argument to this function. More information about the API can be found at: https://www.waterqualitydata.us/#advanced=true + or the beta version of the WQX3.0 API at: + https://www.waterqualitydata.us/beta/#mimeType=csv&providers=NWIS&providers=STORET or the Swagger documentation at: https://www.waterqualitydata.us/data/swagger-ui/index.html?docExpansion=none&url=/data/v3/api-docs#/ Parameters ---------- - ssl_check: bool - If True, check the SSL certificate. Default is True. If False, SSL - certificate is not checked. - **kwargs: optional + ssl_check : bool, optional + Check the SSL certificate. Default is True. 
+ legacy : bool, optional + Return the legacy WQX data profile. Default is True. + **kwargs : optional Accepts the same parameters as :obj:`dataretrieval.wqp.get_results` Returns ------- - df: ``pandas.DataFrame`` + df : ``pandas.DataFrame`` Formatted data returned from the API query. - md: :obj:`dataretrieval.utils.Metadata` + md : :obj:`dataretrieval.utils.Metadata` Custom metadata object pertaining to the query. Examples @@ -175,11 +250,15 @@ def what_organizations(ssl_check=True, **kwargs): >>> df, md = dataretrieval.wqp.what_organizations() """ - _warn_v3_profiles_outage() - kwargs = _alter_kwargs(kwargs) + kwargs = _check_kwargs(kwargs) + + if legacy is True: + url = wqp_url('Organization') + else: + print('WQX3.0 profile not available, returning legacy profile.') + url = wqp_url('Organization') - url = wqp_url('Organization') response = query(url, payload=kwargs, delimiter=';', ssl_check=ssl_check) df = pd.read_csv(StringIO(response.text), delimiter=',') @@ -187,28 +266,31 @@ def what_organizations(ssl_check=True, **kwargs): return df, WQP_Metadata(response) -def what_projects(ssl_check=True, **kwargs): +def what_projects(ssl_check=True, legacy=True, **kwargs): """Search WQP for projects within a region with specific data. Any WQP API parameter can be passed as a keyword argument to this function. More information about the API can be found at: https://www.waterqualitydata.us/#advanced=true + or the beta version of the WQX3.0 API at: + https://www.waterqualitydata.us/beta/#mimeType=csv&providers=NWIS&providers=STORET or the Swagger documentation at: https://www.waterqualitydata.us/data/swagger-ui/index.html?docExpansion=none&url=/data/v3/api-docs#/ Parameters ---------- - ssl_check: bool - If True, check the SSL certificate. Default is True. If False, SSL - certificate is not checked. - **kwargs: optional + ssl_check : bool, optional + Check the SSL certificate. Default is True. + legacy : bool, optional + Return the legacy WQX data profile. Default is True. 
+ **kwargs : optional Accepts the same parameters as :obj:`dataretrieval.wqp.get_results` Returns ------- - df: ``pandas.DataFrame`` + df : ``pandas.DataFrame`` Formatted data returned from the API query. - md: :obj:`dataretrieval.utils.Metadata` + md : :obj:`dataretrieval.utils.Metadata` Custom metadata object pertaining to the query. Examples @@ -219,11 +301,15 @@ def what_projects(ssl_check=True, **kwargs): >>> df, md = dataretrieval.wqp.what_projects(huc='19') """ - _warn_v3_profiles_outage() - kwargs = _alter_kwargs(kwargs) + kwargs = _check_kwargs(kwargs) + + if legacy is True: + url = wqp_url('Project') + else: + print('WQX3.0 profile not available, returning legacy profile.') + url = wqp_url('Project') - url = wqp_url('Project') response = query(url, payload=kwargs, delimiter=';', ssl_check=ssl_check) df = pd.read_csv(StringIO(response.text), delimiter=',') @@ -231,28 +317,35 @@ def what_projects(ssl_check=True, **kwargs): return df, WQP_Metadata(response) -def what_activities(ssl_check=True, **kwargs): +def what_activities( + ssl_check=True, + legacy=True, + **kwargs, +) -> tuple[DataFrame, WQP_Metadata]: """Search WQP for activities within a region with specific data. Any WQP API parameter can be passed as a keyword argument to this function. More information about the API can be found at: https://www.waterqualitydata.us/#advanced=true + or the beta version of the WQX3.0 API at: + https://www.waterqualitydata.us/beta/#mimeType=csv&providers=NWIS&providers=STORET or the Swagger documentation at: https://www.waterqualitydata.us/data/swagger-ui/index.html?docExpansion=none&url=/data/v3/api-docs#/ Parameters ---------- - ssl_check: bool - If True, check the SSL certificate. Default is True. If False, SSL - certificate is not checked. - **kwargs: optional + ssl_check : bool, optional + Check the SSL certificate. Default is True. + legacy : bool, optional + Return the legacy WQX data profile. Default is True. 
+ **kwargs : optional Accepts the same parameters as :obj:`dataretrieval.wqp.get_results` Returns ------- - df: ``pandas.DataFrame`` + df : ``pandas.DataFrame`` Formatted data returned from the API query. - md: :obj:`dataretrieval.utils.Metadata` + md : :obj:`dataretrieval.utils.Metadata` Custom metadata object pertaining to the query. Examples @@ -265,42 +358,60 @@ def what_activities(ssl_check=True, **kwargs): ... statecode='US:11', startDateLo='12-30-2019', startDateHi='01-01-2020' ... ) + >>> # Get activities within Washington D.C. + >>> # using the WQX3.0 profile during a specific time period + >>> df, md = dataretrieval.wqp.what_activities( + ... legacy=False, + ... statecode='US:11', + ... startDateLo='12-30-2019', + ... startDateHi='01-01-2020' + ... ) """ - _warn_v3_profiles_outage() - kwargs = _alter_kwargs(kwargs) + kwargs = _check_kwargs(kwargs) - url = wqp_url('Activity') - response = query(url, payload=kwargs, delimiter=';', ssl_check=ssl_check) + if legacy is True: + url = wqp_url("Activity") + else: + url = wqx3_url("Activity") - df = pd.read_csv(StringIO(response.text), delimiter=',') + response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) + + df = pd.read_csv(StringIO(response.text), delimiter=",") return df, WQP_Metadata(response) -def what_detection_limits(ssl_check=True, **kwargs): +def what_detection_limits( + ssl_check=True, + legacy=True, + **kwargs, +) -> tuple[DataFrame, WQP_Metadata]: """Search WQP for result detection limits within a region with specific data. Any WQP API parameter can be passed as a keyword argument to this function. 
More information about the API can be found at: https://www.waterqualitydata.us/#advanced=true + or the beta version of the WQX3.0 API at: + https://www.waterqualitydata.us/beta/#mimeType=csv&providers=NWIS&providers=STORET or the Swagger documentation at: https://www.waterqualitydata.us/data/swagger-ui/index.html?docExpansion=none&url=/data/v3/api-docs#/ Parameters ---------- - ssl_check: bool - If True, check the SSL certificate. Default is True. If False, SSL - certificate is not checked. - **kwargs: optional + ssl_check : bool + Check the SSL certificate. Default is True. + legacy : bool + Return the legacy WQX data profile. Default is True. + **kwargs : optional Accepts the same parameters as :obj:`dataretrieval.wqp.get_results` Returns ------- - df: ``pandas.DataFrame`` + df : ``pandas.DataFrame`` Formatted data returned from the API query. - md: :obj:`dataretrieval.utils.Metadata` + md : :obj:`dataretrieval.utils.Metadata` Custom metadata object pertaining to the query. Examples @@ -317,11 +428,15 @@ def what_detection_limits(ssl_check=True, **kwargs): ... ) """ - _warn_v3_profiles_outage() - kwargs = _alter_kwargs(kwargs) + kwargs = _check_kwargs(kwargs) + + if legacy is True: + url = wqp_url('ResultDetectionQuantitationLimit') + else: + print('WQX3.0 profile not available, returning legacy profile.') + url = wqp_url('ResultDetectionQuantitationLimit') - url = wqp_url('ResultDetectionQuantitationLimit') response = query(url, payload=kwargs, delimiter=';', ssl_check=ssl_check) df = pd.read_csv(StringIO(response.text), delimiter=',') @@ -329,28 +444,35 @@ def what_detection_limits(ssl_check=True, **kwargs): return df, WQP_Metadata(response) -def what_habitat_metrics(ssl_check=True, **kwargs): +def what_habitat_metrics( + ssl_check=True, + legacy=True, + **kwargs, +) -> tuple[DataFrame, WQP_Metadata]: """Search WQP for habitat metrics within a region with specific data. Any WQP API parameter can be passed as a keyword argument to this function. 
More information about the API can be found at: https://www.waterqualitydata.us/#advanced=true + or the beta version of the WQX3.0 API at: + https://www.waterqualitydata.us/beta/#mimeType=csv&providers=NWIS&providers=STORET or the Swagger documentation at: https://www.waterqualitydata.us/data/swagger-ui/index.html?docExpansion=none&url=/data/v3/api-docs#/ Parameters ---------- - ssl_check: bool - If True, check the SSL certificate. Default is True. If False, SSL - certificate is not checked. - **kwargs: optional + ssl_check : bool + Check the SSL certificate. Default is True. + legacy : bool + Return the legacy WQX data profile. Default is True. + **kwargs : optional Accepts the same parameters as :obj:`dataretrieval.wqp.get_results` Returns ------- - df: ``pandas.DataFrame`` + df : ``pandas.DataFrame`` Formatted data returned from the API query. - md: :obj:`dataretrieval.utils.Metadata` + md : :obj:`dataretrieval.utils.Metadata` Custom metadata object pertaining to the query. Examples @@ -361,11 +483,15 @@ def what_habitat_metrics(ssl_check=True, **kwargs): >>> df, md = dataretrieval.wqp.what_habitat_metrics(statecode='US:44') """ - _warn_v3_profiles_outage() - kwargs = _alter_kwargs(kwargs) + kwargs = _check_kwargs(kwargs) + + if legacy is True: + url = wqp_url('BiologicalMetric') + else: + print('WQX3.0 profile not available, returning legacy profile.') + url = wqp_url('BiologicalMetric') - url = wqp_url('BiologicalMetric') response = query(url, payload=kwargs, delimiter=';', ssl_check=ssl_check) df = pd.read_csv(StringIO(response.text), delimiter=',') @@ -373,28 +499,31 @@ def what_habitat_metrics(ssl_check=True, **kwargs): return df, WQP_Metadata(response) -def what_project_weights(ssl_check=True, **kwargs): +def what_project_weights(ssl_check=True, legacy=True, **kwargs): """Search WQP for project weights within a region with specific data. Any WQP API parameter can be passed as a keyword argument to this function. 
More information about the API can be found at: https://www.waterqualitydata.us/#advanced=true + or the beta version of the WQX3.0 API at: + https://www.waterqualitydata.us/beta/#mimeType=csv&providers=NWIS&providers=STORET or the Swagger documentation at: https://www.waterqualitydata.us/data/swagger-ui/index.html?docExpansion=none&url=/data/v3/api-docs#/ Parameters ---------- - ssl_check: bool - If True, check the SSL certificate. Default is True. If False, SSL - certificate is not checked. - **kwargs: optional + ssl_check : bool + Check the SSL certificate. Default is True. + legacy : bool + Return the legacy WQX data profile. Default is True. + **kwargs : optional Accepts the same parameters as :obj:`dataretrieval.wqp.get_results` Returns ------- - df: ``pandas.DataFrame`` + df : ``pandas.DataFrame`` Formatted data returned from the API query. - md: :obj:`dataretrieval.utils.Metadata` + md : :obj:`dataretrieval.utils.Metadata` Custom metadata object pertaining to the query. Examples @@ -408,11 +537,15 @@ def what_project_weights(ssl_check=True, **kwargs): ... ) """ - _warn_v3_profiles_outage() - kwargs = _alter_kwargs(kwargs) + kwargs = _check_kwargs(kwargs) + + if legacy is True: + url = wqp_url('ProjectMonitoringLocationWeighting') + else: + print('WQX3.0 profile not available, returning legacy profile.') + url = wqp_url('ProjectMonitoringLocationWeighting') - url = wqp_url('ProjectMonitoringLocationWeighting') response = query(url, payload=kwargs, delimiter=';', ssl_check=ssl_check) df = pd.read_csv(StringIO(response.text), delimiter=',') @@ -420,28 +553,31 @@ def what_project_weights(ssl_check=True, **kwargs): return df, WQP_Metadata(response) -def what_activity_metrics(ssl_check=True, **kwargs): +def what_activity_metrics(ssl_check=True, legacy=True, **kwargs): """Search WQP for activity metrics within a region with specific data. Any WQP API parameter can be passed as a keyword argument to this function. 
More information about the API can be found at: https://www.waterqualitydata.us/#advanced=true + or the beta version of the WQX3.0 API at: + https://www.waterqualitydata.us/beta/#mimeType=csv&providers=NWIS&providers=STORET or the Swagger documentation at: https://www.waterqualitydata.us/data/swagger-ui/index.html?docExpansion=none&url=/data/v3/api-docs#/ Parameters ---------- - ssl_check: bool - If True, check the SSL certificate. Default is True. If False, SSL - certificate is not checked. - **kwargs: optional + ssl_check : bool + Check the SSL certificate. Default is True. + legacy : bool + Return the legacy WQX data profile. Default is True. + **kwargs : optional Accepts the same parameters as :obj:`dataretrieval.wqp.get_results` Returns ------- - df: ``pandas.DataFrame`` + df : ``pandas.DataFrame`` Formatted data returned from the API query. - md: :obj:`dataretrieval.utils.Metadata` + md : :obj:`dataretrieval.utils.Metadata` Custom metadata object pertaining to the query. Examples @@ -455,11 +591,15 @@ def what_activity_metrics(ssl_check=True, **kwargs): ... ) """ - _warn_v3_profiles_outage() - kwargs = _alter_kwargs(kwargs) + kwargs = _check_kwargs(kwargs) + + if legacy is True: + url = wqp_url('ActivityMetric') + else: + print('WQX3.0 profile not available, returning legacy profile.') + url = wqp_url('ActivityMetric') - url = wqp_url('ActivityMetric') response = query(url, payload=kwargs, delimiter=';', ssl_check=ssl_check) df = pd.read_csv(StringIO(response.text), delimiter=',') @@ -469,10 +609,34 @@ def what_activity_metrics(ssl_check=True, **kwargs): def wqp_url(service): """Construct the WQP URL for a given service.""" + base_url = 'https://www.waterqualitydata.us/data/' + _warn_legacy_use() + + if service not in services_legacy: + raise TypeError( + 'Legacy service not recognized. Valid options are', + f'{services_legacy}.', + ) + return f'{base_url}{service}/Search?' 
+def wqx3_url(service): + """Construct the WQP URL for a given WQX 3.0 service.""" + + base_url = 'https://www.waterqualitydata.us/wqx3/' + _warn_wqx3_use() + + if service not in services_wqx3: + raise TypeError( + 'WQX3.0 service not recognized. Valid options are', + f'{services_wqx3}.', + ) + + return f'{base_url}{service}/search?' + + class WQP_Metadata(BaseMetadata): """Metadata class for WQP service, derived from BaseMetadata. @@ -480,13 +644,13 @@ class WQP_Metadata(BaseMetadata): ---------- url : str Response url - query_time: datetme.timedelta + query_time : datetime.timedelta Response elapsed time - header: requests.structures.CaseInsensitiveDict + header : requests.structures.CaseInsensitiveDict Response headers - comments: None + comments : None Metadata comments. WQP does not return comments. - site_info: tuple[pd.DataFrame, NWIS_Metadata] | None + site_info : tuple[pd.DataFrame, NWIS_Metadata] | None Site information if the query included `sites`, `site` or `site_no`. """ @@ -496,15 +660,15 @@ def __init__(self, response, **parameters) -> None: Parameters ---------- - response: Response + response : Response Response object from requests module - parameters: unpacked dictionary + parameters : dict Unpacked dictionary of the parameters supplied in the request Returns ------- - md: :obj:`dataretrieval.wqp.WQP_Metadata` + md : :obj:`dataretrieval.wqp.WQP_Metadata` A ``dataretrieval`` custom :obj:`dataretrieval.wqp.WQP_Metadata` object. """ @@ -515,45 +679,42 @@ def __init__(self, response, **parameters) -> None: @property def site_info(self): - if 'sites' in self._parameters: - return what_sites(sites=parameters['sites']) - elif 'site' in self._parameters: - return what_sites(sites=parameters['site']) - elif 'site_no' in self._parameters: - return what_sites(sites=parameters['site_no']) - - -def _alter_kwargs(kwargs): - """Private function to manipulate **kwargs. 
+ if "sites" in self._parameters: + return what_sites(sites=parameters["sites"]) + elif "site" in self._parameters: + return what_sites(sites=parameters["site"]) + elif "site_no" in self._parameters: + return what_sites(sites=parameters["site_no"]) - Not all query parameters are currently supported by ``dataretrieval``, - so this function is used to set some of them and raise warnings to the - user so they are aware of which are being hard-set. +def _check_kwargs(kwargs): + """Private function to check kwargs for unsupported parameters. """ - if kwargs.get('zip', 'no') == 'yes': - warnings.warn('Compressed data not yet supported, zip set to no.') - kwargs['zip'] = 'no' - - if kwargs.get('mimeType', 'csv') == 'geojson': - warnings.warn('GeoJSON not yet supported, mimeType set to csv.') - kwargs['mimeType'] = 'csv' + mimetype = kwargs.get("mimeType") + if mimetype == "geojson": + raise NotImplementedError("GeoJSON not yet supported. Set 'mimeType=csv'.") + elif mimetype != "csv" and mimetype is not None: + raise ValueError("Invalid mimeType. Set 'mimeType=csv'.") + else: + kwargs["mimeType"] = "csv" return kwargs -def _warn_v3_profiles_outage(): - """Private function for warning message about WQX 3.0 profiles - """ +def _warn_wqx3_use(): + message = ( + "Support for the WQX3.0 profiles is experimental. " + "Queries may be slow or fail intermitttently." + ) + warnings.warn(message, UserWarning) + - warnings.warn( - 'USGS discrete water quality data availability ' - 'and format are changing. Beginning in March 2024 ' - 'the data obtained from legacy profiles will not ' - 'include new USGS data or recent updates to existing ' - 'data. To view the status of changes in data ' - 'availability and code functionality, visit: ' - 'https://doi-usgs.github.io/dataRetrieval/articles/Status.html. ' - 'If you have additional questions about these changes, ' - 'email CompTools@usgs.gov.' 
+def _warn_legacy_use(): + message = ( + "This function call will return the legacy WQX format, " + "which means USGS data have not been updated since March 2024. " + "Please review the dataretrieval-python documentation for more " + "information on updated WQX3.0 profiles. Setting `legacy=False` " + "will remove this warning." ) + warnings.warn(message, DeprecationWarning) diff --git a/tests/data/wqp3_results.txt b/tests/data/wqp3_results.txt new file mode 100644 index 00000000..634144ca --- /dev/null +++ b/tests/data/wqp3_results.txt @@ -0,0 +1,6 @@ +Org_Identifier,Org_FormalName,Project_Identifier,Project_Name,Project_QAPPApproved,Project_QAPPApprovalAgency,ProjectAttachment_FileName,ProjectAttachment_FileType,Location_Identifier,Location_Name,Location_Type,Location_Description,Location_State,Location_CountryName,Location_CountyName,Location_CountryCode,Location_StatePostalCode,Location_CountyCode,Location_HUCEightDigitCode,Location_HUCTwelveDigitCode,Location_TribalLandIndicator,Location_TribalLand,Location_Latitude,Location_Longitude,Location_HorzCoordReferenceSystemDatum,Location_LatitudeStandardized,Location_LongitudeStandardized,Location_HorzCoordStandardizedDatum,AlternateLocation_IdentifierCount,Activity_ActivityIdentifier,Activity_ActivityIdentifierUserSupplied,Activity_TypeCode,Activity_Media,Activity_MediaSubdivisionName,Activity_BottomDepthSamplingComponent,ActivityBiological_AssemblageSampled,ActivityBiological_ToxicityTestType,Activity_ConductingOrganization,Activity_Comment,ActivityLocation_Latitude,ActivityLocation_Longitude,ActivityLocation_HorzCoordReferenceSystemDatum,ActivityLocation_SourceMapScale,ActivityLocation_LatitudeStandardized,ActivityLocation_LongitudeStandardized,ActivityLocation_HorzCoordStandardizedDatum,ActivityLocation_HorzAccuracyMeasure,ActivityLocation_HorzAccuracyMeasureUnit,ActivityLocation_HorizontalAccuracyHorzCollectionMethod,ActivityLocation_Description,Activity_StartDate,Activity_StartTime,Activity_StartTimeZone,Activit
y_EndDate,Activity_EndTime,Activity_EndTimeZone,Activity_DepthHeightMeasure,Activity_DepthHeightMeasureUnit,Activity_BottomDepthAltitudeReferencePoint,Activity_ActivityRelativeDepth,Activity_TopDepthMeasure,Activity_TopDepthMeasureUnit,Activity_BottomDepthMeasure,Activity_BottomDepthMeasureUnit,SampleCollectionMethod_Identifier,SampleCollectionMethod_IdentifierContext,SampleCollectionMethod_Name,SampleCollectionMethod_QualifierTypeName,SampleCollectionMethod_Description,SampleCollectionMethod_EquipmentName,SampleCollectionMethod_EquipmentComment,SamplePrepMethod_Identifier,SamplePrepMethod_IdentifierContext,SamplePrepMethod_Name,SamplePrepMethod_QualifierType,SamplePrepMethod_Description,SamplePrepMethod_ContainerLabel,SamplePrepMethod_ContainerType,SamplePrepMethod_ContainerColor,SamplePrepMethod_ChemicalPreservativeUsed,SamplePrepMethod_ThermalPreservativeUsed,SamplePrepMethod_TransportStorageDescription,Activity_HydrologicCondition,Activity_HydrologicEvent,ActivityAttachment_FileName,ActivityAttachment_FileType,ActivityAttachment_FileDownload,Result_DataLoggerLine,Result_ResultDetectionCondition,Result_Characteristic,Result_CharacteristicUserSupplied,Result_CASNumber,Result_MethodSpeciation,Result_SampleFraction,ResultBiological_Intent,ResultBiological_IndividualIdentifier,ResultBiological_Taxon,ResultBiological_TaxonUserSupplied,ResultBiological_TaxonUserSuppliedReference,ResultBiological_UnidentifiedSpeciesIdentifier,ResultBiological_SampleTissueAnatomy,ResultBiological_GroupSummaryCount,GroupSummaryWeight_Measure,GroupSummaryWeightMeasure_Unit,ResultDepthHeight_Measure,ResultDepthHeight_MeasureUnit,ResultDepthHeight_AltitudeReferencePoint,ResultDepthHeight_SamplingPointName,ResultDepthHeight_SamplingPointType,ResultDepthHeight_SamplingPointPlaceInSeries,ResultDepthHeight_SamplingPointComment,ResultDepthHeight_RecordIdentifierUserSupplied,Result_MeasureIdentifier,Result_Measure,Result_MeasureUnit,Result_MeasureQualifierCode,Result_MeasureStatusIdentifier,Result
_StatisticalBase,Result_StatisticalNValue,Result_MeasureType,Result_WeightBasis,Result_TimeBasis,Result_MeasureTemperatureBasis,Result_MeasureParticleSizeBasis,DataQuality_PrecisionValue,DataQuality_BiasValue,DataQuality_ConfidenceIntervalValue,DataQuality_UpperConfidenceLimitValue,DataQuality_LowerConfidenceLimitValue,DataQuality_ResultComment,DetectionLimit_TypeA,DetectionLimit_MeasureA,DetectionLimit_MeasureUnitA,DetectionLimit_CommentA,DetectionLimit_TypeB,DetectionLimit_MeasureB,DetectionLimit_MeasureUnitB,DetectionLimit_CommentB,LabInfo_LabSampleSplitRatio,LabInfo_LabAccreditationIndicator,LabInfo_LabAccreditationAuthority,LabInfo_TaxonAccreditationIndicator,LabInfo_TaxonAccreditationAuthority,ResultAnalyticalMethod_Identifier,ResultAnalyticalMethod_IdentifierContext,ResultAnalyticalMethod_Name,ResultAnalyticalMethod_QualifierType,ResultAnalyticalMethod_Description,Result_ComparableMethodIdentifier,Result_ComparableMethodIdentifierContext,Result_ComparableMethodModification,LabInfo_Name,LabInfo_AnalysisStartDate,LabInfo_AnalysisStartTime,LabInfo_AnalysisStartTimeZone,LabInfo_AnalysisEndDate,LabInfo_AnalysisEndTime,LabInfo_AnalysisEndTimeZone,LabInfo_LaboratoryComment,LabSamplePrepMethod_Identifier,LabSamplePrepMethod_IdentifierContext,LabSamplePrepMethod_Name,LabSamplePrepMethod_QualifierType,LabSamplePrepMethod_Description,LabSamplePrepMethod_StartDate,LabSamplePrepMethod_StartTime,LabSamplePrepMethod_StartTimeZone,LabSamplePrepMethod_EndDate,LabSamplePrepMethod_EndTime,LabSamplePrepMethod_EndTimeZone,LabSamplePrepMethod_DilutionFactor,ResultAttachment_FileName,ResultAttachment_FileType,ResultAttachment_FileDownload,ProviderName,Result_CharacteristicComparable,Result_CharacteristicGroup,Org_Type,LastChangeDate,USGSpcode +WIDNR_WQX,Wisconsin Department of Natural Resources,"[""CBSM_URSS_Madison""]","[""Urban Road Salt Study - Madison Sites""]",,,,,WIDNR_WQX-10032762,North Branch Pheasant Branch downstream of pond at Deming Way,River/Stream,,Wisconsin,United 
States,Dane,US,WI,25,7090002,,,,43.102665,-89.51866,NAD83,43.102665,-89.51866,NAD83,0,WIDNR_WQX-49176537,,Field Msr/Obs,Water,,,,,WIDNR_WQX,,43.102665,-89.51866,NAD83,,43.102665,-89.51866,NAD83,,,Interpolation-Other,,2011-08-08,13:55:00,CDT,2011-08-08,14:05:00,CDT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Specific conductance,,,,,,,,,,,,,,,,,,,,,,,STORET-113777847,471,uS/cm,,Final,,,Actual,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,STORET,,Physical,*State Government US,Fri Jun 15 09:44:33 GMT 2018, +WIDNR_WQX,Wisconsin Department of Natural Resources,"[""CBSM_URSS_Madison""]","[""Urban Road Salt Study - Madison Sites""]",,,,,WIDNR_WQX-10032762,North Branch Pheasant Branch downstream of pond at Deming Way,River/Stream,,Wisconsin,United States,Dane,US,WI,25,7090002,,,,43.102665,-89.51866,NAD83,43.102665,-89.51866,NAD83,0,WIDNR_WQX-47619240,,Field Msr/Obs,Water,,,,,WIDNR_WQX,,43.102665,-89.51866,NAD83,,43.102665,-89.51866,NAD83,,,Interpolation-Other,,2011-07-06,08:35:00,CDT,2011-07-06,08:45:00,CDT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Specific conductance,,,,,,,,,,,,,,,,,,,,,,,STORET-113777841,860,uS/cm,,Final,,,Actual,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,STORET,,Physical,*State Government US,Fri Jun 15 09:44:33 GMT 2018, +WIDNR_WQX,Wisconsin Department of Natural Resources,"[""CBSM_URSS_Madison""]","[""Urban Road Salt Study - Madison Sites""]",,,,,WIDNR_WQX-10032762,North Branch Pheasant Branch downstream of pond at Deming Way,River/Stream,,Wisconsin,United States,Dane,US,WI,25,7090002,,,,43.102665,-89.51866,NAD83,43.102665,-89.51866,NAD83,0,WIDNR_WQX-45822640,,Field Msr/Obs,Water,,,,,WIDNR_WQX,,43.102665,-89.51866,NAD83,,43.102665,-89.51866,NAD83,,,Interpolation-Other,,2011-05-09,12:20:00,CDT,2011-05-09,12:30:00,CDT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Specific conductance,,,,,,,,,,,,,,,,,,,,,,,STORET-113777835,1000,uS/cm,,Final,,,Actual,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,STORET,,Physical,*State Government US,Fri Jun 15 09:44:33 GMT 
2018, +WIDNR_WQX,Wisconsin Department of Natural Resources,"[""CBSM_URSS_Madison""]","[""Urban Road Salt Study - Madison Sites""]",,,,,WIDNR_WQX-10032762,North Branch Pheasant Branch downstream of pond at Deming Way,River/Stream,,Wisconsin,United States,Dane,US,WI,25,7090002,,,,43.102665,-89.51866,NAD83,43.102665,-89.51866,NAD83,0,WIDNR_WQX-46495059,,Field Msr/Obs,Water,,,,,WIDNR_WQX,,43.102665,-89.51866,NAD83,,43.102665,-89.51866,NAD83,,,Interpolation-Other,,2011-06-05,14:45:00,CDT,2011-06-05,14:55:00,CDT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Specific conductance,,,,,,,,,,,,,,,,,,,,,,,STORET-113777838,800,uS/cm,,Final,,,Actual,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,STORET,,Physical,*State Government US,Fri Jun 15 09:44:33 GMT 2018, +WIDNR_WQX,Wisconsin Department of Natural Resources,"[""CBSM_URSS_Madison""]","[""Urban Road Salt Study - Madison Sites""]",,,,,WIDNR_WQX-10032762,North Branch Pheasant Branch downstream of pond at Deming Way,River/Stream,,Wisconsin,United States,Dane,US,WI,25,7090002,,,,43.102665,-89.51866,NAD83,43.102665,-89.51866,NAD83,0,WIDNR_WQX-50689894,,Field Msr/Obs,Water,,,,,WIDNR_WQX,,43.102665,-89.51866,NAD83,,43.102665,-89.51866,NAD83,,,Interpolation-Other,,2011-09-11,16:10:00,CDT,2011-09-11,16:20:00,CDT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Specific conductance,,,,,,,,,,,,,,,,,,,,,,,STORET-113777850,750,uS/cm,,Final,,,Actual,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,STORET,,Physical,*State Government US,Fri Jun 15 09:44:33 GMT 2018, diff --git a/tests/wqp_test.py b/tests/wqp_test.py index a4690183..affb0966 100755 --- a/tests/wqp_test.py +++ b/tests/wqp_test.py @@ -4,18 +4,25 @@ from pandas import DataFrame -from dataretrieval.wqp import (get_results, what_sites, what_organizations, - what_projects, what_activities, - what_detection_limits, what_habitat_metrics, - what_project_weights, what_activity_metrics, - _alter_kwargs) - - -def test_get_ratings(requests_mock): +from dataretrieval.wqp import ( + get_results, + 
what_sites, + what_organizations, + what_projects, + what_activities, + what_detection_limits, + what_habitat_metrics, + what_project_weights, + what_activity_metrics, + _check_kwargs, +) + + +def test_get_results(requests_mock): """Tests water quality portal ratings query""" request_url = "https://www.waterqualitydata.us/data/Result/Search?siteid=WIDNR_WQX-10032762" \ "&characteristicName=Specific+conductance&startDateLo=05-01-2011&startDateHi=09-30-2011" \ - "&zip=no&mimeType=csv" + "&mimeType=csv" response_file_path = 'data/wqp_results.txt' mock_request(requests_mock, request_url, response_file_path) df, md = get_results(siteid='WIDNR_WQX-10032762', @@ -29,9 +36,28 @@ def test_get_ratings(requests_mock): assert md.comment is None +def test_get_results_WQX3(requests_mock): + """Tests water quality portal results query with new WQX3.0 profile""" + request_url = "https://www.waterqualitydata.us/wqx3/Result/search?siteid=WIDNR_WQX-10032762" \ + "&characteristicName=Specific+conductance&startDateLo=05-01-2011&startDateHi=09-30-2011" \ + "&mimeType=csv" \ + "&dataProfile=fullPhysChem" + response_file_path = 'data/wqp3_results.txt' + mock_request(requests_mock, request_url, response_file_path) + df, md = get_results(legacy=False, siteid='WIDNR_WQX-10032762', + characteristicName = 'Specific conductance', + startDateLo='05-01-2011', startDateHi='09-30-2011') + assert type(df) is DataFrame + assert df.size == 900 + assert md.url == request_url + assert isinstance(md.query_time, datetime.timedelta) + assert md.header == {"mock_header": "value"} + assert md.comment is None + + def test_what_sites(requests_mock): """Tests Water quality portal sites query""" - request_url = "https://www.waterqualitydata.us/data/Station/Search?statecode=US%3A34&characteristicName=Chloride&zip=no" \ + request_url = "https://www.waterqualitydata.us/data/Station/Search?statecode=US%3A34&characteristicName=Chloride" \ "&mimeType=csv" response_file_path = 'data/wqp_sites.txt' 
mock_request(requests_mock, request_url, response_file_path) @@ -46,7 +72,7 @@ def test_what_sites(requests_mock): def test_what_organizations(requests_mock): """Tests Water quality portal organizations query""" - request_url = "https://www.waterqualitydata.us/data/Organization/Search?statecode=US%3A34&characteristicName=Chloride&zip=no" \ + request_url = "https://www.waterqualitydata.us/data/Organization/Search?statecode=US%3A34&characteristicName=Chloride" \ "&mimeType=csv" response_file_path = 'data/wqp_organizations.txt' mock_request(requests_mock, request_url, response_file_path) @@ -61,7 +87,7 @@ def test_what_organizations(requests_mock): def test_what_projects(requests_mock): """Tests Water quality portal projects query""" - request_url = "https://www.waterqualitydata.us/data/Project/Search?statecode=US%3A34&characteristicName=Chloride&zip=no" \ + request_url = "https://www.waterqualitydata.us/data/Project/Search?statecode=US%3A34&characteristicName=Chloride" \ "&mimeType=csv" response_file_path = 'data/wqp_projects.txt' mock_request(requests_mock, request_url, response_file_path) @@ -76,7 +102,7 @@ def test_what_projects(requests_mock): def test_what_activities(requests_mock): """Tests Water quality portal activities query""" - request_url = "https://www.waterqualitydata.us/data/Activity/Search?statecode=US%3A34&characteristicName=Chloride&zip=no" \ + request_url = "https://www.waterqualitydata.us/data/Activity/Search?statecode=US%3A34&characteristicName=Chloride" \ "&mimeType=csv" response_file_path = 'data/wqp_activities.txt' mock_request(requests_mock, request_url, response_file_path) @@ -91,7 +117,7 @@ def test_what_activities(requests_mock): def test_what_detection_limits(requests_mock): """Tests Water quality portal detection limits query""" - request_url = "https://www.waterqualitydata.us/data/ResultDetectionQuantitationLimit/Search?statecode=US%3A34&characteristicName=Chloride&zip=no" \ + request_url = 
"https://www.waterqualitydata.us/data/ResultDetectionQuantitationLimit/Search?statecode=US%3A34&characteristicName=Chloride" \ "&mimeType=csv" response_file_path = 'data/wqp_detection_limits.txt' mock_request(requests_mock, request_url, response_file_path) @@ -106,7 +132,7 @@ def test_what_detection_limits(requests_mock): def test_what_habitat_metrics(requests_mock): """Tests Water quality portal habitat metrics query""" - request_url = "https://www.waterqualitydata.us/data/BiologicalMetric/Search?statecode=US%3A34&characteristicName=Chloride&zip=no" \ + request_url = "https://www.waterqualitydata.us/data/BiologicalMetric/Search?statecode=US%3A34&characteristicName=Chloride" \ "&mimeType=csv" response_file_path = 'data/wqp_habitat_metrics.txt' mock_request(requests_mock, request_url, response_file_path) @@ -121,7 +147,7 @@ def test_what_habitat_metrics(requests_mock): def test_what_project_weights(requests_mock): """Tests Water quality portal project weights query""" - request_url = "https://www.waterqualitydata.us/data/ProjectMonitoringLocationWeighting/Search?statecode=US%3A34&characteristicName=Chloride&zip=no" \ + request_url = "https://www.waterqualitydata.us/data/ProjectMonitoringLocationWeighting/Search?statecode=US%3A34&characteristicName=Chloride" \ "&mimeType=csv" response_file_path = 'data/wqp_project_weights.txt' mock_request(requests_mock, request_url, response_file_path) @@ -136,7 +162,7 @@ def test_what_project_weights(requests_mock): def test_what_activity_metrics(requests_mock): """Tests Water quality portal activity metrics query""" - request_url = "https://www.waterqualitydata.us/data/ActivityMetric/Search?statecode=US%3A34&characteristicName=Chloride&zip=no" \ + request_url = "https://www.waterqualitydata.us/data/ActivityMetric/Search?statecode=US%3A34&characteristicName=Chloride" \ "&mimeType=csv" response_file_path = 'data/wqp_activity_metrics.txt' mock_request(requests_mock, request_url, response_file_path) @@ -154,19 +180,11 @@ def 
mock_request(requests_mock, request_url, file_path): requests_mock.get(request_url, text=text.read(), headers={"mock_header": "value"}) -class TestAlterKwargs: - """Tests for keyword alteration. - """ - def test_alter_kwargs_zip(self): - """Tests that zip kwarg is altered correctly and warning is thrown.""" - kwargs = {"zip": "yes", "mimeType": "csv"} - with pytest.warns(UserWarning): - kwargs = _alter_kwargs(kwargs) - assert kwargs == {"zip": "no", "mimeType": "csv"} - - def test_alter_kwargs_mimetype(self): - """Tests that mimetype kwarg is altered correctly and warning is thrown.""" - kwargs = {"zip": "no", "mimeType": "geojson"} - with pytest.warns(UserWarning): - kwargs = _alter_kwargs(kwargs) - assert kwargs == {"zip": "no", "mimeType": "csv"} \ No newline at end of file +def test_check_kwargs(): + """Tests that correct errors are raised for invalid mimetypes.""" + kwargs = {"mimeType": "geojson"} + with pytest.raises(NotImplementedError): + kwargs = _check_kwargs(kwargs) + kwargs = {"mimeType": "foo"} + with pytest.raises(ValueError): + kwargs = _check_kwargs(kwargs)