Skip to content

Commit

Permalink
96 empty response instead of error if request is too large (#97)
Browse files Browse the repository at this point in the history
* only raise NoDataError for the "Geen gegevens gevonden!" error message (caught in ddlpy.measurements()), otherwise raise UnsuccessfulRequestError (not caught)

* added testcase

* added _send_post_request function to prevent duplicated code

* simplify (unindent) try/except logging code

* updated history.rst
  • Loading branch information
veenstrajelmer authored Apr 25, 2024
1 parent 4c27c47 commit ec667c2
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 86 deletions.
5 changes: 3 additions & 2 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ History

UNRELEASED
----------
* allow for different retrieval frequencies (including None) in `ddlpy.measurements()` https://github.com/Deltares/ddlpy/pull/95
* avoid duplicated periods in dataframe returned by `ddlpy.measurements_amount()` https://github.com/Deltares/ddlpy/pull/93
* avoid duplicated periods in dataframe returned by `ddlpy.measurements_amount()` in https://github.com/Deltares/ddlpy/pull/93
* allow for different retrieval frequencies (including None) in `ddlpy.measurements()` in https://github.com/Deltares/ddlpy/pull/95
* only catch the "Geen gegevens gevonden!" error message and raise all others (for instance for a too large request) in https://github.com/Deltares/ddlpy/pull/97

0.4.0 (2024-04-08)
------------------
Expand Down
154 changes: 70 additions & 84 deletions ddlpy/ddlpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@
ENDPOINTS = json.load(f)


class NoDataException(ValueError):
class NoDataError(ValueError):
pass


class UnsuccessfulRequestError(ValueError):
pass


Expand All @@ -30,26 +34,43 @@ class NoDataException(ValueError):
logger = logging.getLogger(__name__)


def _send_post_request(url, request, timeout=None):
    """
    Post ``request`` as JSON to ``url`` and return the decoded JSON response.

    Parameters
    ----------
    url : endpoint to post to.
    request : JSON-serializable request body.
    timeout : passed on unchanged to ``requests.post``.

    Returns
    -------
    The decoded response, only if its 'Succesvol' entry is truthy.

    Raises
    ------
    IOError
        If the HTTP response is not ok.
    NoDataError
        If the response is unsuccessful and its 'Foutmelding' contains
        "Geen gegevens gevonden".
    UnsuccessfulRequestError
        If the response is unsuccessful with any other error message.
    """
    request_dump = json.dumps(request)
    logger.debug("Requesting at {} with request: {}".format(url, request_dump))

    response = requests.post(url, json=request, timeout=timeout)
    if not response.ok:
        raise IOError("Request failed: {}".format(response.text))

    payload = response.json()
    if payload['Succesvol']:
        # the request was successful, hand the decoded response to the caller
        return payload

    logger.debug('Response result is unsuccessful: {}'.format(payload))
    message = payload.get('Foutmelding', 'No error returned')

    # Foutmelding "Geen gegevens gevonden!" is a valid response for periods
    # where there is no data. It gets its own exception type, which is caught
    # in ddlpy.ddlpy.measurements() so the process can continue.
    # Any other message, for instance
    # "Het max aantal waarnemingen (157681) is overschreven, beperk uw request.",
    # is raised as UnsuccessfulRequestError and is not caught elsewhere in the code.
    if "Geen gegevens gevonden" in message:
        error_class = NoDataError
    else:
        error_class = UnsuccessfulRequestError
    raise error_class(message)


def catalog(catalog_filter=None):
endpoint = ENDPOINTS["collect_catalogue"]

if catalog_filter is None:
# use the default request from endpoints.json
catalog_request = endpoint["request"]
request = endpoint["request"]
else:
assert isinstance(catalog_filter, list)
catalog_request = {"CatalogusFilter": {x:True for x in catalog_filter}}
request = {"CatalogusFilter": {x:True for x in catalog_filter}}

result = _send_post_request(endpoint["url"], request, timeout=None)

msg = "{} with {}".format(endpoint["url"], json.dumps(catalog_request))
logger.debug("requesting: {}".format(msg))

resp = requests.post(endpoint["url"], json=catalog_request)
if not resp.ok:
raise IOError("Failed to request {}: {}".format(msg, resp.text))
result = resp.json()
if not result["Succesvol"]:
logger.exception(str(result))
raise ValueError(result.get("Foutmelding", "No error returned"))
return result


Expand Down Expand Up @@ -135,22 +156,14 @@ def measurements_available(location, start_date, end_date):
}
}

try:
logger.debug('requesting: {}'.format(request))
resp = requests.post(endpoint['url'], json=request, timeout=5)
result = resp.json()
if not result['Succesvol']:
logger.debug('Got invalid response: {}'.format(result))
raise NoDataException(result.get('Foutmelding', 'No error returned'))
except NoDataException as e:
logger.debug('No data availble for {} {}'.format(start_date, end_date))
raise e

if result['Succesvol']:
if result['WaarnemingenAanwezig'] == 'true' :
return True
else:
return False
result = _send_post_request(endpoint["url"], request, timeout=5)

# continue if request was successful
logger.debug('Got response: {}'.format(result))
if result['WaarnemingenAanwezig'] == 'true' :
return True
else:
return False


def measurements_amount(location, start_date, end_date, period="Jaar"):
Expand Down Expand Up @@ -179,35 +192,26 @@ def measurements_amount(location, start_date, end_date, period="Jaar"):
}
}

try:
logger.debug('requesting: {}'.format(request))
resp = requests.post(endpoint['url'], json=request)
result = resp.json()
if not result['Succesvol']:
logger.debug('Got invalid response: {}'.format(result))
raise NoDataException(result.get('Foutmelding', 'No error returned'))
except NoDataException as e:
logger.debug('No data availble for {} {}'.format(start_date, end_date))
raise e

if result['Succesvol']:
df_list = []
for one in result['AantalWaarnemingenPerPeriodeLijst']:
df = pd.json_normalize(one['AantalMetingenPerPeriodeLijst'])

# combine columns to a period string
df["Groeperingsperiode"] = df["Groeperingsperiode.Jaarnummer"].apply(lambda x: f"{x:04d}")
if period in ["Maand", "Dag"]:
df["Groeperingsperiode"] = (df["Groeperingsperiode"] + "-" +
df["Groeperingsperiode.Maandnummer"].apply(lambda x: f"{x:02d}"))
if period in ["Dag"]:
df["Groeperingsperiode"] = (df["Groeperingsperiode"] + "-" +
df["Groeperingsperiode.Dag"].apply(lambda x: f"{x:02d}"))

# select columns from dataframe and append to list
df = df.set_index("Groeperingsperiode")
df = df[["AantalMetingen"]]
df_list.append(df)
result = _send_post_request(endpoint["url"], request, timeout=None)

# continue if request was successful
df_list = []
for one in result['AantalWaarnemingenPerPeriodeLijst']:
df = pd.json_normalize(one['AantalMetingenPerPeriodeLijst'])

# combine columns to a period string
df["Groeperingsperiode"] = df["Groeperingsperiode.Jaarnummer"].apply(lambda x: f"{x:04d}")
if period in ["Maand", "Dag"]:
df["Groeperingsperiode"] = (df["Groeperingsperiode"] + "-" +
df["Groeperingsperiode.Maandnummer"].apply(lambda x: f"{x:02d}"))
if period in ["Dag"]:
df["Groeperingsperiode"] = (df["Groeperingsperiode"] + "-" +
df["Groeperingsperiode.Dag"].apply(lambda x: f"{x:02d}"))

# select columns from dataframe and append to list
df = df.set_index("Groeperingsperiode")
df = df[["AantalMetingen"]]
df_list.append(df)

# concatenate and sum duplicated index
amount_all = pd.concat(df_list).sort_index()
Expand Down Expand Up @@ -298,17 +302,8 @@ def _measurements_slice(location, start_date, end_date):
"Einddatumtijd": end_date_str},
}

try:
logger.debug("requesting: {}".format(request))
resp = requests.post(endpoint["url"], json=request)
result = resp.json()
if not result["Succesvol"]:
logger.debug("Got invalid response: {}".format(result))
raise NoDataException(result.get("Foutmelding", "No error returned"))
except NoDataException as e:
logger.debug("No data availble for {} {}".format(start_date, end_date))
raise e

result = _send_post_request(endpoint["url"], request, timeout=None)

df = _combine_waarnemingenlijst(result, location)
return df

Expand Down Expand Up @@ -381,7 +376,7 @@ def measurements(location, start_date, end_date, freq=dateutil.rrule.MONTHLY, cl
location, start_date=start_date_i, end_date=end_date_i
)
measurements.append(measurement)
except NoDataException:
except NoDataError:
continue

if len(measurements) == 0:
Expand Down Expand Up @@ -410,17 +405,8 @@ def measurements_latest(location):
"LocatieLijst":[request_dicts["Locatie"]]
}

try:
logger.debug('requesting: {}'.format(request))
resp = requests.post(endpoint['url'], json=request, timeout=5)
result = resp.json()
if not result['Succesvol']:
logger.debug('Got invalid response: {}'.format(result))
raise NoDataException(result.get('Foutmelding', 'No error returned'))
except NoDataException as e:
logger.debug('No data availble')
raise e

if result['Succesvol']:
df = _combine_waarnemingenlijst(result, location)
return df
result = _send_post_request(endpoint["url"], request, timeout=5)

# continue if request was successful
df = _combine_waarnemingenlijst(result, location)
return df
30 changes: 30 additions & 0 deletions tests/test_ddlpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,36 @@ def test_measurements_duplicated(measurements):
assert isinstance(meas_clean.index, pd.DatetimeIndex)


def test_nodataerror(location):
    """
    Test whether a request that returns no data properly raises NoDataError.

    This is important since the exception is derived from the returned error
    message "Geen gegevens gevonden!". In case this error message changes in
    the future, this test will fail and the ddlpy code needs to be updated
    accordingly.
    """
    period = {
        "start_date": dt.datetime(2180, 1, 1),
        "end_date": dt.datetime(2180, 4, 1),
    }

    # ddlpy.measurements() catches NoDataError itself, so the exception has to
    # be tested via _measurements_slice instead
    with pytest.raises(ddlpy.ddlpy.NoDataError):
        ddlpy.ddlpy._measurements_slice(location, **period)

    with pytest.raises(ddlpy.ddlpy.NoDataError):
        ddlpy.ddlpy.measurements_amount(location, **period)


# TODO: this testcase is very slow and does not add much value, uncomment it when the ddl is faster
# def test_unsuccessfulrequesterror(location):
# """
# deliberately send a request that is too large to get the error message
# Foutmelding: 'Het max aantal waarnemingen (157681) is overschreven, beperk uw request.'
# which is raised as a UnsuccessfulRequestError
# """
# start_date = dt.datetime(2015, 1, 1)
# end_date = dt.datetime(2020, 1, 1)
# with pytest.raises(ddlpy.ddlpy.UnsuccessfulRequestError):
# #this is the same as ddlpy.measurements(location, start_date=start_date, end_date=end_date, freq=None)
# _ = ddlpy.ddlpy._measurements_slice(location, start_date=start_date, end_date=end_date)


datetype_list = ["string", "pd.Timestamp", "dt.datetime", "mixed"]
@pytest.mark.parametrize("datetype", datetype_list)
def test_check_convert_dates(datetype):
Expand Down

0 comments on commit ec667c2

Please sign in to comment.