Skip to content

Commit

Permalink
improve hydropandas extension
Browse files Browse the repository at this point in the history
- fix the case where station is not an int but a Series (can probably be removed again after hydropandas v0.12.4)
- pass axis to squeeze so that length-1 series are not collapsed to a scalar
- check last measurement date in De Bilt, with fallback option
  • Loading branch information
dbrakenhoff committed Sep 27, 2024
1 parent 0bed66a commit 2e405fe
Showing 1 changed file with 40 additions and 23 deletions.
63 changes: 40 additions & 23 deletions pastastore/extensions/hpd.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

import hydropandas as hpd
import numpy as np
from hydropandas.io.knmi import get_stations
from hydropandas.io.knmi import _check_latest_measurement_date_de_bilt, get_stations
from pandas import DataFrame, Series, Timedelta, Timestamp
from pastas.timeseries_utils import timestep_weighted_resample
from tqdm.auto import tqdm
Expand Down Expand Up @@ -179,15 +179,15 @@ def add_observation(
action_msg = "added to"

if libname == "oseries":
self._store.upsert_oseries(o.squeeze(), name, metadata=metadata)
self._store.upsert_oseries(o.squeeze(axis=1), name, metadata=metadata)
logger.info(
"%sobservation '%s' %s oseries library.", source, name, action_msg
)
elif libname == "stresses":
if kind is None:
raise ValueError("`kind` must be specified for stresses!")
self._store.upsert_stress(
(o * unit_multiplier).squeeze(), name, kind, metadata=metadata
(o * unit_multiplier).squeeze(axis=1), name, kind, metadata=metadata
)
logger.info(
"%sstress '%s' (kind='%s') %s stresses library.",
Expand Down Expand Up @@ -394,24 +394,32 @@ def update_knmi_meteo(
tmintmax = self._store.get_tmin_tmax("stresses", names=names)

if tmax is not None:
if tmintmax["tmax"].min() > Timestamp(tmax):
logger.info(f"All KNMI stresses are up to date to {tmax}.")
if tmintmax["tmax"].min() >= Timestamp(tmax):
logger.info(f"All KNMI stresses are up to date till {tmax}.")
return

# NOTE: this check is very flaky (15 august 2024), perhaps I annoyed the
# KNMI server... Trying to skip this check and just attempt downloading data.
# maxtmax_rd = _check_latest_measurement_date_de_bilt("RD")
# maxtmax_ev24 = _check_latest_measurement_date_de_bilt("EV24")
maxtmax = Timestamp.today() - Timedelta(days=1)
try:
maxtmax_rd = _check_latest_measurement_date_de_bilt("RD")
maxtmax_ev24 = _check_latest_measurement_date_de_bilt("EV24")
except Exception as e:
# otherwise use maxtmax 28 days (4 weeks) prior to today
logger.warning(
"Could not check latest measurement date in De Bilt: %s" % str(e)
)
maxtmax_rd = maxtmax_ev24 = Timestamp.today() - Timedelta(days=28)
logger.info(
"Using 28 days (4 weeks) prior to today as maxtmax: %s."
% str(maxtmax_rd)
)

for name in tqdm(names, desc="Updating KNMI meteo stresses"):
meteo_var = self._store.stresses.loc[name, "meteo_var"]
# if meteo_var == "RD":
# maxtmax = maxtmax_rd
# elif meteo_var == "EV24":
# maxtmax = maxtmax_ev24
# else:
# maxtmax = maxtmax_rd
if meteo_var == "RD":
maxtmax = maxtmax_rd
elif meteo_var == "EV24":
maxtmax = maxtmax_ev24
else:
maxtmax = maxtmax_rd

# 1 days extra to ensure computation of daily totals using
# timestep_weighted_resample
Expand All @@ -421,7 +429,7 @@ def update_knmi_meteo(
itmin = tmin - Timedelta(days=1)

# ensure 2 observations at least
if itmin >= (maxtmax + Timedelta(days=1)):
if itmin >= (maxtmax - Timedelta(days=1)):
logger.debug("KNMI %s is already up to date." % name)
continue

Expand All @@ -430,20 +438,29 @@ def update_knmi_meteo(
else:
itmax = Timestamp(tmax)

# fix for duplicate station entry in metadata:
stress_station = (
self._store.stresses.at[name, "station"]
if "station" in self._store.stresses.columns
else None
)
if stress_station is not None and not isinstance(
stress_station, (int, np.integer)
):
stress_station = stress_station.squeeze().unique().item()

unit = self._store.stresses.loc[name, "unit"]
kind = self._store.stresses.loc[name, "kind"]
if "station" in self._store.stresses.columns and ~np.isnan(
self._store.stresses.loc[name, "station"]
):
stn = self._store.stresses.loc[name, "station"]
if stress_station is not None:
stn = stress_station
else:
stns = get_stations(meteo_var)
stn_name = name.split("_")[-1].lower()
mask = stns["name"].str.lower().str.replace(" ", "-") == stn_name
if not mask.any():
logger.warning(
f"Station '%s' not found in list of KNMI {meteo_var} stations."
% stn_name
"Station '%s' not found in list of KNMI %s stations."
% (stn_name, meteo_var)
)
continue
stn = stns.loc[mask].index[0]
Expand Down

0 comments on commit 2e405fe

Please sign in to comment.