From 57581908b58649dceb230d176603f95d01961c63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Tue, 30 Jul 2024 15:37:12 +0200 Subject: [PATCH 01/31] Add hydropandas extension - some thin wrappers around hydropandas for downloading BRO or KNMI data - convenient option to update time series currently in store from BRO or KNMI. --- pastastore/extensions/__init__.py | 13 ++ pastastore/extensions/accessor.py | 7 + pastastore/extensions/hpd.py | 359 ++++++++++++++++++++++++++++++ pastastore/store.py | 4 +- pyproject.toml | 2 +- tests/test_007_hpdextension.py | 33 +++ 6 files changed, 416 insertions(+), 2 deletions(-) create mode 100644 pastastore/extensions/__init__.py create mode 100644 pastastore/extensions/accessor.py create mode 100644 pastastore/extensions/hpd.py create mode 100644 tests/test_007_hpdextension.py diff --git a/pastastore/extensions/__init__.py b/pastastore/extensions/__init__.py new file mode 100644 index 0000000..04ac3a2 --- /dev/null +++ b/pastastore/extensions/__init__.py @@ -0,0 +1,13 @@ +from pastastore.extensions.accessor import ( + register_pastastore_accessor as register_pastastore_accessor, +) + + +def activate_hydropandas_extension(): + """Register Plotly extension for pastas.Model class for interactive plotting.""" + from pastastore.extensions.hpd import HydroPandasExtension as HpdExt + + print( + "Registered HydroPandas extension in PastaStore class, " + "e.g. `pstore.hpd.download_bro()`." 
+ ) diff --git a/pastastore/extensions/accessor.py b/pastastore/extensions/accessor.py new file mode 100644 index 0000000..4697748 --- /dev/null +++ b/pastastore/extensions/accessor.py @@ -0,0 +1,7 @@ +from pastas.extensions.accessor import _register_accessor + + +def register_pastastore_accessor(name: str): + from pastastore.store import PastaStore + + return _register_accessor(name, PastaStore) \ No newline at end of file diff --git a/pastastore/extensions/hpd.py b/pastastore/extensions/hpd.py new file mode 100644 index 0000000..8b34267 --- /dev/null +++ b/pastastore/extensions/hpd.py @@ -0,0 +1,359 @@ +import logging +from typing import Optional, Union + +import hydropandas as hpd +import numpy as np +from pandas import DataFrame, Series, Timedelta, Timestamp +from tqdm.auto import tqdm + +from pastastore.extensions.accessor import register_pastastore_accessor + +logger = logging.getLogger("hydropandas") + + +TimeType = Optional[Union[str, Timestamp]] + + +@register_pastastore_accessor("hpd") +class HydroPandasExtension: + """HydroPandas extension for PastaStore. + + Parameters + ---------- + store: pastastore.store.PastaStore + PastaStore object to extend with HydroPandas functionality + """ + + def __init__(self, store): + """Initialize HydroPandasExtension. + + Parameters + ---------- + store : pastastore.store.PastaStore + PastaStore object to extend with HydroPandas functionality + """ + self._store = store + + def add_obscollection( + self, + libname: str, + oc: hpd.ObsCollection, + kind: Optional[str] = None, + data_column: Optional[str] = None, + unit_multiplier: float = 1.0, + ): + """Add an ObsCollection to the PastaStore. + + Parameters + ---------- + libname : str + Name of the library to add the ObsCollection to ["oseries", "stresses"]. + oc : hpd.ObsCollection + ObsCollection to add to the store. + kind : str, optional + kind identifier for observations, by default None. Required for adding + stresses.
+ data_column : str, optional + name of column containing observation values, by default None. + unit_multiplier : float, optional + multiply unit by this value before saving it in the store + """ + for name, row in oc.iterrows(): + obs = row["obs"] + # metadata = row.drop("obs").to_dict() + self.add_observation( + libname, obs, name=name, kind=kind, data_column=data_column + ) + + def add_observation( + self, + libname: str, + obs: hpd.Obs, + name: Optional[str] = None, + kind: Optional[str] = None, + data_column: Optional[str] = None, + unit_multiplier: float = 1.0, + ): + """Add an hydropandas observation series to the PastaStore. + + Parameters + ---------- + libname : str + Name of the library to add the observation to ["oseries", "stresses"]. + obs : hpd.Obs + hydroPandas observation series to add to the store. + name : str, optional + Name of the observation, by default None. If None, the name of the + observation is used. + kind : str, optional + kind identifier for observations, by default None. Required for adding + stresses. + data_column : str, optional + name of column containing observation values, by default None. + unit_multiplier : float, optional + multiply unit by this value before saving it in the store + """ + # if data_column is not None, use data_column + if data_column is not None: + if not obs.empty: + o = obs[data_column] + else: + o = Series() + # if data_column is None, check no. 
of columns in obs + # if only one column, use that column + elif isinstance(obs, DataFrame) and obs.columns.size == 1: + o = obs.iloc[:, 0] + elif isinstance(obs, Series): + o = obs + # else raise error + else: + raise ValueError("No data_column specified and obs has multiple columns.") + + # break if obs is empty + if o.empty: + logger.info("Observation '%s' is empty, not adding to store.", name) + return + + # gather metadata from obs object + metadata = {key: getattr(obs, key) for key in obs._metadata} + + metadata.pop("name", None) + metadata.pop("meta", None) + unit = metadata.get("unit", None) + if unit == "m" and unit_multiplier == 1e3: + metadata["unit"] = "mm" + elif unit_multiplier != 1.0: + metadata["unit"] = f"{unit_multiplier:e}*{unit}" + + source = metadata.get("source", "") + if len(source) > 0: + source = f"{source} " + + if libname == "oseries": + self._store.add_oseries(o, name, metadata=metadata) + logger.info("%sobservation '%s' added to oseries library.", source, name) + elif libname == "stresses": + if kind is None: + raise ValueError("`kind` must be specified for stresses!") + self._store.add_stress(o * unit_multiplier, name, kind, metadata=metadata) + logger.info( + "%sstress '%s' (kind='%s') added to stresses library.", + source, + name, + kind, + ) + else: + raise ValueError("libname must be 'oseries' or 'stresses'.") + + def download_knmi_precipitation( + self, + stns: Optional[list[int]] = None, + meteo_var: str = "RH", + tmin: TimeType = None, + tmax: TimeType = None, + unit_multiplier: float = 1e3, + update: bool = False, + **kwargs, + ): + """Download precipitation data from KNMI and store in PastaStore. + + Parameters + ---------- + stns : list of int/str, optional + list of station numbers to download data for, by default None + meteo_var : str, optional + variable to download, by default "RH", valid options are ["RD", "RH"]. 
+ tmin : TimeType, optional + start time, by default None + tmax : TimeType, optional + end time, by default None + unit_multiplier : float, optional + multiply unit by this value before saving it in the store, + by default 1e3 to convert m to mm + update : bool, optional + if True, update currently stored precipitation time series with new data + """ + self.download_knmi_meteo( + meteo_var=meteo_var, + kind="prec", + stns=stns, + tmin=tmin, + tmax=tmax, + unit_multiplier=unit_multiplier, + update=update, + **kwargs, + ) + + def download_knmi_evaporation( + self, + stns: Optional[list[int]] = None, + meteo_var: str = "EV24", + tmin: TimeType = None, + tmax: TimeType = None, + unit_multiplier: float = 1e3, + update: bool = False, + **kwargs, + ): + """Download evaporation data from KNMI and store in PastaStore. + + Parameters + ---------- + stns : list of int/str, optional + list of station numbers to download data for, by default None + meteo_var : str, optional + variable to download, by default "EV24" + tmin : TimeType, optional + start time, by default None + tmax : TimeType, optional + end time, by default None + unit_multiplier : float, optional + multiply unit by this value before saving it in the store, + by default 1e3 to convert m to mm + update : bool, optional + if True, update currently stored evaporation time series with new data + """ + self.download_knmi_meteo( + meteo_var=meteo_var, + kind="evap", + stns=stns, + tmin=tmin, + tmax=tmax, + unit_multiplier=unit_multiplier, + update=update, + **kwargs, + ) + + def download_knmi_meteo( + self, + meteo_var: str, + kind: str, + stns: Optional[list[int]] = None, + tmin: TimeType = None, + tmax: TimeType = None, + unit_multiplier: float = 1.0, + update: bool = False, + **kwargs, + ): + """Download meteorological data from KNMI and store in PastaStore. + + Parameters + ---------- + meteo_var : str, optional + variable to download, by default "RH", valid options are + e.g. ["RD", "RH", "EV24", "T", "Q"]. 
+ kind : str + kind identifier for observations, usually "prec" or "evap". + stns : list of int/str, optional + list of station numbers to download data for, by default None + tmin : TimeType, optional + start time, by default None + tmax : TimeType, optional + end time, by default None + unit_multiplier : float, optional + multiply unit by this value before saving it in the store, + by default 1.0 (no conversion) + update : bool, optional + if True, update currently stored precipitation time series with new data + """ + # get tmin/tmax if not specified + if update: + stressnames = self._store.stresses.loc[ + self._store.stresses["kind"] == kind + ].index.tolist() + tmintmax = self._store.get_tmin_tmax("stresses", names=stressnames) + if tmin is None: + tmin = tmintmax.loc[:, "tmax"].min() + if tmax is None: + tmax = Timestamp.now().normalize() + else: + tmintmax = self._store.get_tmin_tmax("oseries") + if tmin is None: + tmin = tmintmax.loc[:, "tmin"].min() - Timedelta(days=10 * 365) + if tmax is None: + tmax = tmintmax.loc[:, "tmax"].max() + + # if update, only download data for stations in store + if update: + locations = None + if stns is None: + stns = self._store.stresses.loc[stressnames, "station"].tolist() + else: + check = np.isin( + stns, self._store.stresses.loc[stressnames, "station"].values + ) + if not check.all(): + raise ValueError( + "Not all specified stations are in the store: " + f"{np.array(stns)[~check]}" + ) + elif stns is None: + locations = self._store.oseries.loc[:, ["x", "y"]] + else: + locations = None + + # download data + knmi = hpd.read_knmi( + locations=locations, + stns=stns, + meteo_vars=[meteo_var], + starts=tmin, + ends=tmax, + **kwargs, + ) + + # add to store + self.add_obscollection( + libname="stresses", + oc=knmi, + kind=kind, + data_column=meteo_var, + unit_multiplier=unit_multiplier, + ) + + def download_bro_gmw( + self, + extent=None, + tmin=None, + tmax=None, + update=False, + **kwargs, + ): + """Download groundwater 
monitoring well observations from BRO. + + Parameters + ---------- + extent: tuple, optional + Extent of the area to download observations from. + tmin: pandas.Timestamp, optional + Start date of the observations to download. + tmax: pandas.Timestamp, optional + End date of the observations to download. + update: bool, optional + If True, update existing observations in the store. + **kwargs: dict, optional + Additional keyword arguments to pass to `hpd.read_bro()` + """ + if extent is not None and update: + raise ValueError("Cannot specify extent AND update=True.") + elif extent is None and not update: + raise ValueError("Either extent or update=True must be specified.") + + if update: + tmintmax = self._store.get_tmin_tmax("oseries") + for obsnam in tqdm( + self._store.oseries.index, desc="Updating oseries from BRO" + ): + bro_id, tube_number = obsnam.split("_") + tmin, tmax = tmintmax.loc[obsnam] + obs = hpd.GroundWaterObs.from_bro( + bro_id, int(tube_number), tmin=tmin, tmax=tmax, **kwargs + ) + self.add_observation("oseries", obs, name=obsnam, data_column="values") + else: + bro = hpd.read_bro( + extent=extent, + tmin=tmin, + tmax=tmax, + **kwargs, + ) + self.add_obscollection("oseries", bro, data_column="values") diff --git a/pastastore/store.py b/pastastore/store.py index abf0e37..4c4f4ad 100644 --- a/pastastore/store.py +++ b/pastastore/store.py @@ -43,6 +43,8 @@ class PastaStore: name of the PastaStore, by default takes the name of the Connector object """ + _accessors = set() + def __init__( self, connector: Optional[BaseConnector] = None, @@ -706,7 +708,7 @@ def create_model( meta = self.conn.get_metadata("oseries", name, as_frame=False) ts = self.conn.get_oseries(name) - # convert to Timeseries and create model + # convert to time series and create model if not ts.dropna().empty: if modelname is None: modelname = name diff --git a/pyproject.toml b/pyproject.toml index 48c7a70..d9edd45 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,7 +45,7 @@ 
documentation = "https://pastastore.readthedocs.io/en/latest/" [project.optional-dependencies] full = ["pastastore[arcticdb,optional]"] -lint = ["black", "flake8", "isort"] +lint = ["ruff"] optional = ["contextily", "pyproj", "adjustText"] test = [ "pastastore[arcticdb,lint,optional]", diff --git a/tests/test_007_hpdextension.py b/tests/test_007_hpdextension.py new file mode 100644 index 0000000..ea06bc2 --- /dev/null +++ b/tests/test_007_hpdextension.py @@ -0,0 +1,33 @@ +# ruff: noqa: D103 +import pytest + +import pastastore as pst +from pastastore.extensions import activate_hydropandas_extension + + +@pytest.mark.slow() +def test_hpd_download_from_bro(): + activate_hydropandas_extension() + pstore = pst.PastaStore() + pstore.hpd.download_bro_gmw( + extent=(117850, 118180, 439550, 439900), tmin="2022-01-01", tmax="2022-01-02" + ) + assert pstore.n_oseries == 3 + + +def test_hpd_download_precipitation_from_knmi(): + activate_hydropandas_extension() + pstore = pst.PastaStore() + pstore.hpd.download_knmi_precipitation( + stns=[260], tmin="2022-01-01", tmax="2022-01-31" + ) + assert pstore.n_stresses == 1 + + +def test_hpd_download_evaporation_from_knmi(): + activate_hydropandas_extension() + pstore = pst.PastaStore() + pstore.hpd.download_knmi_evaporation( + stns=[260], tmin="2022-01-01", tmax="2022-01-31" + ) + assert pstore.n_stresses == 1 From edbebe2828650b2372770a711e59a725847df917 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Tue, 30 Jul 2024 15:45:36 +0200 Subject: [PATCH 02/31] ruff thingies --- pastastore/extensions/__init__.py | 1 + pastastore/extensions/accessor.py | 10 +++++++++- pastastore/extensions/hpd.py | 9 +++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/pastastore/extensions/__init__.py b/pastastore/extensions/__init__.py index 04ac3a2..fd43a0a 100644 --- a/pastastore/extensions/__init__.py +++ b/pastastore/extensions/__init__.py @@ -1,3 +1,4 @@ +# ruff: noqa: D104 F401 from 
pastastore.extensions.accessor import ( register_pastastore_accessor as register_pastastore_accessor, ) diff --git a/pastastore/extensions/accessor.py b/pastastore/extensions/accessor.py index 4697748..92ee257 100644 --- a/pastastore/extensions/accessor.py +++ b/pastastore/extensions/accessor.py @@ -1,7 +1,15 @@ +# ruff: noqa: D100 from pastas.extensions.accessor import _register_accessor def register_pastastore_accessor(name: str): + """Register an extension in the PastaStore class. + + Parameters + ---------- + name : str + name of the extension to register + """ from pastastore.store import PastaStore - return _register_accessor(name, PastaStore) \ No newline at end of file + return _register_accessor(name, PastaStore) diff --git a/pastastore/extensions/hpd.py b/pastastore/extensions/hpd.py index 8b34267..ffebc19 100644 --- a/pastastore/extensions/hpd.py +++ b/pastastore/extensions/hpd.py @@ -1,3 +1,12 @@ +"""HydroPandas extension for PastaStore. + +Features: + +- Add `hpd.Obs` and `hpd.ObsCollection` to PastaStore. +- Download and store meteorological data from KNMI or groundwater observations from BRO. +- Update currently stored (KNMI or BRO) time series from last observation to tmax. 
+""" + import logging from typing import Optional, Union From 8e13a5a4e74d977c0aef1767bf13f46fdb9f244b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Tue, 30 Jul 2024 15:52:53 +0200 Subject: [PATCH 03/31] ruff --- tests/test_007_hpdextension.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_007_hpdextension.py b/tests/test_007_hpdextension.py index ea06bc2..31184d2 100644 --- a/tests/test_007_hpdextension.py +++ b/tests/test_007_hpdextension.py @@ -1,4 +1,4 @@ -# ruff: noqa: D103 +# ruff: noqa: D100 D103 import pytest import pastastore as pst From 24cbc01ac217de9dd9ac0d09721e5806e2a51dea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Tue, 30 Jul 2024 16:06:23 +0200 Subject: [PATCH 04/31] test hydropandas extension only once --- .github/workflows/ci.yml | 6 +++--- pastastore/extensions/__init__.py | 2 +- pyproject.toml | 5 ++++- tests/test_007_hpdextension.py | 4 +++- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3ecd45c..735cd99 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -92,12 +92,12 @@ jobs: - name: Run pytest run: | - pytest -m "not bgmap" + pytest -m "not (bgmap or pastas150)" - - name: Test backgroundmap once + - name: Test marked tests once if: ${{ matrix.python-version == '3.11' && matrix.pastas-version == 'git+https://github.com/pastas/pastas.git@dev' }} run: | - pytest -m "bgmap" + pytest -m "bgmap and pastas150" - name: Run codacy-coverage-reporter if: ${{ github.repository == 'pastas/pastastore' && success() }} diff --git a/pastastore/extensions/__init__.py b/pastastore/extensions/__init__.py index fd43a0a..60dcb8e 100644 --- a/pastastore/extensions/__init__.py +++ b/pastastore/extensions/__init__.py @@ -6,7 +6,7 @@ def activate_hydropandas_extension(): """Register Plotly extension for pastas.Model class for interactive plotting.""" - from pastastore.extensions.hpd import 
HydroPandasExtension as HpdExt + from pastastore.extensions.hpd import HydroPandasExtension as _ print( "Registered HydroPandas extension in PastaStore class, " diff --git a/pyproject.toml b/pyproject.toml index d9edd45..5486be7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -109,7 +109,10 @@ convention = "numpy" [tool.pytest.ini_options] addopts = "--durations=0 --cov-report xml:coverage.xml --cov pastastore -v" -markers = ["bgmap: test tile download for background map"] +markers = [ + "bgmap: test tile download for background map", + "pastas150: tests only working for pastas>=1.5.0", +] [tool.mypy] python_version = "3.9" diff --git a/tests/test_007_hpdextension.py b/tests/test_007_hpdextension.py index 31184d2..9d0bbc4 100644 --- a/tests/test_007_hpdextension.py +++ b/tests/test_007_hpdextension.py @@ -5,7 +5,7 @@ from pastastore.extensions import activate_hydropandas_extension -@pytest.mark.slow() +@pytest.mark.pastas150() def test_hpd_download_from_bro(): activate_hydropandas_extension() pstore = pst.PastaStore() @@ -15,6 +15,7 @@ def test_hpd_download_from_bro(): assert pstore.n_oseries == 3 +@pytest.mark.pastas150() def test_hpd_download_precipitation_from_knmi(): activate_hydropandas_extension() pstore = pst.PastaStore() @@ -24,6 +25,7 @@ def test_hpd_download_precipitation_from_knmi(): assert pstore.n_stresses == 1 +@pytest.mark.pastas150() def test_hpd_download_evaporation_from_knmi(): activate_hydropandas_extension() pstore = pst.PastaStore() From 7b9892b619ddf991c38aececd3c589aaf42d913e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Tue, 30 Jul 2024 17:02:12 +0200 Subject: [PATCH 05/31] only attempt importing extensions when tests are run --- tests/test_007_hpdextension.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_007_hpdextension.py b/tests/test_007_hpdextension.py index 9d0bbc4..e3ab49a 100644 --- a/tests/test_007_hpdextension.py +++ b/tests/test_007_hpdextension.py @@ -2,11 +2,12 
@@ import pytest import pastastore as pst -from pastastore.extensions import activate_hydropandas_extension @pytest.mark.pastas150() def test_hpd_download_from_bro(): + from pastastore.extensions import activate_hydropandas_extension + activate_hydropandas_extension() pstore = pst.PastaStore() pstore.hpd.download_bro_gmw( @@ -17,6 +18,8 @@ def test_hpd_download_from_bro(): @pytest.mark.pastas150() def test_hpd_download_precipitation_from_knmi(): + from pastastore.extensions import activate_hydropandas_extension + activate_hydropandas_extension() pstore = pst.PastaStore() pstore.hpd.download_knmi_precipitation( @@ -27,6 +30,8 @@ def test_hpd_download_precipitation_from_knmi(): @pytest.mark.pastas150() def test_hpd_download_evaporation_from_knmi(): + from pastastore.extensions import activate_hydropandas_extension + activate_hydropandas_extension() pstore = pst.PastaStore() pstore.hpd.download_knmi_evaporation( From c3c38d225b691fad71a7ef0aa91fbbc2d7f3ec9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Tue, 30 Jul 2024 17:10:40 +0200 Subject: [PATCH 06/31] all tests once --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 735cd99..49811a9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -94,10 +94,10 @@ jobs: run: | pytest -m "not (bgmap or pastas150)" - - name: Test marked tests once + - name: Run all tests once if: ${{ matrix.python-version == '3.11' && matrix.pastas-version == 'git+https://github.com/pastas/pastas.git@dev' }} run: | - pytest -m "bgmap and pastas150" + pytest -m "" - name: Run codacy-coverage-reporter if: ${{ github.repository == 'pastas/pastastore' && success() }} From 3a1c07c4c6dad9ce694954c29e46ff882ddb042f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Mon, 5 Aug 2024 15:06:33 +0200 Subject: [PATCH 07/31] add hydropandas to extensions optional dependency group --- 
pyproject.toml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5486be7..26c12e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,8 +45,10 @@ documentation = "https://pastastore.readthedocs.io/en/latest/" [project.optional-dependencies] full = ["pastastore[arcticdb,optional]"] -lint = ["ruff"] +extensions = ["hydropandas"] optional = ["contextily", "pyproj", "adjustText"] +arcticdb = ["arcticdb"] +lint = ["ruff"] test = [ "pastastore[arcticdb,lint,optional]", "hydropandas[full]", @@ -69,7 +71,6 @@ test_py312 = [ "pytest-benchmark", "codacy-coverage", ] -arcticdb = ["arcticdb"] docs = [ "pastastore[optional]", "sphinx_rtd_theme", From 4e2e3b036a4dbc0634cd3fffaec0eb58f46a635a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Tue, 6 Aug 2024 11:39:18 +0200 Subject: [PATCH 08/31] improve message --- pastastore/extensions/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pastastore/extensions/__init__.py b/pastastore/extensions/__init__.py index 60dcb8e..e5b7151 100644 --- a/pastastore/extensions/__init__.py +++ b/pastastore/extensions/__init__.py @@ -10,5 +10,5 @@ def activate_hydropandas_extension(): print( "Registered HydroPandas extension in PastaStore class, " - "e.g. `pstore.hpd.download_bro()`." + "e.g. `pstore.hpd.download_bro_gmw()`." 
) From 7c69904f985d33dca97e05241fdd224529d2403b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Tue, 6 Aug 2024 11:40:24 +0200 Subject: [PATCH 09/31] separate update from download to simplify functions - modify to use upsert to add or update time series --- pastastore/extensions/hpd.py | 222 +++++++++++++++++++++++------------ 1 file changed, 147 insertions(+), 75 deletions(-) diff --git a/pastastore/extensions/hpd.py b/pastastore/extensions/hpd.py index ffebc19..c3ff8ed 100644 --- a/pastastore/extensions/hpd.py +++ b/pastastore/extensions/hpd.py @@ -8,7 +8,7 @@ """ import logging -from typing import Optional, Union +from typing import List, Optional, Union import hydropandas as hpd import numpy as np @@ -50,6 +50,7 @@ def add_obscollection( kind: Optional[str] = None, data_column: Optional[str] = None, unit_multiplier: float = 1.0, + update: bool = False, ): """Add an ObsCollection to the PastaStore. @@ -66,12 +67,20 @@ def add_obscollection( name of column containing observation values, by default None. unit_multiplier : float, optional multiply unit by this value before saving it in the store + update : bool, optional + if True, update currently stored time series with new data """ for name, row in oc.iterrows(): obs = row["obs"] # metadata = row.drop("obs").to_dict() self.add_observation( - libname, obs, name=name, kind=kind, data_column=data_column + libname, + obs, + name=name, + kind=kind, + data_column=data_column, + unit_multiplier=unit_multiplier, + update=update, ) def add_observation( @@ -82,6 +91,7 @@ def add_observation( kind: Optional[str] = None, data_column: Optional[str] = None, unit_multiplier: float = 1.0, + update: bool = False, ): """Add an hydropandas observation series to the PastaStore. @@ -101,6 +111,8 @@ def add_observation( name of column containing observation values, by default None. 
unit_multiplier : float, optional multiply unit by this value before saving it in the store + update : bool, optional + if True, update currently stored time series with new data """ # if data_column is not None, use data_column if data_column is not None: @@ -126,6 +138,13 @@ def add_observation( # gather metadata from obs object metadata = {key: getattr(obs, key) for key in obs._metadata} + # convert np dtypes to builtins + for k, v in metadata.items(): + if isinstance(v, np.integer): + metadata[k] = int(v) + elif isinstance(v, np.floating): + metadata[k] = float(v) + metadata.pop("name", None) metadata.pop("meta", None) unit = metadata.get("unit", None) @@ -138,18 +157,28 @@ def add_observation( if len(source) > 0: source = f"{source} " + if update: + action_msg = "updated in" + else: + action_msg = "added to" + if libname == "oseries": - self._store.add_oseries(o, name, metadata=metadata) - logger.info("%sobservation '%s' added to oseries library.", source, name) + self._store.upsert_oseries(o, name, metadata=metadata) + logger.info( + "%sobservation '%s' %s oseries library.", source, name, action_msg + ) elif libname == "stresses": if kind is None: raise ValueError("`kind` must be specified for stresses!") - self._store.add_stress(o * unit_multiplier, name, kind, metadata=metadata) + self._store.upsert_stress( + o * unit_multiplier, name, kind, metadata=metadata + ) logger.info( - "%sstress '%s' (kind='%s') added to stresses library.", + "%sstress '%s' (kind='%s') %s stresses library.", source, name, kind, + action_msg, ) else: raise ValueError("libname must be 'oseries' or 'stresses'.") @@ -161,7 +190,6 @@ def download_knmi_precipitation( tmin: TimeType = None, tmax: TimeType = None, unit_multiplier: float = 1e3, - update: bool = False, **kwargs, ): """Download precipitation data from KNMI and store in PastaStore. 
@@ -179,8 +207,6 @@ def download_knmi_precipitation( unit_multiplier : float, optional multiply unit by this value before saving it in the store, by default 1e3 to convert m to mm - update : bool, optional - if True, update currently stored precipitation time series with new data """ self.download_knmi_meteo( meteo_var=meteo_var, @@ -189,7 +215,6 @@ def download_knmi_precipitation( tmin=tmin, tmax=tmax, unit_multiplier=unit_multiplier, - update=update, **kwargs, ) @@ -200,7 +225,6 @@ def download_knmi_evaporation( tmin: TimeType = None, tmax: TimeType = None, unit_multiplier: float = 1e3, - update: bool = False, **kwargs, ): """Download evaporation data from KNMI and store in PastaStore. @@ -218,8 +242,6 @@ def download_knmi_evaporation( unit_multiplier : float, optional multiply unit by this value before saving it in the store, by default 1e3 to convert m to mm - update : bool, optional - if True, update currently stored evaporation time series with new data """ self.download_knmi_meteo( meteo_var=meteo_var, @@ -228,7 +250,6 @@ def download_knmi_evaporation( tmin=tmin, tmax=tmax, unit_multiplier=unit_multiplier, - update=update, **kwargs, ) @@ -240,7 +261,6 @@ def download_knmi_meteo( tmin: TimeType = None, tmax: TimeType = None, unit_multiplier: float = 1.0, - update: bool = False, **kwargs, ): """Download meteorological data from KNMI and store in PastaStore. 
@@ -261,41 +281,15 @@ def download_knmi_meteo( unit_multiplier : float, optional multiply unit by this value before saving it in the store, by default 1.0 (no conversion) - update : bool, optional - if True, update currently stored precipitation time series with new data """ # get tmin/tmax if not specified - if update: - stressnames = self._store.stresses.loc[ - self._store.stresses["kind"] == kind - ].index.tolist() - tmintmax = self._store.get_tmin_tmax("stresses", names=stressnames) - if tmin is None: - tmin = tmintmax.loc[:, "tmax"].min() - if tmax is None: - tmax = Timestamp.now().normalize() - else: - tmintmax = self._store.get_tmin_tmax("oseries") - if tmin is None: - tmin = tmintmax.loc[:, "tmin"].min() - Timedelta(days=10 * 365) - if tmax is None: - tmax = tmintmax.loc[:, "tmax"].max() + tmintmax = self._store.get_tmin_tmax("oseries") + if tmin is None: + tmin = tmintmax.loc[:, "tmin"].min() - Timedelta(days=10 * 365) + if tmax is None: + tmax = tmintmax.loc[:, "tmax"].max() - # if update, only download data for stations in store - if update: - locations = None - if stns is None: - stns = self._store.stresses.loc[stressnames, "station"].tolist() - else: - check = np.isin( - stns, self._store.stresses.loc[stressnames, "station"].values - ) - if not check.all(): - raise ValueError( - "Not all specified stations are in the store: " - f"{np.array(stns)[~check]}" - ) - elif stns is None: + if stns is None: locations = self._store.oseries.loc[:, ["x", "y"]] else: locations = None @@ -317,14 +311,71 @@ def download_knmi_meteo( kind=kind, data_column=meteo_var, unit_multiplier=unit_multiplier, + update=False, ) + def update_knmi_meteo( + self, + names: Optional[List[str]] = None, + tmin: TimeType = None, + tmax: TimeType = None, + ): + """Update meteorological data from KNMI in PastaStore. 
+ + Parameters + ---------- + names : list of str, optional + list of names of observations to update, by default None + tmin : TimeType, optional + start time, by default None, which uses current last observation timestamp + as tmin + tmax : TimeType, optional + end time, by default None, which defaults to today + """ + if names is None: + names = self._store.stresses.loc[ + self._store.stresses["source"] == "KNMI" + ].index.tolist() + + tmintmax = self._store.get_tmin_tmax("stresses", names=names) + + for name in tqdm(names, desc="Updating KNMI meteo stresses"): + stn = self._store.stresses.loc[name, "station"] + meteo_var = self._store.stresses.loc[name, "meteo_var"] + unit = self._store.stresses.loc[name, "unit"] + kind = self._store.stresses.loc[name, "kind"] + + if unit == "mm": + unit_multiplier = 1e3 + else: + unit_multiplier = 1.0 + + if tmin is None: + tmin = tmintmax.loc[name, "tmax"] + + knmi = hpd.read_knmi( + stns=[stn], + meteo_vars=[meteo_var], + starts=tmin, + ends=tmax, + ) + + self.add_observation( + "stresses", + knmi["obs"].iloc[0], + name=name, + kind=kind, + data_column=meteo_var, + unit_multiplier=unit_multiplier, + update=True, + ) + def download_bro_gmw( self, - extent=None, - tmin=None, - tmax=None, - update=False, + extent: Optional[List[float | int]] = None, + tmin: TimeType = None, + tmax: TimeType = None, + update: bool = False, **kwargs, ): """Download groundwater monitoring well observations from BRO. @@ -337,32 +388,53 @@ def download_bro_gmw( Start date of the observations to download. tmax: pandas.Timestamp, optional End date of the observations to download. - update: bool, optional - If True, update existing observations in the store. 
**kwargs: dict, optional Additional keyword arguments to pass to `hpd.read_bro()` """ - if extent is not None and update: - raise ValueError("Cannot specify extent AND update=True.") - elif extent is None and not update: - raise ValueError("Either extent or update=True must be specified.") + bro = hpd.read_bro( + extent=extent, + tmin=tmin, + tmax=tmax, + **kwargs, + ) + self.add_obscollection("oseries", bro, data_column="values", update=update) - if update: - tmintmax = self._store.get_tmin_tmax("oseries") - for obsnam in tqdm( - self._store.oseries.index, desc="Updating oseries from BRO" - ): - bro_id, tube_number = obsnam.split("_") - tmin, tmax = tmintmax.loc[obsnam] - obs = hpd.GroundWaterObs.from_bro( - bro_id, int(tube_number), tmin=tmin, tmax=tmax, **kwargs - ) - self.add_observation("oseries", obs, name=obsnam, data_column="values") - else: - bro = hpd.read_bro( - extent=extent, - tmin=tmin, - tmax=tmax, - **kwargs, + def update_bro_gmw( + self, + names: Optional[List[str]] = None, + tmin: TimeType = None, + tmax: TimeType = None, + **kwargs, + ): + """Update groundwater monitoring well observations from BRO. + + Parameters + ---------- + names : list of str, optional + list of names of observations to update, by default None which updates all + stored oseries. 
+ tmin : TimeType, optional + start time, by default None, which uses current last observation timestamp + as tmin + tmax : TimeType, optional + end time, by default None, which defaults to today + **kwargs : dict, optional + Additional keyword arguments to pass to `hpd.GroundwaterObs.from_bro()` + """ + if names is None: + names = self._store.oseries.index.to_list() + + tmintmax = self._store.get_tmin_tmax("oseries") + + for obsnam in tqdm(names, desc="Updating BRO oseries"): + bro_id, tube_number = obsnam.split("_") + + if tmin is None: + _, tmin = tmintmax.loc[obsnam] # tmin is stored tmax + + obs = hpd.GroundwaterObs.from_bro( + bro_id, int(tube_number), tmin=tmin, tmax=tmax, **kwargs + ) + self.add_observation( + "oseries", obs, name=obsnam, data_column="values", update=True ) - self.add_obscollection("oseries", bro, data_column="values") From 1c96b1cfc2b64378ed64b63639a3acbae96c1edd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Tue, 6 Aug 2024 11:40:34 +0200 Subject: [PATCH 10/31] expand tests to check update functions as well --- tests/test_007_hpdextension.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/test_007_hpdextension.py b/tests/test_007_hpdextension.py index e3ab49a..181d00a 100644 --- a/tests/test_007_hpdextension.py +++ b/tests/test_007_hpdextension.py @@ -1,5 +1,6 @@ # ruff: noqa: D100 D103 import pytest +from pandas import Timestamp import pastastore as pst @@ -38,3 +39,28 @@ def test_hpd_download_evaporation_from_knmi(): stns=[260], tmin="2022-01-01", tmax="2022-01-31" ) assert pstore.n_stresses == 1 + + +@pytest.mark.pastas150() +def test_update_oseries(): + from pastastore.extensions import activate_hydropandas_extension + + activate_hydropandas_extension() + + pstore = pst.PastaStore.from_zip("tests/data/test_hpd_update.zip") + pstore.hpd.update_bro_gmw(tmax="2024-01-31") + tmintmax = pstore.get_tmin_tmax("oseries") + assert tmintmax.loc["GMW000000036319_1", "tmax"] >= 
Timestamp("2024-01-30") + assert tmintmax.loc["GMW000000036327_1", "tmax"] >= Timestamp("2024-01-20") + + +@pytest.mark.pastas150() +def test_update_stresses(): + from pastastore.extensions import activate_hydropandas_extension + + activate_hydropandas_extension() + + pstore = pst.PastaStore.from_zip("tests/data/test_hpd_update.zip") + pstore.hpd.update_knmi_meteo(tmax="2024-01-31") + tmintmax = pstore.get_tmin_tmax("stresses") + assert (tmintmax["tmax"] >= Timestamp("2024-01-31")).all() From 8c1755edbf7d283c4a6baa142c9cd5a75f46068e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Tue, 6 Aug 2024 11:40:48 +0200 Subject: [PATCH 11/31] add test pastastore for update tests --- tests/data/test_hpd_update.zip | Bin 0 -> 4310 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/data/test_hpd_update.zip diff --git a/tests/data/test_hpd_update.zip b/tests/data/test_hpd_update.zip new file mode 100644 index 0000000000000000000000000000000000000000..587c2f9ad3aa5122277290d1b08f67cd2e020bae GIT binary patch literal 4310 zcmai2c|4Ts-yT`B<=B}hdm)T{*VqSRne3A_G}#$t7=D$l$U3sGS+i#=jBR2_ksPvQ zVnWEi6q0zSbKdt=-qU&S&vQS|=lSRQT+emi*Y~==kEtO!1uFmmpaJk%QrWz<8&XT6 z0sxG70RYkk@Ikuz-*81r>Orlf{vI;&G9V=fkc2zZ$J;9dTqi8T7_t86@+QUk%S1ZV z$Hp`y)4g?d8}B;(n`HvQ+mV_V%}(Dx`u=@3LE}hN<7lFx<;d#x!QpPgQgK?dC&R|M zjw8!MEa7mQ1^qroWs~#bl!c!fO3q<>2CNF*AatWWK0lg_qUso5-r`6|E=(di_ysR7%hl&t}q-nSwXo|HWD=KnOhzG7L>wHQ}HcWda@JjKT zfqYauxV``rmy%xQQe1>nft?nr<+{~29*s4ykG&~5O(vbw!on%`U0N%sXa;?`RvR@!gX{((+(Q32-d&FprqKU%iU08ZMbePcxw z+r;#y8l+Fs5|DmM%~dn0h-pE#wdTD}OYsLfTpZs@9Je>2PI4*r)&8CY&oW0fUXUlm zD}?2Y-H7!4Kc~ zU<*XqE0AE$2ksYnRq$}3$uf#GQ|K)gDh88LiohqK9jCmTa;U!Px8P25vI6~fStSjh z@>aIhBk1NoeAo}kba^Jwe7$X#B>HSKa*<{}=*9-)V3K-xpmop6M8RufcHoy*w_x7b z?P?75>;}cE80S5FRZ>nxsLr%hZ+v@@-v)!EFMSrcmc>e;IV4AM?~^XI#>7P?mPE@f z;WC^_>$+bRJPN)soQ0*-@O#ebRf=Ih7yAxtjK=i1;*}cRC)~ZvpBPG3-pr_JPsEA>RDLv)FPWW-iAFbG;ywU0;iHa z-pe?ZQMzUm7AV0u^}+C3Dlvy+jK~u|Z*7?pa^nf3gIGP~vTuCeCF_M5B2WABhIvDV 
zQ`X0+j*v0vc1ijWYT}}OcN2?r?5Y@MVYXBCid4cy)eD)<2I?%RufLf6tw$09mXTRXJ*p#$` z9SlKqTu?S8YDLSMR*ewP*JO&^CED9hdtO-x1(oebKzlf5J7&NH3rrc$XhvEckiM6@ zf1kZlV931N*sNbT&jv1Kg+7xq7dkum<|wXbtrLOX6wU>2$u{TOWjdvcKwwV1xLzT; znq_hBbu(Nm6UgsjorLZ!% zNw&X633)1$WsqybjOv%=-hn*nQh&x=S~OTq$-7TqbI8#1%>?;`sed{qg92%c3D)Si z9fDk8V>Jo5B$ePO5*5@A@g1!aheQUAUe{TYiK+3y&~DSHPq)dZ>q!9Sy=C-y-F4VR zuE4GE$Y{W~e%S1VK1={gq{~of&`EV^`^k9TK2>FXhf zH?pXkn6v4WFalaEZ)ABP`QzA@)O%64&%993Cm7C8UD;m$JMf@C(m-sJF>Ha3=;*a- zZ@fTFPHYz3$FF`he1SKP)hi+DtX$cEN54Ja91Y@m>cb2NW7-Ol15o9XIn*cb%&(ll z?Yp$M+<5E7+%OAYMIri7N)>o^bxcl-eJbnw=JKgL4Td&>hNJDlBQn!p!mJJ#&(cQ< zF)$ebAnXqqR-7?jul zsmoBJ%ApnZc97v$B?jN@ZMdBR061v>CDx@C{wLN`RpQTr=(X3SYn@D)@q_lALp_Y4 ziTJ7ao(Tqaz8+FTlE`YOIh$Q_r2*Z->4Z%-x@bF6+r& zzRo1~#=HPI1d2eA%faZn%M)$SThc9*-0@cUd^O;y@yK+5u5E(OyZpHy<-xYoxT81Ki`uPd3-W zUeTxRMO=ZDKf1$JB=o{DuA5@@ouC{JWSf|Ic>v2R-PFeyzC9Ai0BTaWyc3om&C1?& z7G~ny$CxC?!*Pp7dgLg7{yF1BL#%*op9_4L5kt#4_l96fMeMj;p zj1C2`Gew*~n){)+|L}x0gIs@~`yvXXBWRkD^y9`=K~^$Ac=QUoR+xuatq_UxP*OFZ zVJo=6m;F|X=vf&R<&g}dlFi1s`&gG=7<`j>Lz^10LtZtYH3F`BI$Gk9I;3-76j8NJ zekrd}&-Ke$>)WuZD$1oruLwe|^}?fp`vuZD-);RK2qP%W%;mAna;C{F(88COJvNzj zTpJrb)fdr0hYmh=>;*62>faQa6z~!5+N^Y_Ph{^(KK?PyN}b`#e7oB%V~z^=;8Q0@lZUjTlBA{*R0GjkH1(Mq^b&zH3@)+p(t=gzNzOD;O_KE@MekT>^-J*&a z>+Rm@&@>tj!CIvu{YUkNdFT+}NBWLqFe<#BR%%yB7RNaTAy;j`_{Q(inpN_pKFe{8 z+y=FDqc@A+1cuqHyS>L^Bi7eDe%+zeFBBSjNgmbsH?RKKp~qge_}Qx-;7-T?_G&DR z%CT1^N5K9*A-P!edb<_0dS}X;JC?c|agS1~vXq>yYgoLau1MRLPZ`&gU*Lc=I=5B% zmsJ~9!$bF3OEk9p$F2Uds@t>Bnm7AKp>NjseH@K8Ykc%%Ih>Rt3oLh(uBL;^8X)uvlpR|iwMw$(@cuGU6AHWn2vt2O}2Wp3Hh zpSILEp+nxpJ+-?e9WIk};3Pef0sgK?BO<*;zv2`tPc{LpI?S>{Y5^6s=19DXtyO82mbV-lSkD`lfdGSET8hQ3MDo8ZE^oN zw}|=gCt|`mtjy*niL*LMa_*nG>sU(5{vx{Tpc>TnDTiLza=B= z%Nhkg+uXZJ5%{3}t1XuxyR_{Ptj5a*c7K|)cTG}GbEt>LjVJ*P8{Z?Wt1joVNcWWx zUtqc8>NA!l3mK8KTDqU7pizyYNtL~@L#@-^)E;304|X0L9gaU$_*H12;6wW`63J(z zdG2>)f0nwI_PG>m=5vC&CNC-?L*&Lf7ODE*A{Gbu{a TTL1WeK?86k9dR5Pf4uz{p>jMB literal 0 HcmV?d00001 From cf350d68b86e6e29a751aa756cfb49ff197377b2 Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Tue, 6 Aug 2024 11:40:56 +0200 Subject: [PATCH 12/31] update the pastastore version --- pastastore/version.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pastastore/version.py b/pastastore/version.py index 1faf70b..7261c3f 100644 --- a/pastastore/version.py +++ b/pastastore/version.py @@ -9,7 +9,7 @@ PASTAS_LEQ_022 = PASTAS_VERSION <= parse_version("0.22.0") PASTAS_GEQ_150 = PASTAS_VERSION >= parse_version("1.5.0") -__version__ = "1.5.0" +__version__ = "1.6.0" def show_versions(optional=False) -> None: @@ -35,4 +35,6 @@ def show_versions(optional=False) -> None: except ImportError: msg += "Not Installed" + msg += f"\nPastastore version : {__version__}" + print(msg) From 200018214642d6fa2d269a7611ee69f3ae416c63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Tue, 6 Aug 2024 14:36:21 +0200 Subject: [PATCH 13/31] 1.5.0 should be the new version --- pastastore/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pastastore/version.py b/pastastore/version.py index 7261c3f..3b1b97a 100644 --- a/pastastore/version.py +++ b/pastastore/version.py @@ -9,7 +9,7 @@ PASTAS_LEQ_022 = PASTAS_VERSION <= parse_version("0.22.0") PASTAS_GEQ_150 = PASTAS_VERSION >= parse_version("1.5.0") -__version__ = "1.6.0" +__version__ = "1.5.0" def show_versions(optional=False) -> None: From 8a0e0952a7f085b70ab2d6a42772752d0867b864 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Tue, 6 Aug 2024 14:37:10 +0200 Subject: [PATCH 14/31] Revert "1.5.0 should be the new version" This reverts commit 200018214642d6fa2d269a7611ee69f3ae416c63. 
--- pastastore/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pastastore/version.py b/pastastore/version.py index 3b1b97a..7261c3f 100644 --- a/pastastore/version.py +++ b/pastastore/version.py @@ -9,7 +9,7 @@ PASTAS_LEQ_022 = PASTAS_VERSION <= parse_version("0.22.0") PASTAS_GEQ_150 = PASTAS_VERSION >= parse_version("1.5.0") -__version__ = "1.5.0" +__version__ = "1.6.0" def show_versions(optional=False) -> None: From f9e133ff5e198e09060083087d16452eec87bc42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Fri, 9 Aug 2024 16:19:14 +0200 Subject: [PATCH 15/31] suggestion martin --- pastastore/extensions/hpd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pastastore/extensions/hpd.py b/pastastore/extensions/hpd.py index c3ff8ed..da5031a 100644 --- a/pastastore/extensions/hpd.py +++ b/pastastore/extensions/hpd.py @@ -372,7 +372,7 @@ def update_knmi_meteo( def download_bro_gmw( self, - extent: Optional[List[float | int]] = None, + extent: Optional[List[float]] = None, tmin: TimeType = None, tmax: TimeType = None, update: bool = False, From e8e15d4288d735087d776175e8cff1f1fb753372 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Fri, 9 Aug 2024 16:19:44 +0200 Subject: [PATCH 16/31] show pastastore version on top --- pastastore/version.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pastastore/version.py b/pastastore/version.py index 7261c3f..2b3dc6a 100644 --- a/pastastore/version.py +++ b/pastastore/version.py @@ -21,6 +21,7 @@ def show_versions(optional=False) -> None: Print the version of optional dependencies, by default False """ msg = ( + f"Pastastore version : {__version__}\n\n" f"Python version : {python_version()}\n" f"Pandas version : {metadata.version('pandas')}\n" f"Matplotlib version : {metadata.version('matplotlib')}\n" @@ -35,6 +36,4 @@ def show_versions(optional=False) -> None: except ImportError: msg += "Not Installed" - msg += 
f"\nPastastore version : {__version__}" - print(msg) From 12ed9393e67c80091192084b9044747ae862f800 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Fri, 9 Aug 2024 17:02:34 +0200 Subject: [PATCH 17/31] add methods get_stressmodel and add_stressmodel - allow adding/getting stressmodels from pastastore using stress names - support nearest selection - add tests - update `add_recharge` to use new methods. --- pastastore/store.py | 377 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 338 insertions(+), 39 deletions(-) diff --git a/pastastore/store.py b/pastastore/store.py index 4c4f4ad..012f4a7 100644 --- a/pastastore/store.py +++ b/pastastore/store.py @@ -813,50 +813,349 @@ def add_recharge( recharge_name : str name of the RechargeModel """ - # get nearest prec and evap stns - if "prec" not in self.stresses.kind.values: - raise ValueError( - "No stresses with kind='prec' found in store. " - "add_recharge() requires stresses with kind='prec'!" - ) - if "evap" not in self.stresses.kind.values: - raise ValueError( - "No stresses with kind='evap' found in store. " - "add_recharge() requires stresses with kind='evap'!" - ) - names = [] - for var in ("prec", "evap"): - try: - name = self.get_nearest_stresses(ml.oseries.name, kind=var).iloc[0, 0] - except AttributeError as e: - msg = "No precipitation or evaporation time series found!" 
- raise Exception(msg) from e - if isinstance(name, float): - if np.isnan(name): + if recharge is None: + recharge = ps.rch.Linear() + if rfunc is None: + rfunc = ps.Exponential + + self.add_stressmodel( + ml, + stresses={"prec": "nearest", "evap": "nearest"}, + rfunc=rfunc, + stressmodel=ps.RechargeModel, + stressmodel_name=recharge_name, + recharge=recharge, + ) + + def _parse_stresses( + self, + stresses: Union[str, List[str], Dict[str, str]], + kind: Optional[str], + stressmodel, + oseries: Optional[str] = None, + ): + # parse stresses for RechargeModel, allow list of len 2 or 3 and + # set correct kwarg names + if stressmodel._name == "RechargeModel": + if isinstance(stresses, list): + if len(stresses) == 2: + stresses = { + "prec": stresses[0], + "evap": stresses[1], + } + elif len(stresses) == 3: + stresses = { + "prec": stresses[0], + "evap": stresses[1], + "temp": stresses[2], + } + else: raise ValueError( - f"Unable to find nearest '{var}' stress! " - "Check x and y coordinates." + "RechargeModel requires 2 or 3 stress names, " + f"got: {len(stresses)}!" 
) + # if stresses is list, create dictionary normally + elif isinstance(stresses, list): + stresses = {"stress": stresses} + # if stresses is str, make it a list of len 1 + elif isinstance(stresses, str): + stresses = {"stress": [stresses]} + + # check if stresses is a dictionary, else raise TypeError + if not isinstance(stresses, dict): + raise TypeError("stresses must be a list, string or dictionary!") + + # if no kind specified, set to well for WellModel + if stressmodel._name == "WellModel": + if kind is None: + kind = "well" + + # store a copy of the user input for kind + if isinstance(kind, list): + _kind = kind.copy() + else: + _kind = kind + + # create empty list for gathering metadata + metadata = [] + # loop over stresses keys/values + for i, (k, v) in enumerate(stresses.items()): + # if entry in dictionary is str, make it list of len 1 + if isinstance(v, str): + v = [v] + # parse value + if isinstance(v, list): + for item in v: + names = [] # empty list for names + # parse nearest + if item.startswith("nearest"): + # check oseries defined if nearest option is used + if not oseries: + raise ValueError( + "Getting nearest stress(es) requires oseries name!" + ) + try: + if len(item.split()) == 3: # nearest <n> <kind> + n = int(item.split()[1]) + kind = item.split()[2] + elif len(item.split()) == 2: # nearest <n> | <kind> + try: + n = int(item.split()[1]) # try converting to <n> + except ValueError: + n = 1 + kind = item.split()[1] # interpret as <kind> + else: # nearest + n = 1 + # if RechargeModel, we can infer kind + if ( + _kind is None + and stressmodel._name == "RechargeModel" + ): + kind = k + elif _kind is None: # catch no kind with bare nearest + raise ValueError( + "Bare 'nearest' found but no kind specified." + ) + elif isinstance(_kind, list): + kind = _kind[i] # if multiple kind, select i-th + except Exception as e: + # raise if nearest parsing failed + raise ValueError( + f"Could not parse stresses: '{item}'! 
" + "When using option 'nearest', use 'nearest' and specify" + " kind, or 'nearest ' or 'nearest '!" + ) from e + # check if kind exists at all + if kind not in self.stresses.kind.values: + raise ValueError( + f"Could not find stresses with kind='{kind}'!" + ) + # get stress names of nearest stresses + inames = self.get_nearest_stresses( + oseries, kind=kind, n=n + ).iloc[0] + # check if any NaNs in result + if inames.isna().any(): + nkind = (self.stresses.kind == kind).sum() + raise ValueError( + f"Could not find {n} nearest stress(es) for '{kind}'! " + f"There are only {nkind} '{kind}' stresses." + ) + # append names + names += inames.tolist() + else: + # assume name is name of stress + names.append(item) + # get stresses and metadata + stress_series, imeta = self.get_stresses( + names, return_metadata=True, squeeze=True + ) + # replace stress name(s) with time series + if len(names) > 1: + stresses[k] = list(stress_series.values()) + else: + stresses[k] = stress_series + # gather metadata + if isinstance(imeta, list): + metadata += imeta + else: + metadata.append(imeta) + + return stresses, metadata + + def get_stressmodel( + self, + stresses: Union[str, List[str], Dict[str, str]], + stressmodel=ps.StressModel, + stressmodel_name: Optional[str] = None, + rfunc=ps.Exponential, + rfunc_kwargs: Optional[dict] = None, + kind: Optional[Union[List[str], str]] = None, + oseries: Optional[str] = None, + **kwargs, + ): + """Get a Pastas stressmodel from stresses time series in Pastastore. + + Supports "nearest" selection. Any stress name can be replaced by + "nearest [] " where is optional and represents the number of + nearest stresses and and represents the kind of stress to + consider. can also be specified directly with the `kind` kwarg. + + Note: the 'nearest' option requires the oseries name to be provided. + Additionally, 'x' and 'y' metadata must be stored for oseries and stresses. 
+ + Parameters + ---------- + stresses : str, list of str, or dict + name(s) of the time series to use for the stressmodel, or dictionary + with key(s) and value(s) as time series name(s). Options include: + - name of stress: `"prec_stn"` + - list of stress names: `["prec_stn", "evap_stn"]` + - dict for RechargeModel: `{"prec": "prec_stn", "evap": "evap_stn"}` + - dict for StressModel: `{"stress": "well1"}` + - nearest, specifying kind: `"nearest well"` + - nearest specifying number and kind: `"nearest 2 well"` + stressmodel : str or class + stressmodel class to use, by default ps.StressModel + stressmodel_name : str, optional + name of the stressmodel, by default None, which uses the stress name, + if there is 1 stress otherwise the name of the stressmodel type. For + RechargeModels, the name defaults to 'recharge'. + rfunc : str or class + response function class to use, by default ps.Exponential + rfunc_kwargs : dict, optional + keyword arguments to pass to the response function, by default None + kind : str or list of str, optional + specify kind of stress(es) to use, by default None, useful in combination + with 'nearest' option for defining stresses + oseries : str, optional + name of the oseries to use for the stressmodel, by default None, used when + 'nearest' option is used for defining stresses. + **kwargs + additional keyword arguments to pass to the stressmodel + + Returns + ------- + stressmodel : pastas.StressModel + pastas StressModel that can be added to pastas Model. 
+ """ + # get stressmodel class, if str was provided + if isinstance(stressmodel, str): + stressmodel = getattr(ps, stressmodel) + + # parse stresses names to get time series and metadata + stresses, metadata = self._parse_stresses( + stresses=stresses, stressmodel=stressmodel, kind=kind, oseries=oseries + ) + + # get stressmodel name if not provided + if stressmodel_name is None: + if stressmodel._name == "RechargeModel": + stressmodel_name = "recharge" + elif len(metadata) == 1: + stressmodel_name = stresses["stress"].squeeze().name else: - names.append(name) - if len(names) == 0: - msg = "No precipitation or evaporation time series found!" - raise Exception(msg) - - # get data - tsdict = self.conn.get_stresses(names) - metadata = self.conn.get_metadata("stresses", names, as_frame=False) - # add recharge to model - rch = ps.RechargeModel( - tsdict[names[0]], - tsdict[names[1]], - rfunc=rfunc, - name=recharge_name, - recharge=recharge, - settings=("prec", "evap"), + stressmodel_name = stressmodel._name + + # check if metadata is list of len 1 and unpack + if isinstance(metadata, list) and len(metadata) == 1: + metadata = metadata[0] + + # get stressmodel time series settings + if kind and "settings" not in kwargs: + # try using kind to get predefined settings options + if isinstance(kind, str): + kwargs["settings"] = ps.rcParams["timeseries"].get(kind, None) + else: + kwargs["settings"] = [ + ps.rcParams["timeseries"].get(ikind, None) for ikind in kind + ] + elif kind is None and "settings" not in kwargs: + # try using kind stored in metadata to get predefined settings options + if isinstance(metadata, list): + kwargs["settings"] = [ + ps.rcParams["timeseries"].get(imeta.get("kind", None), None) + for imeta in metadata + ] + elif isinstance(metadata, dict): + kwargs["settings"] = ps.rcParams["timeseries"].get( + metadata.get("kind", None), None + ) + + # get rfunc class if str was provided + if isinstance(rfunc, str): + rfunc = getattr(ps, rfunc) + + # create 
empty rfunc_kwargs if not provided + if rfunc_kwargs is None: + rfunc_kwargs = {} + + # special for WellModels + if stressmodel._name == "WellModel": + names = [s.squeeze().name for s in stresses["stress"]] + # check oseries is provided + if oseries is None: + raise ValueError("WellModel requires 'oseries' to compute distances!") + # compute distances and add to kwargs + distances = ( + self.get_distances(oseries=oseries, stresses=names).T.squeeze().values + ) + kwargs["distances"] = distances + # set settings to well + if "settings" not in kwargs: + kwargs["settings"] = "well" + # override rfunc and set to HantushWellModel + rfunc = ps.HantushWellModel + + return stressmodel( + **stresses, + rfunc=rfunc(**rfunc_kwargs), + name=stressmodel_name, metadata=metadata, + **kwargs, + ) + + def add_stressmodel( + self, + ml: ps.Model, + stresses: Union[str, List[str], Dict[str, str]], + stressmodel=ps.StressModel, + stressmodel_name: Optional[str] = None, + rfunc=ps.Exponential, + rfunc_kwargs: Optional[dict] = None, + kind: Optional[Union[List[str], str]] = None, + **kwargs, + ): + """Add a pastas StressModel from stresses time series in Pastastore. + + Supports "nearest" selection. Any stress name can be replaced by + "nearest [<n>] <kind>" where <n> is optional and represents the number of + nearest stresses and <kind> represents the kind of stress to + consider. <kind> can also be specified directly with the `kind` kwarg. + + Note: the 'nearest' option requires the oseries name to be provided. + Additionally, 'x' and 'y' metadata must be stored for oseries and stresses. + + Parameters + ---------- + ml : pastas.Model + pastas.Model object to add StressModel to + stresses : str, list of str, or dict + name(s) of the time series to use for the stressmodel, or dictionary + with key(s) and value(s) as time series name(s). 
Options include: + - name of stress: `"prec_stn"` + - list of stress names: `["prec_stn", "evap_stn"]` + - dict for RechargeModel: `{"prec": "prec_stn", "evap": "evap_stn"}` + - dict for StressModel: `{"stress": "well1"}` + - nearest, specifying kind: `"nearest well"` + - nearest specifying number and kind: `"nearest 2 well"` + stressmodel : str or class + stressmodel class to use, by default ps.StressModel + stressmodel_name : str, optional + name of the stressmodel, by default None, which uses the stress name, + if there is 1 stress otherwise the name of the stressmodel type. For + RechargeModels, the name defaults to 'recharge'. + rfunc : str or class + response function class to use, by default ps.Exponential + rfunc_kwargs : dict, optional + keyword arguments to pass to the response function, by default None + kind : str or list of str, optional + specify kind of stress(es) to use, by default None, useful in combination + with 'nearest' option for defining stresses + **kwargs + additional keyword arguments to pass to the stressmodel + """ + sm = self.get_stressmodel( + stresses=stresses, + stressmodel=stressmodel, + stressmodel_name=stressmodel_name, + rfunc=rfunc, + rfunc_kwargs=rfunc_kwargs, + kind=kind, + oseries=ml.oseries.name, + **kwargs, ) - ml.add_stressmodel(rch) + ml.add_stressmodel(sm) def solve_models( self, From a61dc7eb971f9590095f9e0557611adbf2ff6968 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Fri, 9 Aug 2024 17:02:47 +0200 Subject: [PATCH 18/31] add get_stressmodel tests --- tests/test_008_stressmodels.py | 128 +++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 tests/test_008_stressmodels.py diff --git a/tests/test_008_stressmodels.py b/tests/test_008_stressmodels.py new file mode 100644 index 0000000..f44caf4 --- /dev/null +++ b/tests/test_008_stressmodels.py @@ -0,0 +1,128 @@ +# ruff: noqa: D100 D103 +import pastas as ps +import pytest + + +def 
test_stressmodel_time_series_name(pstore): + pstore.get_stressmodel("evap1") + + +def test_stressmodel_override_settings(pstore): + sm = pstore.get_stressmodel("prec1", settings="evap") + assert sm.stress[0].settings["fill_nan"] == "interpolate" + + # override settings by passing kind: bit weird, but good to check + sm = pstore.get_stressmodel("prec1", kind="evap") + assert sm.stress[0].settings["fill_nan"] == "interpolate" + + +def test_stressmodel_rfunc_kwargs(pstore): + sm = pstore.get_stressmodel("well1", rfunc=ps.Hantush, rfunc_kwargs={"quad": True}) + assert sm.rfunc.quad + + +def test_stressmodel_nearest_kind_no_oseries_specified(pstore): + with pytest.raises(ValueError, match=r"Getting nearest stress*"): + pstore.get_stressmodel("nearest evap") # error oseries + + +def test_stressmodel_nearest_no_kind_specified(pstore): + with pytest.raises(ValueError, match=r"Could not parse stresses*"): + pstore.get_stressmodel("nearest", oseries="oseries1") + + # also in dictionary mode + with pytest.raises(ValueError, match=r"Could not parse stresses*"): + pstore.get_stressmodel({"stress": ["nearest"]}, oseries="oseries1") + + +def test_stressmodel_nearest_kind(pstore): + # nearest kind + sm = pstore.get_stressmodel("nearest evap", oseries="oseries1") + assert sm.stress[0].name == "evap1" + + # nearest kind in dict + sm = pstore.get_stressmodel({"stress": "nearest evap"}, oseries="oseries1") + assert sm.stress[0].name == "evap1" + + # nearest and kind separate + sm = pstore.get_stressmodel({"stress": "nearest"}, kind="evap", oseries="oseries1") + assert sm.stress[0].name == "evap1" + + # nearest in dict and kind separate + sm = pstore.get_stressmodel( + {"stress": ["nearest"]}, kind="evap", oseries="oseries1" + ) + assert sm.stress[0].name == "evap1" + + +def test_recharge_model(pstore): + # test list of stress names + rm = pstore.get_stressmodel(["prec1", "evap1"], stressmodel="RechargeModel") + assert rm.stress[0].name == "prec1" + assert rm.stress[1].name == "evap1" 
+ + # test list of nearest + rm = pstore.get_stressmodel( + ["nearest prec", "nearest evap"], + stressmodel="RechargeModel", + oseries="oseries1", + ) + assert rm.stress[0].name == "prec1" + assert rm.stress[1].name == "evap1" + + # test dict, no kind specified + rm = pstore.get_stressmodel( + {"prec": "nearest", "evap": "nearest"}, + stressmodel="RechargeModel", + oseries="oseries1", + ) + assert rm.stress[0].name == "prec1" + assert rm.stress[1].name == "evap1" + + # test list, bare nearest with kind specified + rm = pstore.get_stressmodel( + ["nearest", "nearest"], + kind=["prec", "evap"], + stressmodel="RechargeModel", + oseries="oseries1", + ) + assert rm.stress[0].name == "prec1" + assert rm.stress[1].name == "evap1" + + +def test_wellmodel(pstore): + # test nearest + wm = pstore.get_stressmodel( + "nearest 2 well", + stressmodel="WellModel", + oseries="oseries1", + ) + assert wm.stress[0].name == "well1" + assert wm.stress[1].name == "well2" + + # test nearest with no kind specified + wm = pstore.get_stressmodel( + "nearest 2", + stressmodel="WellModel", + oseries="oseries1", + ) + assert wm.stress[0].name == "well1" + assert wm.stress[1].name == "well2" + + # test nearest n, with non-existing kind specified + with pytest.raises(ValueError, match=r"Could not find stresses*"): + pstore.get_stressmodel( + "nearest 2", + kind="well2", + stressmodel="WellModel", + oseries="oseries1", + ) + + # test nearest n with n exceeded + with pytest.raises(ValueError, match=r"Could not find*"): + pstore.get_stressmodel( + "nearest 3", + kind="well", + stressmodel="WellModel", + oseries="oseries1", + ) From 821e7304d6e04149afe521834c397b152a966484 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Fri, 9 Aug 2024 17:03:01 +0200 Subject: [PATCH 19/31] typo in docs --- docs/examples.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/examples.rst b/docs/examples.rst index e04b060..dc43b96 100644 --- a/docs/examples.rst +++ 
b/docs/examples.rst @@ -19,7 +19,7 @@ is stored in-memory (in dictionaries):: import pastastore as pst # define dict connector - conn = pst.DictConnect("my_db") + conn = pst.DictConnector("my_db") # create project for managing Pastas data and models store = pst.PastaStore(conn) From a64841f84fe8e4ff076372750a9cb9b632127391 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Fri, 9 Aug 2024 17:05:15 +0200 Subject: [PATCH 20/31] update notebooks --- ...introduction_to_pastastore_databases.ipynb | 14 ++++++++++--- .../ex02_pastastore_plots_and_maps.ipynb | 20 +++++++++++++------ .../ex03_pastastore_yaml_interface.ipynb | 10 +++++++++- 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/examples/notebooks/ex01_introduction_to_pastastore_databases.ipynb b/examples/notebooks/ex01_introduction_to_pastastore_databases.ipynb index a0ba8b6..4fee571 100644 --- a/examples/notebooks/ex01_introduction_to_pastastore_databases.ipynb +++ b/examples/notebooks/ex01_introduction_to_pastastore_databases.ipynb @@ -59,6 +59,13 @@ "PyYAML version : 6.0\n", "\n" ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DeprecationWarning: As of Pastas 1.5, no noisemodel is added to the pastas Model class by default anymore. To solve your model using a noisemodel, you have to explicitly add a noisemodel to your model before solving. For more information, and how to adapt your code, please see this issue on GitHub: https://github.com/pastas/pastas/issues/735" + ] } ], "source": [ @@ -1190,7 +1197,7 @@ "- `solve_models`: solve all or selection of models in database\n", "- `model_results`: get results for all or selection of models in database. Requires the `art_tools` module!\n", "\n", - "Let's add some more data to the pystore to show how the bulk operations work." + "Let's add some more data to the pastastore to show how the bulk operations work." 
] }, { @@ -1202,6 +1209,7 @@ "name": "stderr", "output_type": "stream", "text": [ + "The Time Series 'oseries2' has nan-values. Pastas will use the fill_nan settings to fill up the nan-values.\n", "The Time Series 'oseries2' has nan-values. Pastas will use the fill_nan settings to fill up the nan-values.\n" ] } @@ -1285,7 +1293,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "15ac33d49a044a7a8367b8145287cc55", + "model_id": "3a1f6710cd1942a78adc916d56280bb4", "version_major": 2, "version_minor": 0 }, @@ -1342,7 +1350,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a694566aa81a4df69a55e4c0d28397c8", + "model_id": "577068d685134722a0208d807122da7b", "version_major": 2, "version_minor": 0 }, diff --git a/examples/notebooks/ex02_pastastore_plots_and_maps.ipynb b/examples/notebooks/ex02_pastastore_plots_and_maps.ipynb index 74006d1..74e175f 100644 --- a/examples/notebooks/ex02_pastastore_plots_and_maps.ipynb +++ b/examples/notebooks/ex02_pastastore_plots_and_maps.ipynb @@ -29,7 +29,15 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DeprecationWarning: As of Pastas 1.5, no noisemodel is added to the pastas Model class by default anymore. To solve your model using a noisemodel, you have to explicitly add a noisemodel to your model before solving. For more information, and how to adapt your code, please see this issue on GitHub: https://github.com/pastas/pastas/issues/735" + ] + } + ], "source": [ "import pandas as pd\n", "import pastas as ps\n", @@ -588,7 +596,7 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -868,7 +876,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "77a0d79a996643ba8da6de131d54809c", + "model_id": "79bfed92005546b5a2759a9d533272ea", "version_major": 2, "version_minor": 0 }, @@ -944,7 +952,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "429ee4b61c594916b272adb8dc6e7bae", + "model_id": "cca422c5cb3e44129ffc671dd3dba4e6", "version_major": 2, "version_minor": 0 }, @@ -979,7 +987,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "0fe32202045340b89af9dbda195a5b02", + "model_id": "2a32a3c8cbdf4b14af5995534944b418", "version_major": 2, "version_minor": 0 }, @@ -1014,7 +1022,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "24233327b1b148409e2c80572084b16d", + "model_id": "92c6bf5df3b3447aabdb61ec1885bda8", "version_major": 2, "version_minor": 0 }, diff --git a/examples/notebooks/ex03_pastastore_yaml_interface.ipynb b/examples/notebooks/ex03_pastastore_yaml_interface.ipynb index 32ed85f..fdbae07 100644 --- a/examples/notebooks/ex03_pastastore_yaml_interface.ipynb +++ b/examples/notebooks/ex03_pastastore_yaml_interface.ipynb @@ -33,7 +33,15 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DeprecationWarning: As of Pastas 1.5, no noisemodel is added to the pastas Model class by default anymore. To solve your model using a noisemodel, you have to explicitly add a noisemodel to your model before solving. 
For more information, and how to adapt your code, please see this issue on GitHub: https://github.com/pastas/pastas/issues/735" + ] + } + ], "source": [ "import logging\n", "import os\n", From e7e5f5ee11a128a6d8d25605d51b06a4ed780b92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Fri, 9 Aug 2024 17:10:24 +0200 Subject: [PATCH 21/31] remove PystoreConnector and ArcticConnector and all remaining references --- .gitignore | 1 - pastastore/base.py | 3 +- pastastore/connectors.py | 418 ----------------------------------- pastastore/datasets.py | 10 +- pastastore/store.py | 5 +- pastastore/util.py | 114 +--------- pyproject.toml | 2 - tests/conftest.py | 29 +-- tests/test_002_connectors.py | 9 +- 9 files changed, 12 insertions(+), 579 deletions(-) diff --git a/.gitignore b/.gitignore index ddaa85a..0d77409 100644 --- a/.gitignore +++ b/.gitignore @@ -115,6 +115,5 @@ venv.bak/ .idea/workspace.xml */_old -examples/notebooks/pystore cov.xml *.bat diff --git a/pastastore/base.py b/pastastore/base.py index dbf4d8c..1e71486 100644 --- a/pastastore/base.py +++ b/pastastore/base.py @@ -1471,6 +1471,7 @@ def _parse_model_dict(self, mdict: dict, update_ts_settings: bool = False): else: msg = "stress '{}' not present in library".format(name) raise KeyError(msg) + # hack for pcov w dtype object (when filled with NaNs on store?) if "fit" in mdict: if "pcov" in mdict["fit"]: @@ -1534,7 +1535,7 @@ def _set_series_name(series, name): if isinstance(series, pd.Series): series.name = name # empty string on index name causes trouble when reading - # data from Arctic VersionStores + # data from ArcticDB: TODO: check if still an issue? 
if series.index.name == "": series.index.name = None diff --git a/pastastore/connectors.py b/pastastore/connectors.py index 6a8f83b..99ca25d 100644 --- a/pastastore/connectors.py +++ b/pastastore/connectors.py @@ -4,7 +4,6 @@ import os import warnings from copy import deepcopy -from importlib import import_module from typing import Dict, Optional, Union import pandas as pd @@ -17,195 +16,6 @@ warnings.showwarning = _custom_warning -class ArcticConnector(BaseConnector, ConnectorUtil): # pragma: no cover - """ArcticConnector object that connects to a running MongoDB database via Arctic.""" - - conn_type = "arctic" - - def __init__(self, name: str, connstr: str): - """Create an ArcticConnector object that connects to a MongoDB database. - - Parameters - ---------- - name : str - name of the database - connstr : str - connection string (e.g. 'mongodb://localhost:27017/') - """ - warnings.warn( - "ArcticConnector is deprecated. Please use a different " - "connector, e.g. `pst.ArcticDBConnector`.", - DeprecationWarning, - stacklevel=1, - ) - try: - import arctic - except ModuleNotFoundError as e: - print( - "Please install arctic (also requires " - "a MongoDB instance running somewhere, e.g. " - "MongoDB Community: \n" - "https://docs.mongodb.com/manual/administration" - "/install-community/)!" 
- ) - raise e - self.connstr = connstr - self.name = name - - self.libs: dict = {} - self.arc = arctic.Arctic(connstr) - self._initialize() - self.models = ModelAccessor(self) - # for older versions of PastaStore, if oseries_models library is empty - # populate oseries - models database - self._update_all_oseries_model_links() - - def _initialize(self) -> None: - """Initialize the libraries (internal method).""" - for libname in self._default_library_names: - if self._library_name(libname) not in self.arc.list_libraries(): - self.arc.initialize_library(self._library_name(libname)) - else: - print( - f"ArcticConnector: library " - f"'{self._library_name(libname)}'" - " already exists. Linking to existing library." - ) - self.libs[libname] = self._get_library(libname) - - def _library_name(self, libname: str) -> str: - """Get full library name according to Arctic (internal method).""" - return ".".join([self.name, libname]) - - def _get_library(self, libname: str): - """Get Arctic library handle. - - Parameters - ---------- - libname : str - name of the library - - Returns - ------- - lib : arctic.Library handle - handle to the library - """ - # get library handle - lib = self.arc.get_library(self._library_name(libname)) - return lib - - def _add_item( - self, - libname: str, - item: Union[FrameorSeriesUnion, Dict], - name: str, - metadata: Optional[Dict] = None, - **_, - ) -> None: - """Add item to library (time series or model) (internal method). - - Parameters - ---------- - libname : str - name of the library - item : Union[FrameorSeriesUnion, Dict] - item to add, either time series or pastas.Model as dictionary - name : str - name of the item - metadata : Optional[Dict], optional - dictionary containing metadata, by default None - """ - lib = self._get_library(libname) - lib.write(name, item, metadata=metadata) - - def _get_item(self, libname: str, name: str) -> Union[FrameorSeriesUnion, Dict]: - """Retrieve item from library (internal method). 
- - Parameters - ---------- - libname : str - name of the library - name : str - name of the item - - Returns - ------- - item : Union[FrameorSeriesUnion, Dict] - time series or model dictionary - """ - lib = self._get_library(libname) - return lib.read(name).data - - def _del_item(self, libname: str, name: str) -> None: - """Delete items (series or models) (internal method). - - Parameters - ---------- - libname : str - name of library to delete item from - name : str - name of item to delete - """ - lib = self._get_library(libname) - lib.delete(name) - - def _get_metadata(self, libname: str, name: str) -> dict: - """Retrieve metadata for an item (internal method). - - Parameters - ---------- - libname : str - name of the library - name : str - name of the item - - Returns - ------- - dict - dictionary containing metadata - """ - lib = self._get_library(libname) - return lib.read_metadata(name).metadata - - @property - def oseries_names(self): - """List of oseries names. - - Returns - ------- - list - list of oseries in library - """ - return self._get_library("oseries").list_symbols() - - @property - def stresses_names(self): - """List of stresses names. - - Returns - ------- - list - list of stresses in library - """ - return self._get_library("stresses").list_symbols() - - @property - def model_names(self): - """List of model names. 
- - Returns - ------- - list - list of models in library - """ - return self._get_library("models").list_symbols() - - @property - def oseries_with_models(self): - """List of oseries with models.""" - return self._get_library("oseries_models").list_symbols() - - class ArcticDBConnector(BaseConnector, ConnectorUtil): """ArcticDBConnector object using ArcticDB to store data.""" @@ -388,234 +198,6 @@ def oseries_with_models(self): return self._get_library("oseries_models").list_symbols() -class PystoreConnector(BaseConnector, ConnectorUtil): # pragma: no cover - """PystoreConnector object using pystore as database backend.""" - - conn_type = "pystore" - - def __init__(self, name: str, path: str): - """Create a PystoreConnector object that points to a Pystore. - - Parameters - ---------- - name : str - name of the store - path : str - path to the pystore directory - """ - warnings.warn( - "PystoreConnector is deprecated. Please use a different " - "connector, e.g. `pst.PasConnector`.", - DeprecationWarning, - stacklevel=1, - ) - try: - import pystore - except ModuleNotFoundError as e: - print( - "Install pystore, follow instructions at " - "https://github.com/ranaroussi/pystore#dependencies" - ) - raise e - self.name = name - self.path = path - pystore.set_path(self.path) - self.store = pystore.store(self.name) - self.libs: dict = {} - self._initialize() - self.models = ModelAccessor(self) - # for older versions of PastaStore, if oseries_models library is empty - # populate oseries - models database - self._update_all_oseries_model_links() - - def _initialize(self) -> None: - """Initialize the libraries (stores) (internal method).""" - for libname in self._default_library_names: - if libname in self.store.list_collections(): - print( - f"PystoreConnector: library '{self.path}/{libname}' " - "already exists. Linking to existing library." 
- ) - lib = self.store.collection(libname) - self.libs[libname] = lib - - def _get_library(self, libname: str): - """Get Pystore library handle. - - Parameters - ---------- - libname : str - name of the library - - Returns - ------- - Pystore.Collection handle - handle to the library - """ - # get library handle - lib = self.store.collection(libname) - return lib - - def _add_item( - self, - libname: str, - item: Union[FrameorSeriesUnion, Dict], - name: str, - metadata: Optional[Dict] = None, - overwrite: bool = False, - ) -> None: - """Add item to library (time series or model) (internal method). - - Parameters - ---------- - libname : str - name of the library - item : Union[FrameorSeriesUnion, Dict] - item to add, either time series or pastas.Model as dictionary - name : str - name of the item - metadata : Optional[Dict], optional - dictionary containing metadata, by default None - overwrite : bool, optional - overwrite item if it already exists, by default False. - """ - # convert to DataFrame because pystore doesn't accept pandas.Series - # (maybe has an easy fix, but converting w to_frame for now) - if isinstance(item, pd.Series): - s = item.to_frame(name=name) - is_type = "series" - elif isinstance(item, dict): - s = pd.DataFrame() # empty DataFrame as placeholder - jsondict = json.loads(json.dumps(item, cls=PastasEncoder, indent=4)) - metadata = jsondict # model dict is stored in metadata - is_type = "series" - elif isinstance(item, list): - s = pd.Series(item).to_frame(name="modelnames") - is_type = "list" - elif isinstance(item, pd.DataFrame): - s = item - is_type = "dataframe" - - # store info about input type to ensure same type is returned - if metadata is None: - metadata = {"_is_type": is_type} - else: - metadata["_is_type"] = is_type - - lib = self._get_library(libname) - lib.write(name, s, metadata=metadata, overwrite=overwrite) - - def _get_item(self, libname: str, name: str) -> Union[FrameorSeriesUnion, Dict]: - """Retrieve item from pystore 
library (internal method). - - Parameters - ---------- - libname : str - name of the library - name : str - name of the item - - Returns - ------- - item : Union[FrameorSeriesUnion, Dict] - time series or model dictionary - """ - load_mod = import_module("pastas.io.pas") # type: ignore - lib = self._get_library(libname) - # hack for storing models, stored as metadata - if libname == "models": - jsonpath = lib._item_path(name).joinpath("metadata.json") - s = load_mod.load(jsonpath) # type: ignore - else: - # read series and convert to pandas - item = lib.item(name) - s = item.to_pandas() - # remove _is_type key and return correct type - is_type = item.metadata.pop("_is_type") - if is_type == "series": - s = s.squeeze() - elif is_type == "list": - s = s["modelnames"].tolist() - return s - - def _del_item(self, libname: str, name: str) -> None: - """Delete data from the store (internal method). - - Parameters - ---------- - libname : str - name of the library - name : str - name of the item to delete - """ - lib = self._get_library(libname) - lib.delete_item(name) - self._clear_cache(libname) - - def _get_metadata(self, libname: str, name: str) -> dict: - """Read metadata from pystore (internal method). - - Parameters - ---------- - libname : str - name of the library the series are in ("oseries" or "stresses") - name : str - name of item to load metadata for - - Returns - ------- - imeta : dict - dictionary containing metadata - """ - from pystore.utils import read_metadata - - lib = self._get_library(libname) - imeta = read_metadata(lib._item_path(name)) - if "name" not in imeta.keys(): - imeta["name"] = name - if "_is_type" in imeta.keys(): - imeta.pop("_is_type") - return imeta - - @property - def oseries_names(self): - """List of oseries names. - - Returns - ------- - list - list of oseries in library - """ - return list(self._get_library("oseries").list_items()) - - @property - def stresses_names(self): - """List of stresses names. 
- - Returns - ------- - list - list of stresses in library - """ - return list(self._get_library("stresses").list_items()) - - @property - def model_names(self): - """List of model names. - - Returns - ------- - list - list of models in library - """ - return list(self._get_library("models").list_items()) - - @property - def oseries_with_models(self): - """List of oseries with models.""" - return list(self._get_library("oseries_models").list_items()) - - class DictConnector(BaseConnector, ConnectorUtil): """DictConnector object that stores timeseries and models in dictionaries.""" diff --git a/pastastore/datasets.py b/pastastore/datasets.py index ab4dce6..b8055c6 100644 --- a/pastastore/datasets.py +++ b/pastastore/datasets.py @@ -176,8 +176,7 @@ def _default_connector(conntype: str): Parameters ---------- conntype : str - name of connector (DictConnector, PasConnector, - ArcticConnector, ArcticDBConnector or PystoreConnector) + name of connector (DictConnector, PasConnector, ArcticDBConnector) Returns ------- @@ -185,14 +184,9 @@ def _default_connector(conntype: str): default Connector based on type. """ Conn = getattr(pst, conntype) - if Conn.conn_type == "arctic": - connstr = "mongodb://localhost:27017/" - conn = Conn("my_db", connstr) - elif Conn.conn_type == "arcticdb": + if Conn.conn_type == "arcticdb": uri = "lmdb://./arctic_db" conn = Conn("my_db", uri) - elif Conn.conn_type == "pystore": - conn = Conn("my_db", "./pystore_db") elif Conn.conn_type == "dict": conn = Conn("my_db") elif Conn.conn_type == "pas": diff --git a/pastastore/store.py b/pastastore/store.py index 012f4a7..d853eef 100644 --- a/pastastore/store.py +++ b/pastastore/store.py @@ -30,9 +30,8 @@ class PastaStore: the database. 
Different Connectors are available, e.g.: - PasConnector for storing all data as .pas (JSON) files on disk (recommended) - - DictConenctor for storing all data in dictionaries (in-memory) - - ArcticConnector for saving data to MongoDB using the Arctic module - - PystoreConnector for saving data to disk using the Pystore module + - ArcticDBConnector for saving data on disk using arcticdb package + - DictConnector for storing all data in dictionaries (in-memory) Parameters ---------- diff --git a/pastastore/util.py b/pastastore/util.py index 8da4823..0cd5f7d 100644 --- a/pastastore/util.py +++ b/pastastore/util.py @@ -1,7 +1,6 @@ """Useful utilities for pastastore.""" import os -import warnings from typing import Dict, List, Optional, Union import numpy as np @@ -24,111 +23,6 @@ class ItemInLibraryException(Exception): pass -# TODO: remove in future version -def delete_pystore_connector( - conn=None, - path: Optional[str] = None, - name: Optional[str] = None, - libraries: Optional[List[str]] = None, -) -> None: # pragma: no cover - """Delete libraries from pystore. - - Parameters - ---------- - conn : PystoreConnector, optional - PystoreConnector object - path : str, optional - path to pystore - name : str, optional - name of the pystore - libraries : Optional[List[str]], optional - list of library names to delete, by default None which deletes - all libraries - """ - warnings.warn( - "This function is deprecated. We recommend to migrate to a different " - "Connector, e.g. 
`pst.PasConnector`.", - DeprecationWarning, - stacklevel=1, - ) - import pystore - - if conn is not None: - name = conn.name - path = conn.path - elif name is None or path is None: - raise ValueError("Please provide 'name' and 'path' OR 'conn'!") - - print(f"Deleting PystoreConnector database: '{name}' ...", end="") - pystore.set_path(path) - if libraries is None: - pystore.delete_store(name) - print(" Done!") - else: - store = pystore.store(name) - for lib in libraries: - print() - store.delete_collection(lib) - print(f" - deleted: {lib}") - - -# TODO: remove in future version -def delete_arctic_connector( - conn=None, - connstr: Optional[str] = None, - name: Optional[str] = None, - libraries: Optional[List[str]] = None, -) -> None: # pragma: no cover - """Delete libraries from arctic database. - - Parameters - ---------- - conn : pastastore.ArcticConnector - ArcticConnector object - connstr : str, optional - connection string to the database - name : str, optional - name of the database - libraries : Optional[List[str]], optional - list of library names to delete, by default None which deletes - all libraries - """ - warnings.warn( - "This function is deprecated. We recommend to migrate to a different " - "Connector, e.g. `pst.ArcticDBConnector`.", - DeprecationWarning, - stacklevel=1, - ) - import arctic - - if conn is not None: - name = conn.name - connstr = conn.connstr - elif name is None or connstr is None: - raise ValueError("Provide 'name' and 'connstr' OR 'conn'!") - - arc = arctic.Arctic(connstr) - - print(f"Deleting ArcticConnector database: '{name}' ... ", end="") - # get library names - if libraries is None: - libs = [] - for ilib in arc.list_libraries(): - if ilib.split(".")[0] == name: - libs.append(ilib) - elif name is not None: - libs = [name + "." 
+ ilib for ilib in libraries] - else: - raise ValueError("Provide 'name' and 'connstr' OR 'conn'!") - - for lib in libs: - arc.delete_library(lib) - if libraries is not None: - print() - print(f" - deleted: {lib}") - print("Done!") - - def delete_arcticdb_connector( conn=None, uri: Optional[str] = None, @@ -243,14 +137,8 @@ def delete_pastastore(pstore, libraries: Optional[List[str]] = None) -> None: TypeError when Connector type is not recognized """ - # TODO: remove in future version - if pstore.conn.conn_type == "pystore": - delete_pystore_connector(conn=pstore.conn, libraries=libraries) - elif pstore.conn.conn_type == "dict": + if pstore.conn.conn_type == "dict": delete_dict_connector(pstore) - # TODO: remove in future version - elif pstore.conn.conn_type == "arctic": - delete_arctic_connector(conn=pstore.conn, libraries=libraries) elif pstore.conn.conn_type == "arcticdb": delete_arcticdb_connector(conn=pstore.conn, libraries=libraries) elif pstore.conn.conn_type == "pas": diff --git a/pyproject.toml b/pyproject.toml index 26c12e6..175e0e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -128,8 +128,6 @@ module = [ 'mypy-pypandoc.*', 'mypy-numpy.*', 'mypy-pandas.*', - 'mypy-pystore.*', - 'mypy-arctic.*', 'mypy-art_tools.*', 'mypy-tqdm.*', 'mypy-pastas.*', diff --git a/tests/conftest.py b/tests/conftest.py index 92f0eb3..6976b12 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -66,15 +66,9 @@ def conn(request): """Fixture that yields connection object.""" name = f"test_{request.param}" # connect to dbase - if request.param == "arctic": - connstr = "mongodb://localhost:27017/" - conn = pst.ArcticConnector(name, connstr) - elif request.param == "arcticdb": + if request.param == "arcticdb": uri = "lmdb://./arctic_db/" conn = pst.ArcticDBConnector(name, uri) - elif request.param == "pystore": - path = "./tests/data/pystore" - conn = pst.PystoreConnector(name, path) elif request.param == "dict": conn = pst.DictConnector(name) elif request.param == "pas": 
@@ -87,21 +81,10 @@ def conn(request): @pytest.fixture(scope="module", params=params) def pstore(request): - if request.param == "arctic": - connstr = "mongodb://localhost:27017/" - name = "test_project" - connector = pst.ArcticConnector(name, connstr) - elif request.param == "arcticdb": + if request.param == "arcticdb": name = "test_project" uri = "lmdb://./arctic_db/" connector = pst.ArcticDBConnector(name, uri) - elif request.param == "pystore": - name = "test_project" - path = "./tests/data/pystore" - import pystore - - pystore.set_path(path) - connector = pst.PystoreConnector(name, path) elif request.param == "dict": name = "test_project" connector = pst.DictConnector(name) @@ -116,14 +99,6 @@ def pstore(request): pst.util.delete_pastastore(pstore) -def delete_arctic_test_db(): - connstr = "mongodb://localhost:27017/" - name = "test_project" - connector = pst.ArcticConnector(name, connstr) - pst.util.delete_arctic_connector(connector) - print("ArcticConnector 'test_project' deleted.") - - def delete_arcticdb_test_db(): connstr = "lmdb://./arctic_db/" name = "test_project" diff --git a/tests/test_002_connectors.py b/tests/test_002_connectors.py index fc6d0cf..750497a 100644 --- a/tests/test_002_connectors.py +++ b/tests/test_002_connectors.py @@ -269,12 +269,9 @@ def test_empty_library(request, conn): @pytest.mark.dependency() def test_delete(request, conn): # no need to delete dictconnector (in memory) - if conn.conn_type == "arctic": - pst.util.delete_arctic_connector(conn, libraries=["oseries"]) - pst.util.delete_arctic_connector(conn) - elif conn.conn_type == "pystore": - pst.util.delete_pystore_connector(conn, libraries=["oseries"]) - pst.util.delete_pystore_connector(conn) + if conn.conn_type == "arcticdb": + pst.util.delete_arcticdb_connector(conn, libraries=["oseries"]) + pst.util.delete_arcticdb_connector(conn) elif conn.conn_type == "pas": pst.util.delete_pas_connector(conn, libraries=["oseries"]) pst.util.delete_pas_connector(conn) From 
ef2a4d3615bf2ecdac7a07d692085a382388a9e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Fri, 9 Aug 2024 17:10:48 +0200 Subject: [PATCH 22/31] improve tests --- tests/conftest.py | 5 ++++- tests/test_002_connectors.py | 2 +- tests/test_003_pastastore.py | 8 ++------ 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 6976b12..c6cb24e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,7 +12,6 @@ IS_PY312 = parse_version(python_version()) >= parse_version("3.12.0") -# "arctic" and "pystore" removed for CI, can be tested locally params = ["dict", "pas", "arcticdb"] if not IS_PY312 else ["dict", "pas"] @@ -57,6 +56,10 @@ def initialize_project(conn): # pastas<=0.22.0 pass pstore.add_stress(s, "well1", kind="well", metadata={"x": 164691, "y": 423579}) + # add second well + pstore.add_stress( + s + 10, "well2", kind="well", metadata={"x": 164691 + 200, "y": 423579_200} + ) return pstore diff --git a/tests/test_002_connectors.py b/tests/test_002_connectors.py index 750497a..30607b7 100644 --- a/tests/test_002_connectors.py +++ b/tests/test_002_connectors.py @@ -263,7 +263,7 @@ def test_empty_library(request, conn): ) s1.name = "test_series" conn.add_oseries(s1, "test_series", metadata=None) - conn.empty_library("stresses", prompt=False, progressbar=False) + conn.empty_library("oseries", prompt=False, progressbar=False) @pytest.mark.dependency() diff --git a/tests/test_003_pastastore.py b/tests/test_003_pastastore.py index 623b338..cb9577c 100644 --- a/tests/test_003_pastastore.py +++ b/tests/test_003_pastastore.py @@ -214,12 +214,8 @@ def func(ml): @pytest.mark.dependency() def test_save_and_load_model(request, pstore): - ml = pstore.create_model("oseries2") - sm = ps.StressModel( - pstore.get_stresses("well1"), ps.Gamma(), name="well1", settings="well" - ) - ml.add_stressmodel(sm) - ml.solve(tmin="1993-1-1") + ml = pstore.create_model("oseries1") + ml.solve() evp_ml = ml.stats.evp() 
pstore.add_model(ml, overwrite=True) ml2 = pstore.get_models(ml.name) From 80bc6d24dabb265552f2c65bea0c55c4369fe8d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Fri, 9 Aug 2024 17:11:27 +0200 Subject: [PATCH 23/31] use arc.delete_library --- pastastore/store.py | 6 +++--- pastastore/util.py | 10 ++++------ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/pastastore/store.py b/pastastore/store.py index d853eef..6aadf9d 100644 --- a/pastastore/store.py +++ b/pastastore/store.py @@ -3,7 +3,7 @@ import json import os import warnings -from typing import List, Literal, Optional, Tuple, Union +from typing import Dict, List, Literal, Optional, Tuple, Union import numpy as np import pandas as pd @@ -669,7 +669,7 @@ def get_statistics( def create_model( self, name: str, - modelname: str = None, + modelname: Optional[str] = None, add_recharge: bool = True, add_ar_noisemodel: bool = False, recharge_name: str = "recharge", @@ -686,7 +686,7 @@ def create_model( add recharge to the model by looking for the closest precipitation and evaporation time series in the stresses library, by default True - add_ar1_noisemodel : bool, optional + add_ar_noisemodel : bool, optional add AR(1) noise model to the model, by default False recharge_name : str name of the RechargeModel diff --git a/pastastore/util.py b/pastastore/util.py index 0cd5f7d..dda51c4 100644 --- a/pastastore/util.py +++ b/pastastore/util.py @@ -61,17 +61,15 @@ def delete_arcticdb_connector( libs = [] for ilib in arc.list_libraries(): if ilib.split(".")[0] == name: - # TODO: remove replace when arcticdb is able to delete - libs.append(ilib.replace(".", "/")) + libs.append(ilib) elif name is not None: - # TODO: replace / with . when arcticdb is able to delete - libs = [name + "/" + ilib for ilib in libraries] + libs = [name + "." 
+ ilib for ilib in libraries] else: raise ValueError("Provide 'name' and 'uri' OR 'conn'!") for lib in libs: - # arc.delete_library(lib) # TODO: not working at the moment. - shutil.rmtree(os.path.join(conn.uri.split("//")[-1], lib)) + arc.delete_library(lib) + # shutil.rmtree(os.path.join(conn.uri.split("//")[-1], lib)) if libraries is not None: print() From 6d196d1d9fb8e673b5a5f7dc4eb10ec07be118eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Fri, 9 Aug 2024 17:11:37 +0200 Subject: [PATCH 24/31] improve docs --- docs/connectors.rst | 2 +- docs/examples.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/connectors.rst b/docs/connectors.rst index aeaabdf..96b42ee 100644 --- a/docs/connectors.rst +++ b/docs/connectors.rst @@ -43,7 +43,7 @@ the other directories when the model is loaded from the database. ArcticDB -------- -Note: this Connector uses ArcticDB the next-generation version of Arctic. Requires arcticdb Python package. +Note: this Connector uses ArcticDB. Requires arcticdb Python package. The :ref:`ArcticDBConnector` is an object that creates a local database. This can be an existing or a new database. 
diff --git a/docs/examples.rst b/docs/examples.rst index dc43b96..e50e84a 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -50,7 +50,7 @@ a `PastaStore` object:: import pastastore as pst - # define arctic connector + # define ArcticDB connector uri = "lmdb://./my_path_here/" conn = pst.ArcticDBConnector("my_db", uri) From 1685361a65c3f97de9b4305808190cab2d3459c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Sun, 11 Aug 2024 14:13:09 +0200 Subject: [PATCH 25/31] fix for pastas 0.22 --- pastastore/store.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pastastore/store.py b/pastastore/store.py index 6aadf9d..8c10032 100644 --- a/pastastore/store.py +++ b/pastastore/store.py @@ -16,7 +16,7 @@ from pastastore.connectors import DictConnector from pastastore.plotting import Maps, Plots from pastastore.util import _custom_warning -from pastastore.version import PASTAS_GEQ_150 +from pastastore.version import PASTAS_GEQ_150, PASTAS_LEQ_022 from pastastore.yaml_interface import PastastoreYAML FrameorSeriesUnion = Union[pd.DataFrame, pd.Series] @@ -1086,11 +1086,14 @@ def get_stressmodel( # override rfunc and set to HantushWellModel rfunc = ps.HantushWellModel + # do not add metadata for pastas 0.22 and WellModel + if not PASTAS_LEQ_022 and (stressmodel._name != "WellModel"): + kwargs["metadata"] = metadata + return stressmodel( **stresses, rfunc=rfunc(**rfunc_kwargs), name=stressmodel_name, - metadata=metadata, **kwargs, ) From bdc4490152987a66e73187291d889e4c26674abf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Sun, 11 Aug 2024 15:26:57 +0200 Subject: [PATCH 26/31] try fix CI error --- .github/workflows/ci.yml | 3 +- tests/conftest.py | 7 +- tests/data/well_month_end.csv | 265 ++++++++++++++++++++++++++++++++++ 3 files changed, 271 insertions(+), 4 deletions(-) create mode 100644 tests/data/well_month_end.csv diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 
49811a9..08a6c71 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -86,7 +86,8 @@ jobs: if: ${{ matrix.python-version == '3.12'}} run: | pip install --upgrade pip - pip install numpy + # TODO: remove numpy pin when numba or ? doesn't crash on NaN being deprecated + pip install "numpy<2.0" pip install ${{ matrix.pastas-version }} pip install -e .[test_py312] diff --git a/tests/conftest.py b/tests/conftest.py index c6cb24e..f6a466c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -47,11 +47,12 @@ def initialize_project(conn): pstore.add_stress(s, "evap2", kind="evap", metadata={"x": 164000, "y": 423030}) # well 1 - s = pd.read_csv("./tests/data/well.csv", index_col=0, parse_dates=True) + s = pd.read_csv("./tests/data/well_month_end.csv", index_col=0, parse_dates=True) try: s = ps.ts.timestep_weighted_resample( - s, pd.date_range(s.index[0], s.index[-1], freq="D") - ) + s, + pd.date_range(s.index[0] - pd.offsets.MonthBegin(), s.index[-1], freq="D"), + ).bfill() except AttributeError: # pastas<=0.22.0 pass diff --git a/tests/data/well_month_end.csv b/tests/data/well_month_end.csv new file mode 100644 index 0000000..bb75a7c --- /dev/null +++ b/tests/data/well_month_end.csv @@ -0,0 +1,265 @@ +maand,extraction_rate +1994-12-31,0.03838 +1995-01-31,0.00897 +1995-02-28,0.02393 +1995-03-31,0.49484 +1995-04-30,0.52439 +1995-05-31,0.20929 +1995-06-30,0.28054 +1995-07-31,0.36799 +1995-08-31,0.45723 +1995-09-30,0.44319 +1995-10-31,0.34961 +1995-11-30,0.2618 +1995-12-31,0.25599 +1996-01-31,0.26198 +1996-02-29,0.00016 +1996-04-01,0.38567 +1996-05-01,0.40382 +1996-06-01,0.39721 +1996-07-01,0.31524 +1996-08-01,0.35538 +1996-09-01,0.22886 +1996-10-01,0.23693 +1996-11-01,0.14038 +1996-12-01,0.10601 +1997-01-01,0.18302 +1997-02-01,0.30582 +1997-03-01,0.32931 +1997-04-01,0.19434 +1997-05-01,0.14636 +1997-06-01,0.22482 +1997-07-01,0.25604 +1997-08-01,0.14349 +1997-09-01,0.14365 +1997-10-01,0.11508 +1997-11-01,0.14177 +1997-12-01,0.11021 +1998-01-01,0.10881 
+1998-02-01,0.13485 +1998-03-01,0.14543 +1998-04-01,0.13398 +1998-05-01,0.1495 +1998-06-01,0.1159 +1998-07-01,0.12515 +1998-08-01,0.11739 +1998-09-01,0.06723 +1998-10-01,0.05694 +1998-11-01,0.14304 +1998-12-31,0.14304 +1999-01-31,0.16197 +1999-02-28,0.13219 +1999-03-31,0.15673 +1999-04-30,0.12692 +1999-05-31,0.14126 +1999-06-30,0.09268 +1999-07-31,0.08306 +1999-08-31,0.12278 +1999-09-30,0.12872 +1999-10-31,0.12927 +1999-11-30,0.13369 +1999-12-31,0.11105 +2000-01-31,0.12203 +2000-02-29,0.12927 +2000-03-31,0.1195 +2000-04-30,0.14129 +2000-05-31,0.13844 +2000-06-30,0.10825 +2000-07-31,0.09415 +2000-08-31,0.13662 +2000-09-30,0.18055 +2000-10-31,0.0977 +2000-11-30,0.13804 +2000-12-31,0.13413 +2001-01-31,0.14157 +2001-02-28,0.12809 +2001-03-31,0.17677 +2001-04-30,0.12446 +2001-05-31,0.1428 +2001-06-30,0.12015 +2001-07-31,0.09732 +2001-08-31,0.14525 +2001-09-30,0.19072 +2001-10-31,0.21472 +2001-11-30,0.1488 +2001-12-31,0.12716 +2002-01-31,0.11136 +2002-02-28,0.13294 +2002-03-31,0.17193 +2002-04-30,0.21328 +2002-05-31,0.10809 +2002-06-30,0.23905 +2002-07-31,0.1498 +2002-08-31,0.19497 +2002-09-30,0.17893 +2002-10-31,0.2144 +2002-11-30,0.1401 +2003-01-01,0.129813 +2003-02-01,0.082792 +2003-03-01,0.084439 +2003-04-01,0.081279 +2003-05-01,0.130007 +2003-06-01,0.157477 +2003-07-01,0.103786 +2003-08-01,0.120441 +2003-09-01,0.097385 +2003-10-01,0.136799 +2003-11-01,0.116725 +2003-12-01,0.228594 +2004-01-01,0.121232 +2004-02-01,0.170342 +2004-03-01,0.262839 +2004-04-01,0.108799 +2004-05-01,0.07995 +2004-06-01,0.164452 +2004-07-01,0.080246 +2004-08-01,0.128882 +2004-09-01,0.115615 +2004-10-01,0.076441 +2004-11-01,0.135695 +2004-12-01,0.111574 +2005-01-01,0.170306 +2005-02-01,0.092 +2005-03-01,0.110507 +2005-04-01,0.107828 +2005-05-01,0.131834 +2005-06-01,0.123172 +2005-07-01,0.129447 +2005-08-01,0.205508 +2005-09-01,0.094235 +2005-10-01,0.080742 +2005-11-01,0.112639 +2005-12-01,0.168618 +2006-01-01,0.156611 +2006-02-01,0.097841 +2006-03-01,0.096902 +2006-04-01,0.072877 
+2006-05-01,0.074971 +2006-06-01,-0.003211 +2006-07-01,0.098402 +2006-08-01,0.099607 +2006-09-01,0.06738 +2006-10-01,0.022963 +2006-11-01,0.087322 +2006-12-01,0.090544 +2007-01-01,0.125955 +2007-02-01,0.128656 +2007-03-01,0.09663 +2007-04-01,0.148318 +2007-05-01,0.049377 +2007-06-01,0.06263 +2007-07-01,-0.011431 +2007-08-01,0.112204 +2007-09-01,-0.014784 +2007-10-01,0.151083 +2007-11-01,0.095417 +2007-12-01,0.096788 +2008-01-01,0.107656 +2008-02-01,0.041061 +2008-03-01,0.03743 +2008-04-01,0.063964 +2008-05-01,0.111998 +2008-06-30,0.111998 +2008-07-31,0.03949 +2008-08-31,0.146439 +2008-09-30,0.011656 +2008-11-01,0.008172 +2009-01-01,0.008172 +2009-03-01,0.008172 +2009-05-01,0.008172 +2009-06-01,0.162465 +2009-08-01,0.162465 +2009-10-01,0.162465 +2009-12-01,0.162465 +2010-02-01,0.162465 +2010-04-01,0.162465 +2010-06-01,0.162465 +2010-08-01,0.162465 +2010-10-01,0.162465 +2010-11-01,0.244097 +2010-12-01,0.168849 +2011-01-01,0.232156 +2011-02-01,0.145149 +2011-03-01,0.155242 +2011-04-01,0.003238 +2011-05-01,0.226989 +2011-06-01,0.129647 +2011-07-01,0.104867 +2011-08-01,0.055698 +2011-09-01,0.066664 +2011-10-01,0.075181 +2011-11-01,0.03268 +2011-12-01,0.057656 +2012-01-01,0.06365 +2012-02-01,0.064159 +2012-03-01,0.042177 +2012-04-01,0.133896 +2012-05-01,0.130707 +2012-06-01,0.124348 +2012-07-01,0.113902 +2012-08-01,0.132095 +2012-09-01,0.199222 +2012-10-01,0.24341 +2012-11-01,0.16646 +2012-12-01,0.109427 +2013-01-01,0.157252 +2013-02-01,0.160432 +2013-03-01,0.092839 +2013-04-01,0.084148 +2013-05-01,0.094315 +2013-06-01,0.126162 +2013-07-01,0.133261 +2013-08-31,0.133261 +2013-09-30,0.097269 +2013-10-31,0.145245 +2013-11-30,0.170888 +2013-12-31,0.121974 +2014-01-31,0.240449 +2014-02-28,0.169527 +2014-03-31,0.021129 +2014-05-01,0.135889 +2014-06-01,0.146984 +2014-07-01,0.151469 +2014-08-01,0.102508 +2014-09-01,0.110702 +2014-10-01,0.120033 +2014-11-01,0.139158 +2014-12-01,0.063081 +2015-01-01,0.003573 +2015-02-01,0.117113 +2015-03-31,0.117113 +2015-04-30,0.242702 
+2015-05-31,0.049549 +2015-06-30,0.143998 +2015-07-31,0.112192 +2015-08-31,0.096175 +2015-09-30,0.161869 +2015-10-31,0.177292 +2015-11-30,0.000584 +2015-12-31,0.066398 +2016-01-31,0.109151 +2016-02-29,0.034934 +2016-03-31,0.02964 +2016-04-30,0.060122 +2016-05-31,0.067732 +2016-06-30,0.105745 +2016-07-31,0.101905 +2016-08-31,0.082375 +2016-09-30,0.165833 +2016-10-31,0.096007 +2016-11-30,0.123265 +2016-12-31,0.130096 +2017-01-31,0.265398 +2017-02-28,0.11157 +2017-03-31,0.027638 +2017-04-30,0.009649 +2017-05-31,0.020002 +2017-06-30,0.028245 +2017-07-31,0.011456 +2017-08-31,0.016503 +2017-09-30,0.027984 +2017-10-31,0.12744 +2017-11-30,0.11752 +2017-12-31,0.070679 +2018-02-01,0.00073 +2018-03-01,0.0 From c35c6327967b87c6e55a928b8aaf2c130c9a386d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Mon, 12 Aug 2024 10:43:14 +0200 Subject: [PATCH 27/31] allow in-place adding of stressmodels to model by passing model as str --- pastastore/store.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/pastastore/store.py b/pastastore/store.py index 8c10032..2ce3c2c 100644 --- a/pastastore/store.py +++ b/pastastore/store.py @@ -1,6 +1,7 @@ """Module containing the PastaStore object for managing time series and models.""" import json +import logging import os import warnings from typing import Dict, List, Literal, Optional, Tuple, Union @@ -22,6 +23,8 @@ FrameorSeriesUnion = Union[pd.DataFrame, pd.Series] warnings.showwarning = _custom_warning +logger = logging.getLogger(__name__) + class PastaStore: """PastaStore object for managing pastas time series and models. 
@@ -1099,7 +1102,7 @@ def get_stressmodel( def add_stressmodel( self, - ml: ps.Model, + ml: Union[ps.Model, str], stresses: Union[str, List[str], Dict[str, str]], stressmodel=ps.StressModel, stressmodel_name: Optional[str] = None, @@ -1120,8 +1123,10 @@ def add_stressmodel( Parameters ---------- - ml : pastas.Model - pastas.Model object to add StressModel to + ml : pastas.Model or str + pastas.Model object to add StressModel to, if passed as string, + model is loaded from store, the stressmodel is added and then written + back to the store. stresses : str, list of str, or dict name(s) of the time series to use for the stressmodel, or dictionary with key(s) and value(s) as time series name(s). Options include: @@ -1157,7 +1162,16 @@ def add_stressmodel( oseries=ml.oseries.name, **kwargs, ) - ml.add_stressmodel(sm) + if isinstance(ml, str): + ml = self.get_model(ml) + ml.add_stressmodel(sm) + self.conn.add_model(ml, overwrite=True) + logger.info( + f"Stressmodel '{sm.name}' added to model '{ml.name}' " + "and stored in database." 
+ ) + else: + ml.add_stressmodel(sm) def solve_models( self, From 9c2361cd35a7ddc41a76404971f1061af96849d0 Mon Sep 17 00:00:00 2001 From: FransSchaars Date: Wed, 14 Aug 2024 14:48:59 +0200 Subject: [PATCH 28/31] check: oseries can be a model and string --- pastastore/store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pastastore/store.py b/pastastore/store.py index 2ce3c2c..97c71a3 100644 --- a/pastastore/store.py +++ b/pastastore/store.py @@ -1159,7 +1159,7 @@ def add_stressmodel( rfunc=rfunc, rfunc_kwargs=rfunc_kwargs, kind=kind, - oseries=ml.oseries.name, + oseries=ml if isinstance(ml, str) else ml.oseries.name, **kwargs, ) if isinstance(ml, str): From 8e66e5343da7bdf626d439dd7600e399a0b05dbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Thu, 15 Aug 2024 15:02:16 +0200 Subject: [PATCH 29/31] move timestep_weighted_resample to add_observation method - improve update knmi --- pastastore/extensions/hpd.py | 178 ++++++++++++++++++++++++++++++----- 1 file changed, 154 insertions(+), 24 deletions(-) diff --git a/pastastore/extensions/hpd.py b/pastastore/extensions/hpd.py index da5031a..cf98e09 100644 --- a/pastastore/extensions/hpd.py +++ b/pastastore/extensions/hpd.py @@ -12,12 +12,14 @@ import hydropandas as hpd import numpy as np +from hydropandas.io.knmi import _check_latest_measurement_date_de_bilt, get_stations from pandas import DataFrame, Series, Timedelta, Timestamp +from pastas.timeseries_utils import timestep_weighted_resample from tqdm.auto import tqdm from pastastore.extensions.accessor import register_pastastore_accessor -logger = logging.getLogger("hydropandas") +logger = logging.getLogger("hydropandas_extension") TimeType = Optional[Union[str, Timestamp]] @@ -51,6 +53,7 @@ def add_obscollection( data_column: Optional[str] = None, unit_multiplier: float = 1.0, update: bool = False, + normalize_datetime_index: bool = True, ): """Add an ObsCollection to the PastaStore. 
@@ -81,6 +84,7 @@ def add_obscollection( data_column=data_column, unit_multiplier=unit_multiplier, update=update, + normalize_datetime_index=normalize_datetime_index, ) def add_observation( @@ -92,6 +96,7 @@ def add_observation( data_column: Optional[str] = None, unit_multiplier: float = 1.0, update: bool = False, + normalize_datetime_index: bool = False, ): """Add an hydropandas observation series to the PastaStore. @@ -113,28 +118,36 @@ def add_observation( multiply unit by this value before saving it in the store update : bool, optional if True, update currently stored time series with new data + normalize_datetime_index : bool, optional + if True, normalize the datetime so stress value at midnight represents + the daily total, by default False. """ # if data_column is not None, use data_column if data_column is not None: if not obs.empty: - o = obs[data_column] + o = obs[[data_column]] else: o = Series() - # if data_column is None, check no. of columns in obs - # if only one column, use that column - elif isinstance(obs, DataFrame) and obs.columns.size == 1: - o = obs.iloc[:, 0] elif isinstance(obs, Series): o = obs # else raise error - else: + elif isinstance(obs, DataFrame) and (obs.columns.size > 1): raise ValueError("No data_column specified and obs has multiple columns.") + else: + raise TypeError("obs must be a Series or DataFrame with a single column.") # break if obs is empty if o.empty: logger.info("Observation '%s' is empty, not adding to store.", name) return + if normalize_datetime_index and o.index.size > 1: + o = self._normalize_datetime_index(o) + else: + raise ValueError( + "Must have minimum of 2 observations for timestep_weighted_resample." 
+ ) + # gather metadata from obs object metadata = {key: getattr(obs, key) for key in obs._metadata} @@ -163,7 +176,7 @@ def add_observation( action_msg = "added to" if libname == "oseries": - self._store.upsert_oseries(o, name, metadata=metadata) + self._store.upsert_oseries(o.squeeze(), name, metadata=metadata) logger.info( "%sobservation '%s' %s oseries library.", source, name, action_msg ) @@ -171,7 +184,7 @@ def add_observation( if kind is None: raise ValueError("`kind` must be specified for stresses!") self._store.upsert_stress( - o * unit_multiplier, name, kind, metadata=metadata + (o * unit_multiplier).squeeze(), name, kind, metadata=metadata ) logger.info( "%sstress '%s' (kind='%s') %s stresses library.", @@ -190,6 +203,8 @@ def download_knmi_precipitation( tmin: TimeType = None, tmax: TimeType = None, unit_multiplier: float = 1e3, + fill_missing_obs: bool = True, + normalize_datetime_index: bool = True, **kwargs, ): """Download precipitation data from KNMI and store in PastaStore. @@ -215,6 +230,8 @@ def download_knmi_precipitation( tmin=tmin, tmax=tmax, unit_multiplier=unit_multiplier, + fill_missing_obs=fill_missing_obs, + normalize_datetime_index=normalize_datetime_index, **kwargs, ) @@ -225,6 +242,8 @@ def download_knmi_evaporation( tmin: TimeType = None, tmax: TimeType = None, unit_multiplier: float = 1e3, + fill_missing_obs: bool = True, + normalize_datetime_index: bool = True, **kwargs, ): """Download evaporation data from KNMI and store in PastaStore. @@ -242,6 +261,12 @@ def download_knmi_evaporation( unit_multiplier : float, optional multiply unit by this value before saving it in the store, by default 1e3 to convert m to mm + fill_missing_obs : bool, optional + if True, fill missing observations by getting observations from nearest + station with data. + normalize_datetime_index : bool, optional + if True, normalize the datetime so stress value at midnight represents + the daily total, by default True. 
""" self.download_knmi_meteo( meteo_var=meteo_var, @@ -250,6 +275,8 @@ def download_knmi_evaporation( tmin=tmin, tmax=tmax, unit_multiplier=unit_multiplier, + fill_missing_obs=fill_missing_obs, + normalize_datetime_index=normalize_datetime_index, **kwargs, ) @@ -261,6 +288,8 @@ def download_knmi_meteo( tmin: TimeType = None, tmax: TimeType = None, unit_multiplier: float = 1.0, + normalize_datetime_index: bool = True, + fill_missing_obs: bool = True, **kwargs, ): """Download meteorological data from KNMI and store in PastaStore. @@ -281,6 +310,12 @@ def download_knmi_meteo( unit_multiplier : float, optional multiply unit by this value before saving it in the store, by default 1.0 (no conversion) + fill_missing_obs : bool, optional + if True, fill missing observations by getting observations from nearest + station with data. + normalize_datetime_index : bool, optional + if True, normalize the datetime so stress value at midnight represents + the daily total, by default True. """ # get tmin/tmax if not specified tmintmax = self._store.get_tmin_tmax("oseries") @@ -301,6 +336,7 @@ def download_knmi_meteo( meteo_vars=[meteo_var], starts=tmin, ends=tmax, + fill_missing_obs=fill_missing_obs, **kwargs, ) @@ -312,6 +348,7 @@ def download_knmi_meteo( data_column=meteo_var, unit_multiplier=unit_multiplier, update=False, + normalize_datetime_index=normalize_datetime_index, ) def update_knmi_meteo( @@ -319,6 +356,10 @@ def update_knmi_meteo( names: Optional[List[str]] = None, tmin: TimeType = None, tmax: TimeType = None, + fill_missing_obs=True, + normalize_datetime_index=True, + raise_on_error=False, + **kwargs, ): """Update meteorological data from KNMI in PastaStore. @@ -331,6 +372,16 @@ def update_knmi_meteo( as tmin tmax : TimeType, optional end time, by default None, which defaults to today + fill_missing_obs : bool, optional + if True, fill missing observations by getting observations from nearest + station with data. 
+ normalize_datetime_index : bool, optional + if True, normalize the datetime so stress value at midnight represents + the daily total, by default True. + raise_on_error : bool, optional + if True, raise error if an error occurs, by default False + **kwargs : dict, optional + Additional keyword arguments to pass to `hpd.read_knmi()` """ if names is None: names = self._store.stresses.loc[ @@ -339,36 +390,115 @@ def update_knmi_meteo( tmintmax = self._store.get_tmin_tmax("stresses", names=names) + if tmax is not None: + if tmintmax["tmax"].min() > tmax: + logger.info(f"All KNMI stresses are up to date to {tmax}.") + return + + maxtmax_rd = _check_latest_measurement_date_de_bilt("RD") + maxtmax_ev24 = _check_latest_measurement_date_de_bilt("EV24") + for name in tqdm(names, desc="Updating KNMI meteo stresses"): - stn = self._store.stresses.loc[name, "station"] meteo_var = self._store.stresses.loc[name, "meteo_var"] + if meteo_var == "RD": + maxtmax = maxtmax_rd + elif meteo_var == "EV24": + maxtmax = maxtmax_ev24 + else: + maxtmax = maxtmax_rd + + if tmin is None: + # 1 days extra to ensure computation of daily totals using + # timestep_weighted_resample + itmin = tmintmax.loc[name, "tmax"] - Timedelta(days=1) + else: + itmin = tmin + + # ensure 2 observations at least + if itmin >= (maxtmax + Timedelta(days=1)): + logger.debug("KNMI %s is already up to date." % name) + continue + + if tmax is None: + itmax = maxtmax + else: + itmax = tmax + unit = self._store.stresses.loc[name, "unit"] kind = self._store.stresses.loc[name, "kind"] + if "station" in self._store.stresses.columns and ~np.isnan( + self._store.stresses.loc[name, "station"] + ): + stn = self._store.stresses.loc[name, "station"] + else: + stns = get_stations(meteo_var) + stn_name = name.split("_")[-1].lower() + mask = stns["name"].str.lower().str.replace(" ", "-") == stn_name + if not mask.any(): + logger.warning( + f"Station '%s' not found in list of KNMI {meteo_var} stations." 
+ % stn_name + ) + continue + stn = stns.loc[mask].index[0] if unit == "mm": unit_multiplier = 1e3 else: unit_multiplier = 1.0 - if tmin is None: - tmin = tmintmax.loc[name, "tmax"] - + logger.debug("Updating KNMI %s from %s to %s" % (name, itmin, itmax)) knmi = hpd.read_knmi( stns=[stn], meteo_vars=[meteo_var], - starts=tmin, - ends=tmax, + starts=itmin, + ends=itmax, + fill_missing_obs=fill_missing_obs, + **kwargs, ) + obs = knmi["obs"].iloc[0] + + try: + self.add_observation( + "stresses", + obs.loc[tmintmax.loc[name, "tmax"] :], + name=name, + kind=kind, + data_column=meteo_var, + unit_multiplier=unit_multiplier, + update=True, + normalize_datetime_index=normalize_datetime_index, + ) + except ValueError as e: + logger.error("Error updating KNMI %s: %s" % (name, str(e))) + if raise_on_error: + raise e + + @staticmethod + def _normalize_datetime_index(obs): + """Normalize observation datetime index (i.e. set observation time to midnight). - self.add_observation( - "stresses", - knmi["obs"].iloc[0], - name=name, - kind=kind, - data_column=meteo_var, - unit_multiplier=unit_multiplier, - update=True, - ) + Parameters + ---------- + obs : pandas.Series + observation series to normalize + + Returns + ------- + hpd.Obs + observation series with normalized datetime index + """ + if isinstance(obs, hpd.Obs): + metadata = {k: getattr(obs, k) for k in obs._metadata} + else: + metadata = {} + return obs.__class__( + timestep_weighted_resample( + obs, + obs.index.normalize(), + ).rename(obs.name), + **metadata, + ) def download_bro_gmw( self, From dac5d86319f36f9f8984d55f0434d52f8b773dcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Thu, 15 Aug 2024 15:04:16 +0200 Subject: [PATCH 30/31] new ruff --- tests/test_002_connectors.py | 24 ++++++++++----------- tests/test_003_pastastore.py | 38 +++++++++++++++++----------------- tests/test_004_yaml.py | 14 ++++++------- tests/test_005_maps_plots.py | 12 +++++------ tests/test_007_hpdextension.py | 10 
++++----- 5 files changed, 49 insertions(+), 49 deletions(-) diff --git a/tests/test_002_connectors.py b/tests/test_002_connectors.py index 30607b7..5ac8eaa 100644 --- a/tests/test_002_connectors.py +++ b/tests/test_002_connectors.py @@ -178,7 +178,7 @@ def test_update_metadata(request, conn): conn.del_oseries("test_df") -@pytest.mark.dependency() +@pytest.mark.dependency def test_add_oseries(conn): o = pd.read_csv("./tests/data/obs.csv", index_col=0, parse_dates=True) conn.add_oseries( @@ -189,7 +189,7 @@ def test_add_oseries(conn): ) -@pytest.mark.dependency() +@pytest.mark.dependency def test_add_stress(conn): s = pd.read_csv("./tests/data/rain.csv", index_col=0, parse_dates=True) conn.add_stress( @@ -200,39 +200,39 @@ def test_add_stress(conn): ) -@pytest.mark.dependency() +@pytest.mark.dependency def test_get_oseries(request, conn): depends(request, [f"test_add_oseries[{conn.type}]"]) _ = conn.get_oseries("oseries1") -@pytest.mark.dependency() +@pytest.mark.dependency def test_get_oseries_and_metadata(request, conn): depends(request, [f"test_add_oseries[{conn.type}]"]) _ = conn.get_oseries("oseries1", return_metadata=True) -@pytest.mark.dependency() +@pytest.mark.dependency def test_get_stress(request, conn): depends(request, [f"test_add_stress[{conn.type}]"]) s = conn.get_stresses("prec") s.name = "prec" -@pytest.mark.dependency() +@pytest.mark.dependency def test_get_stress_and_metadata(request, conn): depends(request, [f"test_add_stress[{conn.type}]"]) s, _ = conn.get_stresses("prec", return_metadata=True) s.name = "prec" -@pytest.mark.dependency() +@pytest.mark.dependency def test_oseries_prop(request, conn): depends(request, [f"test_add_oseries[{conn.type}]"]) _ = conn.oseries -@pytest.mark.dependency() +@pytest.mark.dependency def test_stresses_prop(request, conn): depends(request, [f"test_add_stress[{conn.type}]"]) _ = conn.stresses @@ -242,19 +242,19 @@ def test_repr(conn): conn.__repr__() -@pytest.mark.dependency() +@pytest.mark.dependency def 
test_del_oseries(request, conn): depends(request, [f"test_add_oseries[{conn.type}]"]) conn.del_oseries("oseries1") -@pytest.mark.dependency() +@pytest.mark.dependency def test_del_stress(request, conn): depends(request, [f"test_add_stress[{conn.type}]"]) conn.del_stress("prec") -@pytest.mark.dependency() +@pytest.mark.dependency def test_empty_library(request, conn): s1 = pd.Series( index=pd.date_range("2000", periods=10, freq="D"), @@ -266,7 +266,7 @@ def test_empty_library(request, conn): conn.empty_library("oseries", prompt=False, progressbar=False) -@pytest.mark.dependency() +@pytest.mark.dependency def test_delete(request, conn): # no need to delete dictconnector (in memory) if conn.conn_type == "arcticdb": diff --git a/tests/test_003_pastastore.py b/tests/test_003_pastastore.py index cb9577c..3fc3b35 100644 --- a/tests/test_003_pastastore.py +++ b/tests/test_003_pastastore.py @@ -12,17 +12,17 @@ import pastastore as pst -@pytest.mark.dependency() +@pytest.mark.dependency def test_iter_oseries(pstore): _ = list(pstore.iter_oseries()) -@pytest.mark.dependency() +@pytest.mark.dependency def test_iter_stresses(pstore): _ = list(pstore.iter_stresses()) -@pytest.mark.dependency() +@pytest.mark.dependency def test_get_tmintmax(pstore): ostt = pstore.get_tmin_tmax("oseries") assert ostt.at["oseries1", "tmin"] == pd.Timestamp("2010-01-14") @@ -36,19 +36,19 @@ def test_get_tmintmax(pstore): pstore.del_model("oseries1") -@pytest.mark.dependency() +@pytest.mark.dependency def test_search(pstore): results = pstore.search("oseries", "OSER", case_sensitive=False) assert len(results) == 3 assert len(set(results) - {"oseries1", "oseries2", "oseries3"}) == 0 -@pytest.mark.dependency() +@pytest.mark.dependency def test_create_model(pstore): _ = pstore.create_model("oseries1") -@pytest.mark.dependency() +@pytest.mark.dependency def test_properties(pstore): pstore.add_oseries(pd.Series(dtype=np.float64), "deleteme", validate=False) pstore.add_stress( @@ -67,14 +67,14 @@ def 
test_properties(pstore): pstore.del_stress("deleteme") -@pytest.mark.dependency() +@pytest.mark.dependency def test_store_model(request, pstore): depends(request, [f"test_create_model[{pstore.type}]"]) ml = pstore.create_model("oseries1") pstore.conn.add_model(ml) -@pytest.mark.dependency() +@pytest.mark.dependency def test_model_accessor(request, pstore): depends(request, [f"test_store_model[{pstore.type}]"]) # repr @@ -93,7 +93,7 @@ def test_model_accessor(request, pstore): pstore.del_models("oseries1_2") -@pytest.mark.dependency() +@pytest.mark.dependency def test_oseries_model_accessor(request, pstore): depends(request, [f"test_store_model[{pstore.type}]"]) # repr @@ -114,7 +114,7 @@ def test_oseries_model_accessor(request, pstore): assert len(ml_list3) == 1 -@pytest.mark.dependency() +@pytest.mark.dependency def test_store_model_missing_series(request, pstore): depends( request, @@ -135,7 +135,7 @@ def test_store_model_missing_series(request, pstore): pstore.add_model(ml) -@pytest.mark.dependency() +@pytest.mark.dependency def test_get_model(request, pstore): depends( request, @@ -148,7 +148,7 @@ def test_get_model(request, pstore): _ = pstore.conn.get_models("oseries1") -@pytest.mark.dependency() +@pytest.mark.dependency def test_del_model(request, pstore): depends( request, @@ -162,7 +162,7 @@ def test_del_model(request, pstore): pstore.conn.del_models("oseries1") -@pytest.mark.dependency() +@pytest.mark.dependency def test_create_models(pstore): _ = pstore.create_models_bulk( ["oseries1", "oseries2"], store=True, progressbar=False @@ -170,7 +170,7 @@ def test_create_models(pstore): _ = pstore.conn.models -@pytest.mark.dependency() +@pytest.mark.dependency def test_get_parameters(request, pstore): depends(request, [f"test_create_models[{pstore.type}]"]) p = pstore.get_parameters(progressbar=False, param_value="initial") @@ -178,20 +178,20 @@ def test_get_parameters(request, pstore): assert p.isna().sum().sum() == 0 -@pytest.mark.dependency() 
+@pytest.mark.dependency def test_get_signatures(request, pstore): depends(request, [f"test_create_models[{pstore.type}]"]) s = pstore.get_signatures(progressbar=False) assert s.shape[1] == len(ps.stats.signatures.__all__) -@pytest.mark.dependency() +@pytest.mark.dependency def test_iter_models(request, pstore): depends(request, [f"test_create_models[{pstore.type}]"]) _ = list(pstore.iter_models()) -@pytest.mark.dependency() +@pytest.mark.dependency def test_solve_models_and_get_stats(request, pstore): depends(request, [f"test_create_models[{pstore.type}]"]) _ = pstore.solve_models( @@ -201,7 +201,7 @@ def test_solve_models_and_get_stats(request, pstore): assert stats.index.size == 2 -@pytest.mark.dependency() +@pytest.mark.dependency def test_apply(request, pstore): depends(request, [f"test_solve_models_and_get_stats[{pstore.type}]"]) @@ -212,7 +212,7 @@ def func(ml): assert len(result) == 2 -@pytest.mark.dependency() +@pytest.mark.dependency def test_save_and_load_model(request, pstore): ml = pstore.create_model("oseries1") ml.solve() diff --git a/tests/test_004_yaml.py b/tests/test_004_yaml.py index 3b8b98a..05893dc 100644 --- a/tests/test_004_yaml.py +++ b/tests/test_004_yaml.py @@ -20,7 +20,7 @@ def tempyaml(yaml): os.unlink(temp.name) -@pytest.mark.dependency() +@pytest.mark.dependency def test_load_yaml_rechargemodel(pstore): yamlstr = """ my_first_model: # model name @@ -38,7 +38,7 @@ def test_load_yaml_rechargemodel(pstore): pstore.add_model(ml) -@pytest.mark.dependency() +@pytest.mark.dependency def test_load_yaml_stressmodel(pstore): yamlstr = """ my_second_model: # model name @@ -54,7 +54,7 @@ def test_load_yaml_stressmodel(pstore): pstore.add_model(ml) -@pytest.mark.dependency() +@pytest.mark.dependency def test_load_yaml_wellmodel(pstore): yamlstr = """ my_third_model: # model name @@ -71,7 +71,7 @@ def test_load_yaml_wellmodel(pstore): pstore.add_model(ml) -@pytest.mark.dependency() +@pytest.mark.dependency def test_write_load_compare_yaml(request, 
pstore): depends( request, @@ -90,7 +90,7 @@ def test_write_load_compare_yaml(request, pstore): os.remove("my_first_model.yaml") -@pytest.mark.dependency() +@pytest.mark.dependency def test_write_yaml_per_oseries(request, pstore): depends( request, @@ -105,7 +105,7 @@ def test_write_yaml_per_oseries(request, pstore): os.remove("oseries2.yaml") -@pytest.mark.dependency() +@pytest.mark.dependency def test_write_yaml_minimal(request, pstore): depends( request, @@ -120,7 +120,7 @@ def test_write_yaml_minimal(request, pstore): os.remove("my_first_model.yaml") -@pytest.mark.dependency() +@pytest.mark.dependency def test_write_yaml_minimal_nearest(request, pstore): depends( request, diff --git a/tests/test_005_maps_plots.py b/tests/test_005_maps_plots.py index 424cbda..12b46db 100644 --- a/tests/test_005_maps_plots.py +++ b/tests/test_005_maps_plots.py @@ -22,7 +22,7 @@ def test_plot_stresses_availability(pstore): plt.close(ax.figure) -@pytest.mark.dependency() +@pytest.mark.dependency def test_cumulative_hist(request, pstore): ml1 = pstore.create_model("oseries1") pstore.add_model(ml1) @@ -35,7 +35,7 @@ def test_cumulative_hist(request, pstore): # %% maps -@pytest.mark.bgmap() +@pytest.mark.bgmap def test_map_oseries_w_bgmap(pstore): ax = pstore.maps.oseries() # only test bgmap once for pas @@ -57,25 +57,25 @@ def test_map_stresslinks(pstore): plt.close(ax.figure) -@pytest.mark.dependency() +@pytest.mark.dependency def test_map_models(request, pstore): ax = pstore.maps.models() plt.close(ax.figure) -@pytest.mark.dependency() +@pytest.mark.dependency def test_map_model(request, pstore): depends(request, [f"test_map_models[{pstore.type}]"]) ax = pstore.maps.model("oseries1") plt.close(ax.figure) -@pytest.mark.dependency() +@pytest.mark.dependency def test_map_modelstat(request, pstore): ax = pstore.maps.modelstat("evp") plt.close(ax.figure) -@pytest.mark.dependency() +@pytest.mark.dependency def test_list_ctx_providers(request, pstore): 
pstore.maps._list_contextily_providers() diff --git a/tests/test_007_hpdextension.py b/tests/test_007_hpdextension.py index 181d00a..befe375 100644 --- a/tests/test_007_hpdextension.py +++ b/tests/test_007_hpdextension.py @@ -5,7 +5,7 @@ import pastastore as pst -@pytest.mark.pastas150() +@pytest.mark.pastas150 def test_hpd_download_from_bro(): from pastastore.extensions import activate_hydropandas_extension @@ -17,7 +17,7 @@ def test_hpd_download_from_bro(): assert pstore.n_oseries == 3 -@pytest.mark.pastas150() +@pytest.mark.pastas150 def test_hpd_download_precipitation_from_knmi(): from pastastore.extensions import activate_hydropandas_extension @@ -29,7 +29,7 @@ def test_hpd_download_precipitation_from_knmi(): assert pstore.n_stresses == 1 -@pytest.mark.pastas150() +@pytest.mark.pastas150 def test_hpd_download_evaporation_from_knmi(): from pastastore.extensions import activate_hydropandas_extension @@ -41,7 +41,7 @@ def test_hpd_download_evaporation_from_knmi(): assert pstore.n_stresses == 1 -@pytest.mark.pastas150() +@pytest.mark.pastas150 def test_update_oseries(): from pastastore.extensions import activate_hydropandas_extension @@ -54,7 +54,7 @@ def test_update_oseries(): assert tmintmax.loc["GMW000000036327_1", "tmax"] >= Timestamp("2024-01-20") -@pytest.mark.pastas150() +@pytest.mark.pastas150 def test_update_stresses(): from pastastore.extensions import activate_hydropandas_extension From a5b70f730912bc794b4b35b485b99326ab72ef77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Thu, 15 Aug 2024 15:12:17 +0200 Subject: [PATCH 31/31] fix test errors --- pastastore/extensions/hpd.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pastastore/extensions/hpd.py b/pastastore/extensions/hpd.py index cf98e09..7e863fb 100644 --- a/pastastore/extensions/hpd.py +++ b/pastastore/extensions/hpd.py @@ -53,7 +53,7 @@ def add_obscollection( data_column: Optional[str] = None, unit_multiplier: float = 1.0, update: bool = 
False, - normalize_datetime_index: bool = True, + normalize_datetime_index: bool = False, ): """Add an ObsCollection to the PastaStore. @@ -72,6 +72,9 @@ def add_obscollection( multiply unit by this value before saving it in the store update : bool, optional if True, update currently stored time series with new data + normalize_datetime_index : bool, optional + if True, normalize the datetime so stress value at midnight represents + the daily total, by default False. """ for name, row in oc.iterrows(): obs = row["obs"] @@ -391,7 +394,7 @@ def update_knmi_meteo( tmintmax = self._store.get_tmin_tmax("stresses", names=names) if tmax is not None: - if tmintmax["tmax"].min() > tmax: + if tmintmax["tmax"].min() > Timestamp(tmax): logger.info(f"All KNMI stresses are up to date to {tmax}.") return