diff --git a/.gitignore b/.gitignore index 3c7b339c..8ac3aea8 100644 --- a/.gitignore +++ b/.gitignore @@ -56,6 +56,9 @@ autotest/codecov autotest/.coverage autotest/temp +# test_data +test_data/.test_data_version* + # sphinx generated/ diff --git a/README.md b/README.md index 473b07a6..c80ba64a 100644 --- a/README.md +++ b/README.md @@ -20,53 +20,33 @@ **Table of Contents** -- [Purpose](#purpose) +- [About](#about) - [Installation](#installation) -- [Contributing](#contributing) -- [Example Notebooks](#example-notebooks) -- [Overview of Repository Contents](#overview-of-repository-contents) +- [Getting started / Example notebooks](#getting-started--example-notebooks) +- [Community engagement](#community-engagement) - [Disclaimer](#disclaimer) -## Purpose +## About -The purpose of this repository is to refactor and redesign the [PRMS modeling -system](https://www.usgs.gov/software/precipitation-runoff-modeling-system-prms) -while maintaining its functionality. Code modernization is a step towards -unification with [MODFLOW 6 (MF6)](https://github.com/MODFLOW-USGS/modflow6). +Welcome to the pywatershed repository! -The following motivations are taken from our [AGU poster from December -2022](https://agu2022fallmeeting-agu.ipostersessions.com/default.aspx?s=05-E1-C6-40-DF-0D-4D-C7-4E-DE-D2-61-02-05-8F-0A) -which provides additional details on motivations, project status, and current -directions of this project as of approximately January 2023. +Pywatershed is a Python package for simulating hydrologic processes motivated by +the need to modernize important, legacy hydrologic models at the USGS, +particularly the +[Precipitation-Runoff Modeling System](https://www.usgs.gov/software/precipitation-runoff-modeling-system-prms) +(PRMS, Markstrom et al., 2015) and its role in +[GSFLOW](https://www.usgs.gov/software/gsflow-coupled-groundwater-and-surface-water-flow-model) +(Markstrom et al., 2008). 
+The goal of modernization is to make these legacy models more flexible as process +representations, to support testing of alternative hydrologic process +conceptualizations, and to facilitate the incorporation of cutting edge +modeling techniques and data sources. Pywatershed is a place for experimentation +with software design, process representation, and data fusion in the context +of well-established hydrologic process modeling. -Goals of the USGS Enterprise Capacity (EC) project include: - - * A sustainable integrated, hydrologic modeling framework for the U.S. - Geological Survey (USGS) - * Interoperable modeling across the USGS, partner agencies, and academia - -Goals for EC Watershed Modeling: - - * Couple the Precipitation-Runoff Modeling System (PRMS, e.g. Regan et al, - 2018)  with MODFLOW 6 (MF6, e.g. Langevin et al, 2017) in a sustainable - way - * Redesign PRMS to be more modern and flexible - * Prioritize process representations in the current National Hydrological - Model (NHM) based on PRMS 5.2.1 - -Prototype an EC watershed model: "pywatershed" - - * Redesign PRMS quickly in python - * Couple to MF6 via BMI/XMI interface (Hughes et al, 2021; Hutton et al, 2020) - * Establish a prototyping ground for EC codes that couples to the compiled - framework: low cost proof of concepts (at the price of potentially less - computational performance) * Enable process representation hypothesis testing - * Use cutting-edge techniques and technologies to improve models - * Machine learning, automatic differentiation - * Address challenges of modeling across space and time scales - * Transition prototype watershed model to compiled EC code +For more information on the goals and status of pywatershed, please see the [pywatershed docs](https://pywatershed.readthedocs.io/). ## Installation @@ -81,7 +61,7 @@ all platforms. The `pywatershed` package is [available on conda-forge](https://anaconda.org/conda-forge/pywatershed). 
The installation is the quickest way to get up and running by provides only the minimal set of -dependencies (not including jupyter nor all packages needed for running the +dependencies (not including Jupyter nor all packages needed for running the example notebooks, also not suitable for development purposes). We recommend the following installation procedures to get fully-functional @@ -92,7 +72,7 @@ repository before installing `pywatershed` itself. Mamba will be much faster than Ananconda (but the conda command could also be used). If you wish to use the stable release, you will use `main` in place of -`` in the following commands. If you want to follow developemnt, you'll +`` in the following commands. If you want to follow development, you'll use `develop` instead. Without using `git` (directly), you may: @@ -120,68 +100,55 @@ you will also need to activate this environment by name.) We install the `environment_w_jupyter.yml` to provide all known dependencies -including those for running the eample notebooks. (The `environment.yml` -does not contain jupyter or jupyterlab because this interferes with installation -on WholeTale, see Example Notebooks seection below.) +including those for running the example notebooks. (The `environment.yml` +does not contain Jupyter or JupyterLab because this interferes with installation +on WholeTale, see Getting Started section below.) -## Contributing -See the [developer documentation](./DEVELOPER.md) for instructions on setting up -a development environment. See the [contribution guide](./CONTRIBUTING.md) to -contribute to this project. +## Getting started / Example notebooks -## Example Notebooks +Please note that you can browse the API reference, developer info, and index +in the [pywatershed docs]((https://pywatershed.readthedocs.io/)). But +*the best way to get started with pywatershed is to dive into the example +notebooks*. 
For introductory example notebooks, look in the [`examples/`](https://github.com/EC-USGS/pywatershed/tree/main/examples>) directory in the repository. Numbered starting at 00, these are meant to be -completed in order. Non-numbered notebooks coveradditional topics. These -notebooks are note yet covered by testing and so may be expected to have some -issues until they are added to testing. In `examples/developer/` there are -notebooks of interest to developers who may want to learn about running the -software tests. - -Though no notebook outputs are saved in Github, these notebooks can easily -navigated to and run in WholeTale containers (free but sign-up or log-in -required). This is a very easy and quick way to get started without needing to -install pywatershed requirements yourself. WholeTale is an NSF funded project -and supports logins from many institutions, e.g. the USGS, and you may not need -to register. - -There are containers for both the `main` and `develop` branches. +completed in order. They build on one another and introduce pywatershed +step by step. Notebook outputs are not saved in Github. 
But you can run these +notebooks locally or using WholeTale (an NSF funded project supporting logins +from many institutions, free but sign-up or log-in required) +where the pywatershed environment is all ready to go: [![WholeTale](https://raw.githubusercontent.com/whole-tale/wt-design-docs/master/badges/wholetale-explore.svg)](https://dashboard.wholetale.org) - * [WholeTale container for latest release (main - branch)](https://dashboard.wholetale.org/run/64ae29e8a887f48b9f173678?tab=metadata) - * [WholeTale container for develop - branch](https://dashboard.wholetale.org/run/64ae25c3a887f48b9f1735c8?tab=metadata) + * [Run latest release in WholeTale](https://dashboard.wholetale.org/run/64ae29e8a887f48b9f173678?tab=metadata) + * [Run the develop branch in WholeTale](https://dashboard.wholetale.org/run/64ae25c3a887f48b9f1735c8?tab=metadata) -WholeTale will give you a jupyter-lab running in the root of this +WholeTale will give you a JupyterLab running in the root of this repository. You can navigate to `examples/` and then open and run the notebooks of your choice. The develop container may require the user to update the repository (`git pull origin`) to stay current with development. -## Overview of Repository Contents +Non-numbered notebooks in `examples/` cover additional topics. These +notebooks are not yet covered by testing and you may encounter some +issues. In `examples/developer/` there are notebooks of interest to +developers who may want to learn about running the software tests. -The contents of directories at this level is described. Therein you may discover -another README.md for more information. 
-``` -.github/: Github actions, scripts and Python environments for continuous integration (CI) and releasing, -asv_benchmarks/: preformance benchmarking by ASV -autotest/: pywatershed package testing using pytest -autotest_exs/: pywatershed example notebook testing using pytest -bin/:PRMS executables distributed -doc/:Package/code documentation source code -evaluation/: tools for evaluation of pywatershed -examples/:How to use the package, mostly jupyter notebooks -prms_src/:PRMS source used for generating executables in bin/ -pywatershed/:Package source -reference/:Ancillary materials for development -resources/:Static stuff like images -test_data/:Data used for automated testing -``` +## Community engagement + +We value your feedback! Please use [discussions](https://github.com/EC-USGS/pywatershed/discussions) +or [issues](https://github.com/EC-USGS/pywatershed/issues) on Github. +For more in-depth contributions, please start by reading over +the pywatershed +[DEVELOPER.md](https://github.com/EC-USGS/pywatershed/blob/develop/DEVELOPER.md) and +[CONTRIBUTING.md](https://github.com/EC-USGS/pywatershed/blob/develop/CONTRIBUTING.md) +guidelines. + +Thank you for your interest. 
+ ## Disclaimer diff --git a/autotest/test_control.py b/autotest/test_control.py index 1515a6f3..0eff4f48 100644 --- a/autotest/test_control.py +++ b/autotest/test_control.py @@ -51,7 +51,7 @@ def test_control_simple(control_simple): assert control_simple.time_step == ts assert control_simple.start_time == time_dict["start_time"] assert control_simple.end_time == time_dict["end_time"] - assert control_simple.current_time is None + assert control_simple.current_time == control_simple.init_time assert control_simple.itime_step == -1 prev_time = control_simple.current_time n_times = control_simple.n_times diff --git a/autotest/test_netcdf_output.py b/autotest/test_netcdf_output.py index 3f00a6ad..19e7fb2d 100644 --- a/autotest/test_netcdf_output.py +++ b/autotest/test_netcdf_output.py @@ -32,6 +32,7 @@ def control(domain): control.edit_n_time_steps(n_time_steps) control.options["budget_type"] = "error" del control.options["netcdf_output_var_names"] + del control.options["netcdf_output_dir"] return control @@ -40,6 +41,15 @@ def control(domain): # optional variables to budgets check_vars = { + "PRMSSolarGeometry": [ + "soltab_horad_potsw", + "soltab_potsw", + ], + "PRMSAtmosphere": [ + "tminf", + "potet", + "swrad", + ], "PRMSCanopy": [ "hru_intcpstor", "hru_intcpstor_change", @@ -65,7 +75,24 @@ def control(domain): def test_process_budgets(domain, control, params, tmp_path, budget_sum_param): tmp_dir = pl.Path(tmp_path) # print(tmp_dir) - model_procs = [pywatershed.PRMSCanopy, pywatershed.PRMSChannel] + model_procs = [ + pywatershed.PRMSSolarGeometry, + pywatershed.PRMSAtmosphere, + pywatershed.PRMSCanopy, + pywatershed.PRMSChannel, + ] + + # setup input_dir with symlinked prms inputs and outputs + domain_output_dir = domain["prms_output_dir"] + input_dir = tmp_path / "input" + input_dir.mkdir() + control.options["input_dir"] = input_dir + + # Could limit this to just the variables in model_procs + for ff in domain_output_dir.resolve().glob("*.nc"): + shutil.copy(ff, 
input_dir / ff.name) + for ff in domain_output_dir.parent.resolve().glob("*.nc"): + shutil.copy(ff, input_dir / ff.name) # Deal with parameter around what budget sum vars to write and check if budget_sum_param == "some": @@ -80,9 +107,7 @@ def test_process_budgets(domain, control, params, tmp_path, budget_sum_param): else: raise ValueError("upexpected value") - # dont need any PRMS inputs for the model specified, so this is sufficient - input_dir = domain["prms_output_dir"] - control.options["input_dir"] = input_dir + control.options["netcdf_output_dir"] = tmp_dir # TODO: Eliminate potet and other variables from being used model = Model( @@ -91,27 +116,26 @@ def test_process_budgets(domain, control, params, tmp_path, budget_sum_param): parameters=params, ) + # we are going to harvest the data from memory and store here check_dict = {proc: {} for proc in check_vars.keys()} # test outputting specific vars by only using check_vars output_vars = [ item for sublist in list(check_vars.values()) for item in sublist ] - output_vars = None - with pytest.warns(UserWarning): + with pytest.raises(ValueError): model.initialize_netcdf( - tmp_dir, + pl.Path("foo"), budget_args=budget_args, output_vars=output_vars, ) - with pytest.raises(RuntimeError): - model.initialize_netcdf( - tmp_dir, - budget_args=budget_args, - output_vars=output_vars, - ) + model.initialize_netcdf( + output_dir=tmp_dir, # should allow a matching argument to control + budget_args=budget_args, + output_vars=output_vars, + ) for tt in range(n_time_steps): model.advance() @@ -122,11 +146,24 @@ def test_process_budgets(domain, control, params, tmp_path, budget_sum_param): for vv in pp_vars: if tt == 0: # use the output data to figure out the shape - check_dict[pp][vv] = np.zeros( - (n_time_steps, model.processes[pp][vv].shape[0]) - ) + if isinstance( + model.processes[pp][vv], pywatershed.TimeseriesArray + ): + spatial_len = model.processes[pp][vv].data.shape[1] + else: + spatial_len = 
model.processes[pp][vv].shape[0] - check_dict[pp][vv][tt, :] = model.processes[pp][vv] + check_dict[pp][vv] = np.zeros((n_time_steps, spatial_len)) + + if isinstance( + model.processes[pp][vv], pywatershed.TimeseriesArray + ): + check_dict[pp][vv][tt, :] = model.processes[pp][vv].current + else: + check_dict[pp][vv][tt, :] = model.processes[pp][vv] + + if pp in ["PRMSSolarGeometry", "PRMSAtmosphere"]: + continue for bb in check_budget_sum_vars: if tt == 0: @@ -148,7 +185,15 @@ def test_process_budgets(domain, control, params, tmp_path, budget_sum_param): for pp, pp_vars in check_vars.items(): for vv in pp_vars: nc_data = xr.open_dataset(tmp_dir / f"{vv}.nc")[vv] - assert np.allclose(check_dict[pp][vv], nc_data) + if vv in pywatershed.PRMSSolarGeometry.get_variables(): + assert np.allclose( + check_dict[pp][vv], nc_data[0:n_time_steps, :] + ) + else: + assert np.allclose(check_dict[pp][vv], nc_data) + + if pp in ["PRMSSolarGeometry", "PRMSAtmosphere"]: + continue for bb in check_budget_sum_vars: nc_data = xr.open_dataset(tmp_dir / f"{pp}_budget.nc")[bb] @@ -194,14 +239,12 @@ def test_separate_together_var_list( ] # setup input_dir with symlinked prms inputs and outputs - test_output_dir = tmp_dir / "test_results" domain_output_dir = domain["prms_output_dir"] input_dir = tmp_path / "input" input_dir.mkdir() control.options["input_dir"] = input_dir control.options["netcdf_output_var_names"] = output_vars control.options["netcdf_output_separate_files"] = separate - del control.options["netcdf_output_dir"] # Could limit this to just the variables in model_procs for ff in domain_output_dir.resolve().glob("*.nc"): @@ -218,6 +261,7 @@ def test_separate_together_var_list( # passing no output_dir arg and none in opts throws an error model.initialize_netcdf() + test_output_dir = tmp_dir / "test_results" control.options["netcdf_output_dir"] = test_output_dir model = Model( model_procs, @@ -257,7 +301,10 @@ def test_separate_together_var_list( assert nc_file.exists() ds = 
xr.open_dataset(nc_file, decode_timedelta=False) - proc_vars = set(proc.get_variables()) + if output_vars is None: + proc_vars = set(proc.get_variables()) + else: + proc_vars = set(check_vars[proc_key]) nc_vars = set(ds.data_vars) assert proc_vars == nc_vars for vv in proc.variables: diff --git a/autotest/test_nhm_self_drive.py b/autotest/test_nhm_self_drive.py index cd7b006d..8cafd91d 100644 --- a/autotest/test_nhm_self_drive.py +++ b/autotest/test_nhm_self_drive.py @@ -42,14 +42,15 @@ def test_drive_indiv_process(domain, tmp_path): control.options["calc_method"] = "numba" control.options["input_dir"] = domain["prms_run_dir"] del control.options["netcdf_output_var_names"] + del control.options["netcdf_output_dir"] nhm = pws.Model( nhm_processes, control=control, parameters=params, ) - with pytest.warns(UserWarning): - nhm.initialize_netcdf(output_dir=nhm_output_dir) + + nhm.initialize_netcdf(output_dir=nhm_output_dir) nhm.run(finalize=True) del nhm, params, control diff --git a/autotest/test_prms_atmosphere.py b/autotest/test_prms_atmosphere.py index 5b7b4e88..7394517e 100644 --- a/autotest/test_prms_atmosphere.py +++ b/autotest/test_prms_atmosphere.py @@ -18,7 +18,9 @@ @pytest.fixture(scope="function") def control(domain): - return Control.load_prms(domain["control_file"], warn_unused_options=False) + ctl = Control.load_prms(domain["control_file"], warn_unused_options=False) + del ctl.options["netcdf_output_dir"] + return ctl @pytest.fixture(scope="function") @@ -57,9 +59,10 @@ def test_compare_prms(domain, control, discretization, parameters, tmp_path): discretization=discretization, parameters=parameters, **input_variables, - netcdf_output_dir=tmp_path, ) + atm.initialize_netcdf(output_dir=tmp_path) + if do_compare_in_memory: answers = {} for var in comparison_var_names: diff --git a/autotest/test_prms_canopy.py b/autotest/test_prms_canopy.py index 57b95bf6..e43ad118 100644 --- a/autotest/test_prms_canopy.py +++ b/autotest/test_prms_canopy.py @@ -19,7 +19,10 @@ 
@pytest.fixture(scope="function") def control(domain): - return Control.load_prms(domain["control_file"], warn_unused_options=False) + ctl = Control.load_prms(domain["control_file"], warn_unused_options=False) + del ctl.options["netcdf_output_dir"] + del ctl.options["netcdf_output_var_names"] + return ctl @pytest.fixture(scope="function") diff --git a/autotest/test_prms_channel.py b/autotest/test_prms_channel.py index 6b6abef7..c8ce3c34 100644 --- a/autotest/test_prms_channel.py +++ b/autotest/test_prms_channel.py @@ -20,7 +20,10 @@ @pytest.fixture(scope="function") def control(domain): - return Control.load_prms(domain["control_file"], warn_unused_options=False) + ctl = Control.load_prms(domain["control_file"], warn_unused_options=False) + del ctl.options["netcdf_output_dir"] + del ctl.options["netcdf_output_var_names"] + return ctl @pytest.fixture(scope="function") diff --git a/autotest/test_prms_solar_geom.py b/autotest/test_prms_solar_geom.py index 615285b2..c4c6b70b 100644 --- a/autotest/test_prms_solar_geom.py +++ b/autotest/test_prms_solar_geom.py @@ -18,7 +18,9 @@ @pytest.fixture(scope="function") def control(domain): - return Control.load_prms(domain["control_file"], warn_unused_options=False) + ctl = Control.load_prms(domain["control_file"], warn_unused_options=False) + del ctl.options["netcdf_output_dir"] + return ctl @pytest.fixture(scope="function") @@ -57,9 +59,10 @@ def test_compare_prms( discretization=discretization, parameters=parameters, from_prms_file=from_prms_file, - netcdf_output_dir=tmp_path, ) + solar_geom.initialize_netcdf(output_dir=tmp_path) + if do_compare_in_memory: answers = {} for var in PRMSSolarGeometry.get_variables(): diff --git a/doc/api.rst b/doc/api.rst index 0166862f..ce10115d 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1,14 +1,22 @@ .. currentmodule:: pywatershed ################ -Full API Listing +API Summary ################ +.. include:: api/meta.rst + +.. include:: api/control.rst + +.. 
include:: api/parameters.rst + +.. include:: api/adapter.rst + .. include:: api/atmosphere.rst .. include:: api/hydrology.rst -.. include:: api/parameters.rst +.. include:: api/model.rst .. include:: api/base.rst diff --git a/doc/api/adapter.rst b/doc/api/adapter.rst new file mode 100644 index 00000000..138e9265 --- /dev/null +++ b/doc/api/adapter.rst @@ -0,0 +1,15 @@ +.. currentmodule:: pywatershed + + +----------- +adapter +----------- + +The adapter module adapts various inputs to a common interface for pywatershed inputs. + +.. autosummary:: + :toctree: generated/ + + adapter_factory + Adapter + AdapterNetcdf diff --git a/doc/api/atmosphere.rst b/doc/api/atmosphere.rst index 42460399..8116ded2 100644 --- a/doc/api/atmosphere.rst +++ b/doc/api/atmosphere.rst @@ -1,9 +1,9 @@ .. currentmodule:: pywatershed -########## +---------- Atmosphere -########## +---------- Atmospheric process models. diff --git a/doc/api/base.rst b/doc/api/base.rst index 7507fc94..d1cdb41c 100644 --- a/doc/api/base.rst +++ b/doc/api/base.rst @@ -1,8 +1,8 @@ .. currentmodule:: pywatershed -########## -Base -########## +------------- +Base Classes +------------- Base classes for the modeling system. @@ -10,11 +10,7 @@ Base classes for the modeling system. :toctree: generated/ base.Accessor - Adapter - Control base.DatasetDict - meta base.Budget base.Process base.ConservativeProcess - Model diff --git a/doc/api/control.rst b/doc/api/control.rst new file mode 100644 index 00000000..8ed305a7 --- /dev/null +++ b/doc/api/control.rst @@ -0,0 +1,10 @@ +.. currentmodule:: pywatershed + +---------- +Control +---------- + +.. autosummary:: + :toctree: generated/ + + Control diff --git a/doc/api/hydrology.rst b/doc/api/hydrology.rst index d2602563..fe62ef07 100644 --- a/doc/api/hydrology.rst +++ b/doc/api/hydrology.rst @@ -1,9 +1,9 @@ .. currentmodule:: pywatershed -########## +---------- Hydrology -########## +---------- Hydrologic model components. 
diff --git a/doc/api/meta.rst b/doc/api/meta.rst new file mode 100644 index 00000000..cfaa6bd9 --- /dev/null +++ b/doc/api/meta.rst @@ -0,0 +1,13 @@ +.. currentmodule:: pywatershed + + +----------- +metadata +----------- + +The metadata module provides higher-level information on all aspects of pywatershed. + +.. autosummary:: + :toctree: generated/ + + meta diff --git a/doc/api/model.rst b/doc/api/model.rst new file mode 100644 index 00000000..7be7f4c4 --- /dev/null +++ b/doc/api/model.rst @@ -0,0 +1,10 @@ +.. currentmodule:: pywatershed + +---------- +Model +---------- + +.. autosummary:: + :toctree: generated/ + + Model diff --git a/doc/api/parameters.rst b/doc/api/parameters.rst index a0a26d8c..aafcd74e 100644 --- a/doc/api/parameters.rst +++ b/doc/api/parameters.rst @@ -1,9 +1,9 @@ .. currentmodule:: pywatershed -########### +----------- Parameters -########### +----------- Parameter classes. diff --git a/doc/api/utils.rst b/doc/api/utils.rst index ad0d5b08..88727983 100644 --- a/doc/api/utils.rst +++ b/doc/api/utils.rst @@ -1,9 +1,9 @@ .. currentmodule:: pywatershed -########## +---------- Utils -########## +---------- .. autosummary:: :toctree: generated/ diff --git a/doc/conf.py b/doc/conf.py index efce9f6e..0723dd0f 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -16,6 +16,8 @@ from datetime import datetime sys.path.insert(0, os.path.abspath("../")) +sys.path.insert(0, os.path.abspath("../pywatershed")) + import sphinx_autosummary_accessors # noqa @@ -33,11 +35,14 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
+# For sphinx-autodoc-typehints compat with sphinx.ext.napoleon see +# https://github.com/tox-dev/sphinx-autodoc-typehints/issues/15 extensions = [ "sphinx.ext.autodoc", "sphinx.ext.autosummary", "sphinx.ext.viewcode", "sphinx.ext.napoleon", + "sphinx_autodoc_typehints", # "nbsphinx", "sphinx_autosummary_accessors", "sphinx.ext.intersphinx", @@ -56,24 +61,25 @@ autosummary_generate = True -autodoc_typehints = "none" + +# autodoc_typehints = "none" # autosummary_imported_members = True -# autodoc_default_options = { -# # "members": False, -# # # "imported-members": True, -# # # "inherited-members": True, -# # # "undoc-members": True, -# # # "private-members": True, # -# "special-members": "", -# "exclude-members": "__init__", -# } +autodoc_default_options = { + "members": True, + # "imported-members": True, + "inherited-members": True, + "undoc-members": True, + "private-members": False, # + # "special-members": "", + "exclude-members": "__init__", +} # Napoleon configurations napoleon_google_docstring = True napoleon_numpy_docstring = True -napoleon_use_param = False +napoleon_use_param = True napoleon_use_ivar = True napoleon_use_rtype = False napoleon_preprocess_types = True @@ -133,3 +139,13 @@ # show_navbar_depth=1, # show_toc_level=1, ) + +intersphinx_mapping = { + "python": ("https://docs.python.org/3/", None), + "pandas": ("https://pandas.pydata.org/pandas-docs/stable", None), + "numpy": ("https://numpy.org/doc/stable", None), + "numba": ("https://numba.readthedocs.io/en/stable/", None), + "matplotlib": ("https://matplotlib.org/stable/", None), + "cftime": ("https://unidata.github.io/cftime", None), + "xarray": ("https://docs.xarray.dev/en/stable/", None), +} diff --git a/doc/index.rst b/doc/index.rst index 0ef4a6d8..7c5f941b 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -1,47 +1,138 @@ .. 
currentmodule:: pywatershed #################################################### -pywatershed: a hydrologic model in Python +pywatershed: A hydrologic model in Python #################################################### Welcome to the `pywatershed` docs! -To learn more about this project, see `About `_. - -Please browse the API reference, developer info, and -index using the table of contents on the left. - -| For introductory example notebooks, look in the `examples/ `_ directory in the repository. Numbered starting at 00, these are meant to be completed in order. Though no notebook outputs are saved in Github, these notebooks can be easily found and run in WholeTale containers (free but sign-up or log-in required): +Pywatershed is a Python package for simulating hydrologic processes motivated by +the need to modernize important, legacy hydrologic models at the USGS, +particularly the +`Precipitation-Runoff Modeling System `__ +(PRMS, Markstrom et al., 2015) and its role in +`GSFLOW `__ +(Markstrom et al., 2008). +The goal of modernization is to make these legacy models more flexible as process +representations, to support testing of alternative hydrologic process +conceptualizations, and to facilitate the incorporation of cutting edge +modeling techniques and data sources. Pywatershed is a place for experimentation +with software design, process representation, and data fusion in the context +of well-established hydrologic process modeling. + +The Python language was chosen because it is accessible to a wide audience of +potential contributors which will help foster community development and +experimentation. A large number of advanced libraries available for Python can +also be applied to hydrologic modeling, including libraries for parallelism, data +access and manipulation, and machine learning. 
+ +Following the conceptual design of PRMS, pywatershed calculates explicit solutions +of spatially distributed hydrologic process representations including evaporation, +transpiration, runoff, infiltration, interflow, snowpack, soil moisture, conceptual +groundwater storage, and channel flow. These process representations simulate hydrologic +response and water budgets given inputs of spatially distributed weather variables and +land use change at temporal scales ranging from days to centuries. + +Pywatershed enhances PRMS with a new software design that is object-oriented and highly +flexible, allowing users to easily run "sub-models", replace process representations, and +incorporate new data. There are base classes which manage mass and energy conservation +and the implementation of concrete process classes follows a self-describing design +which allows for a Model class to connect hydrologic process classes based on their +descriptions of themselves. A variety of input data sources is managed by the +Adapter class which implements subclasses for different sources. The design of +pywatershed is documented in these docs and also demonstrated by numbered Jupyter +Notebooks in the `examples/` directory. + +The flexible structure of pywatershed helps it to couple with other hydrologic +models. We can easily one-way couple pywatershed to +`MODFLOW 6 `__ +(MF6, Hughes et al., 2017) +via its +`XMI interface `__ +(Hughes et al., 2022). +We are working towards a two-way, tight coupling with MF6 to reproduce GSFLOW. Our goal is to +support integrated hydrologic process modeling of surface water and groundwater in a +sustainable manner that allows individual software components to evolve independently. + + +========================= +Current version: 1.0.0 +========================= +With pywatershed version 1.0.0, we have faithfully reproduced the PRMS process representations used in +the USGS `National Hydrologic Model `__ (NHM, Regan et al., +2018). 
For more information on version 1.0.0 see the +`release notes `_ +and the `extended release notes `_ +for version 1.0.0. + +============================================ +Upcoming development in 2024 +============================================ +The broad goal is to reproduce GSFLOW coupling using the MODFLOW 6 API. This will include +gridded configurations and cascading flows. +We are also working on reservoir representations. + +================= +Getting started +================= +Please note that you can browse the API reference, developer info, and index +using the table of contents on the left. But *the best way to get started +with pywatershed is to dive into the example notebooks*. + +| For introductory example notebooks, look in the `examples/ `_ directory in the repository. Numbered starting at 00, these are meant to be completed in order. Notebook outputs are not saved in Github. But you can run these notebooks locally or using WholeTale (free but sign-up or log-in required) where the pywatershed environment is all ready to go: .. image:: https://raw.githubusercontent.com/whole-tale/wt-design-docs/master/badges/wholetale-explore.svg :target: https://dashboard.wholetale.org -| `WholeTale container for latest release (main branch) `_ -| `WholeTale container for develop branch `_ +* `Run the latest release in WholeTale `_ +* `Run the develop branch in WholeTale `_ -See `README.md `_ for more details on using WholeTale. +See `README.md `_ for more details +on both `running locally `_ +or `using WholeTale `_. -We value your feedback! To view the repository, suggest an edit to this documentation, or open an issue, please follow the Github Octocat at the top of the page. For more in-depth contributions, please start by reading over the `DEVELOPER.md file `_. +======================== +Community engagement +======================== +We value your feedback! Please use `discussions `_ +or `issues `_ on Github. 
You may also suggest +edits to this documentation or open an issue by clicking on the Github Octocat at the top of the page. +For more in-depth contributions, please start by reading over +the `DEVELOPER.md file `_. Thank you for your interest. +=========== +References +=========== +* `Hughes, J. D., Langevin, C. D., & Banta, E. R. (2017). Documentation for the MODFLOW 6 framework (No. 6-A57). US Geological Survey. `__ +* `Hughes, J. D., Russcher, M. J., Langevin, C. D., Morway, E. D., & McDonald, R. R. (2022). The MODFLOW Application Programming Interface for simulation control and software interoperability. Environmental Modelling & Software, 148, 105257. `__ +* `Markstrom, S. L., Niswonger, R. G., Regan, R. S., Prudic, D. E., & Barlow, P. M. (2008). GSFLOW-Coupled Ground-water and Surface-water FLOW model based on the integration of the Precipitation-Runoff Modeling System (PRMS) and the Modular Ground-Water Flow Model (MODFLOW-2005). US Geological Survey techniques and methods, 6, 240. `__ +* `Markstrom, S. L., Regan, R. S., Hay, L. E., Viger, R. J., Webb, R. M., Payn, R. A., & LaFontaine, J. H. (2015). PRMS-IV, the precipitation-runoff modeling system, version 4 (No. 6-B7). US Geological Survey. `__ +* `Regan, R. S., Markstrom, S. L., Hay, L. E., Viger, R. J., Norton, P. A., Driscoll, J. M., & LaFontaine, J. H. (2018). Description of the national hydrologic model for use with the precipitation-runoff modeling system (prms) (No. 6-B9). US Geological Survey. `__ + .. toctree:: - :hidden: - :caption: About + :caption: Home - About + self + + .. toctree:: :hidden: :caption: API Reference - API Reference + API Summary + meta + Control + Parameters + adapter Atmosphere Hydrology - Parameters - Base - Utils + Model + Base Classes + Utilities .. toctree:: :maxdepth: 2 diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3f62481c..f85505da 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -12,13 +12,16 @@ What's New np.random.seed(123456) -.. 
_whats-new.0.3.0: +.. _whats-new.1.0.0: -v0.3.0 (unreleased) +v1.0.0 (unreleased) --------------------- New features ~~~~~~~~~~~~ +- Control object features including (optional) warnings for unused legacy options, and + defined and enforced options. Also to_yaml() and __str__ implementations. + (:pull:`240`) By `James McCreight `_. - Example notebook of how to edit Parameters with associated bug fixes to do so. (:pull:`232`) By `James McCreight `_. - Conda feedstock for pywatershed ``_. @@ -31,7 +34,8 @@ Breaking changes Deprecations ~~~~~~~~~~~~ - +- Deprecation of Control.load() for Control.load_prms(). + (:pull:`240`) By `James McCreight `_. Performance ~~~~~~~~~~~ @@ -39,6 +43,14 @@ Bug fixes ~~~~~~~~~ +- Mass balance fix in PRMS snow for rain on snow followed by evaporation + consuming the entire snow pack. + (:pull:`248`) By `James McCreight `_. +- Fix mass balance issue in PRMSSnow that is also present in PRMS, + snow evap is not taken from freeh2o when there is no pk_ice. + (:pull:`236`) By `James McCreight `_. +- Resolve issues with different ways of specifying netcdf output options. + (:pull:`230`) By `James McCreight `_. - Resolve issues with different ways of specifiying netcdf output options. (:pull:`230`) By `James McCreight `_. - PRMSSoilzone remove soil_moist_prev because soil_moist is not prognotic and
The system helps autotest know if test data were generated + and if they are up to date. + (:pull:`253`) By `James McCreight `_. +- Apply pylint and flake8 everywhere as much as possible. + (:pull:`251`) By `James McCreight `_. +- Remove diagnostic variables pkwater_equiv_change, pkwater_ante + (:pull:`248`) By `James McCreight `_. +- Use v1 instead of main for fortran-lang/setup-fortran. + (:pull:`242`, :pull:`243`) By `Wes Bonelli `_. +- Refactor test data generation to solve race condition for dependent tests. + (:pull:`237`) By `Wes Bonelli `_. - Refactor tests against PRMS for consistency, flexibility, and thoroughness. (:pull:`244`) By `James McCreight `_. diff --git a/environment.yml b/environment.yml index 71d39656..8d0e580b 100644 --- a/environment.yml +++ b/environment.yml @@ -49,5 +49,6 @@ dependencies: - flake8 - git+https://github.com/modflowpy/flopy.git - jupyter_black - - modflow-devtools < 1.0.0 + - modflow-devtools - pylint + - sphinx-autodoc-typehints diff --git a/environment_w_jupyter.yml b/environment_w_jupyter.yml index f2a19af9..8083817f 100644 --- a/environment_w_jupyter.yml +++ b/environment_w_jupyter.yml @@ -54,3 +54,4 @@ dependencies: - jupyter_black - modflow-devtools - pylint + - sphinx-autodoc-typehints diff --git a/examples/00_processes.ipynb b/examples/00_processes.ipynb index 58d17fc8..baaa1033 100644 --- a/examples/00_processes.ipynb +++ b/examples/00_processes.ipynb @@ -70,7 +70,7 @@ "\n", "from helpers import gis_files\n", "\n", - "gis_files.download() # this downloads GIS files" + "gis_files.download() # this downloads GIS files specific to the example notebooks" ] }, { @@ -391,8 +391,7 @@ "outputs": [], "source": [ "var = \"gwres_flow\"\n", - "# There is only one variable per file, so bring this into a xr.DataArray\n", - "var_da = xr.open_dataset(f\"00_processes/{var}.nc\")[var]\n", + "var_da = xr.open_dataarray(f\"00_processes/{var}.nc\")\n", "display(var_da)\n", "var_da.hvplot(groupby=\"nhm_id\")" ] diff --git 
a/examples/01_multi-process_models.ipynb b/examples/01_multi-process_models.ipynb index f6d003f8..8f5dd6d1 100644 --- a/examples/01_multi-process_models.ipynb +++ b/examples/01_multi-process_models.ipynb @@ -49,8 +49,7 @@ "import hvplot.xarray # noqa\n", "import numpy as np\n", "import pywatershed as pws\n", - "\n", - "# from tqdm.notebook import tqdm\n", + "from pywatershed.utils.path import dict_pl_to_str\n", "import xarray as xr\n", "\n", "from helpers import gis_files\n", @@ -299,7 +298,10 @@ " show_params=show_params,\n", " ).SVG(verbose=True, dpi=48)\n", "except:\n", - " print(\"In some cases, dot fails on Mac ARM machines.\")" + " print(\n", + " \"Dot fails on some machines. You can see the graph at this url:\"\n", + " \"https://private-user-images.githubusercontent.com/12465248/259783743-7813cba0-343c-4bf1-9b15-6ed92930f288.svg?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTEiLCJleHAiOjE3MDI0MjU1MzIsIm5iZiI6MTcwMjQyNTIzMiwicGF0aCI6Ii8xMjQ2NTI0OC8yNTk3ODM3NDMtNzgxM2NiYTAtMzQzYy00YmYxLTliMTUtNmVkOTI5MzBmMjg4LnN2Zz9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFJV05KWUFYNENTVkVINTNBJTJGMjAyMzEyMTIlMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjMxMjEyVDIzNTM1MlomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPTA1NmJjMTVkZDRlODk2ZDY4MGNlMWQyOGUzYjMzOTY0ODM5YTgwYTE0MzNhYjBmNzRjNDc5MWU0Yjc0YjM5MzQmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0JmFjdG9yX2lkPTAma2V5X2lkPTAmcmVwb19pZD0wIn0.QY2w3UvoE8hFjog03cztTggjZNmUhyYmoHj2ZuGTaGw\"\n", + " )" ] }, { @@ -335,9 +337,9 @@ "metadata": {}, "source": [ "## Model dictionary yaml file\n", - "It may be preferable to have a model dictionary encoded in YAML file in many situations. Let's do that. 
Necessarily, the contents of the YAML file will look different than above where we had the contents of the model dictionary in memory.\n", + "It may be preferable to have a model dictionary encoded in YAML file in many situations, instead of in Python code. Necessarily, the contents of the YAML file will look different than the code above where we had the contents of the model dictionary in memory.\n", "\n", - "First we'll need to write the control instance as a YAML file. To do that we need a serializable dictionary in Python." + "We'll create a new directory for our YAML-based run. Then we'll write the existing Control instance as a YAML file in that directory." ] }, { @@ -351,7 +353,7 @@ "run_dir.mkdir(exist_ok=True)\n", "control_yaml_file = run_dir / \"control.yaml\"\n", "control_yaml = deepcopy(control)\n", - "control_yaml.options[\"netcdf_output_dir\"] = nb_output_dir / \"nhm_yaml\"\n", + "control_yaml.options[\"netcdf_output_dir\"] = run_dir\n", "control_yaml.to_yaml(control_yaml_file)" ] }, @@ -365,7 +367,7 @@ } }, "source": [ - "We add the option `netcdf_output_dir` to the control since we assume we wont be able to do so at run time. Note that this option and the `input_dir` option are `pathlib.Path` objects. These are not what we want to write to file. We want their string version. We could do `str()` on each one by hand, but it will be more handy to write a small, recursive function to do this on a supplied dictionary since this will be a recurring task with the model dictionary we will create after the control YAML file." + "We add the option `netcdf_output_dir` to the control since we assume we won't be able to do so at run time. Note that this option and the `input_dir` option are `pathlib.Path` objects."
] }, { @@ -378,21 +380,24 @@ "control" ] }, + { + "cell_type": "markdown", + "id": "c56e0f88-1842-406a-94f4-d45a918783dd", + "metadata": {}, + "source": [ + "But the `to_yaml()` method converts these to strings in the YAML file for us:" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "e8e907ed-29e1-44f0-9960-98f70db6a578", + "id": "1ad1007d-6890-4ef9-83ec-6591a589732e", "metadata": {}, "outputs": [], "source": [ - "def dict_pl_to_str(the_dict):\n", - " for key, val in the_dict.items():\n", - " if isinstance(val, dict):\n", - " the_dict[key] = dict_pl_to_str(val)\n", - " elif isinstance(val, pl.Path):\n", - " the_dict[key] = str(val)\n", - "\n", - " return the_dict" + "with control_yaml_file.open(\"r\") as file:\n", + " for line in file:\n", + " print(line[0:-1]) # drop the line break to condense" ] }, { @@ -452,7 +457,7 @@ "id": "d1162367-7898-4a6a-bf05-3cf79fcfa99a", "metadata": {}, "source": [ - "Before looking at it, we'll convert `Path` objects to strings:" + "Before looking at it, we'll convert `Path` objects to strings using a utility in pywatershed, `dict_pl_to_str`: " ] }, { @@ -493,7 +498,7 @@ "id": "4230eeac-3feb-4534-896f-442cc9e85451", "metadata": {}, "source": [ - "We'll use a little magics to directly examine the written YAML files" + "Examine the written model dictionary YAML file:" ] }, { @@ -503,17 +508,9 @@ "metadata": {}, "outputs": [], "source": [ - "! cat 01_multi-process_models/nhm_yaml/control.yaml" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8901961b-527a-419d-82fc-a3cb16af04ec", - "metadata": {}, - "outputs": [], - "source": [ - "! 
cat 01_multi-process_models/nhm_yaml/model_dict.yaml" + "with model_dict_yaml_file.open(\"r\") as file:\n", + " for line in file:\n", + " print(line[0:-1]) # drop the line break to condense" ] }, { @@ -551,7 +548,10 @@ " show_params=show_params,\n", " ).SVG(verbose=True, dpi=48)\n", "except:\n", - " print(\"In some cases, dot fails on Mac ARM machines.\")" + " print(\n", + " \"Dot fails on some machines. You can see the graph at this url:\"\n", + " \"https://private-user-images.githubusercontent.com/12465248/259783743-7813cba0-343c-4bf1-9b15-6ed92930f288.svg?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTEiLCJleHAiOjE3MDI0MjU1MzIsIm5iZiI6MTcwMjQyNTIzMiwicGF0aCI6Ii8xMjQ2NTI0OC8yNTk3ODM3NDMtNzgxM2NiYTAtMzQzYy00YmYxLTliMTUtNmVkOTI5MzBmMjg4LnN2Zz9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFJV05KWUFYNENTVkVINTNBJTJGMjAyMzEyMTIlMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjMxMjEyVDIzNTM1MlomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPTA1NmJjMTVkZDRlODk2ZDY4MGNlMWQyOGUzYjMzOTY0ODM5YTgwYTE0MzNhYjBmNzRjNDc5MWU0Yjc0YjM5MzQmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0JmFjdG9yX2lkPTAma2V5X2lkPTAmcmVwb19pZD0wIn0.QY2w3UvoE8hFjog03cztTggjZNmUhyYmoHj2ZuGTaGw\"\n", + " )" ] }, { @@ -605,16 +605,6 @@ ")" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "310a8d35-5ddb-4ef1-b4a1-65a823d4aa7c", - "metadata": {}, - "outputs": [], - "source": [ - "set([ff.name for ff in yaml_files]) - set([ff.name for ff in mem_files])" - ] - }, { "cell_type": "markdown", "id": "1024fe62-ac86-4171-8c45-018f1ac35902", @@ -700,6 +690,28 @@ ")" ] }, + { + "cell_type": "markdown", + "id": "c97d3fff-4a83-4a4e-a22f-ed5da54412cd", + "metadata": {}, + "source": [ + "We can also make a spatial plot of the streamflow at the final time. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "50bc3067-9f56-4483-9ce6-d6fe8b1e0f09", + "metadata": {}, + "outputs": [], + "source": [ + "proc_plot = pws.analysis.ProcessPlot(gis_files.gis_dir / \"drb_2yr\")\n", + "proc_name = \"prms_channel\"\n", + "var_name = \"seg_outflow\"\n", + "proc = model_yaml.processes[proc_name]\n", + "display(proc_plot.plot(var_name, proc))" + ] + }, { "cell_type": "markdown", "id": "2ae49ade-3c8b-4fe8-af5a-9497f856151b", @@ -789,7 +801,7 @@ "id": "2a275786-239b-4f8a-9643-e65ad6ef9dc8", "metadata": {}, "source": [ - "And finally we instantiate the submodel from the model dictionary yaml file. " + "And finally we instantiate the reduced output from the model dictionary yaml file. " ] }, { @@ -799,8 +811,8 @@ "metadata": {}, "outputs": [], "source": [ - "submodel = pws.Model.from_yaml(model_dict_yaml_file)\n", - "submodel" + "model_less_output = pws.Model.from_yaml(model_dict_yaml_file)\n", + "model_less_output" ] }, { @@ -811,7 +823,7 @@ "outputs": [], "source": [ "%%time\n", - "submodel.run(finalize=True)" + "model_less_output.run(finalize=True)" ] }, { @@ -833,7 +845,7 @@ "metadata": {}, "source": [ "## NHM Submodel for the Delaware River Basin \n", - "In many cases, running the full NHM model may not be necessary and it may be advantageous to just run some of the processes in it. Suppose you wanted to change parameters or model process representation in the PRMSSoilzone to better predict streamflow. As the model is 1-way coupled, you can simply run a submodel starting with PRMSSoilzone and running through PRMSChannel." + "In many cases, running the full NHM model may not be necessary and it may be advantageous to just run some of the processes in it. Pywatershed gives you this flexibility. Suppose you wanted to change parameters or model process representation in the PRMSSoilzone to better predict streamflow. 
As the model is 1-way coupled, you can simply run a submodel starting with PRMSSoilzone and running through PRMSChannel." ] }, { @@ -916,7 +928,7 @@ "id": "6c789efb-b982-48d2-91cd-c555820fccb3", "metadata": {}, "source": [ - "And where will we get these input files? If you pay close attention, you'll notice that these files do not come with the repository. Instead they are generated when we ran the full NHM model above." + "And where will we get these input files? You'll notice that these files do not come with the repository. Instead they are generated when we ran the full NHM model above." ] }, { @@ -961,8 +973,9 @@ "run_dir = pl.Path(nb_output_dir / \"nhm_sub\").resolve()\n", "run_dir.mkdir(exist_ok=True)\n", "\n", - "# key that inputs exist from previous full-model run\n", + "\n", "control_cp = deepcopy(control)\n", + "# It is key that inputs exist from previous full-model run\n", "control_cp.options[\"input_dir\"] = yaml_output_dir.resolve()\n", "control_cp.options[\"netcdf_output_dir\"] = run_dir.resolve()\n", "control_yaml_file = run_dir / \"control.yaml\"\n", @@ -993,7 +1006,6 @@ " if isinstance(model_dict[kk], dict) and kk not in keep_procs:\n", " del model_dict[kk]\n", "\n", - "\n", "pprint(model_dict, sort_dicts=False)" ] }, @@ -1064,7 +1076,10 @@ " show_params=show_params,\n", " ).SVG(verbose=True, dpi=48)\n", "except:\n", - " print(\"In some cases, dot fails on Mac ARM machines.\")" + " print(\n", + " \"Dot fails on some machines. 
You can see the graph at this url:\"\n", + " \"https://private-user-images.githubusercontent.com/12465248/259783766-e0902e7f-ac69-488c-8719-858315d45e7c.svg?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTEiLCJleHAiOjE3MDI0MjU1MzIsIm5iZiI6MTcwMjQyNTIzMiwicGF0aCI6Ii8xMjQ2NTI0OC8yNTk3ODM3NjYtZTA5MDJlN2YtYWM2OS00ODhjLTg3MTktODU4MzE1ZDQ1ZTdjLnN2Zz9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFJV05KWUFYNENTVkVINTNBJTJGMjAyMzEyMTIlMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjMxMjEyVDIzNTM1MlomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPWJlMzE0Zjc0YmNlZDlkY2YwZWE0ZDhkMTBiYTgzZTFhNmNlODFkM2I2NzA0MjViNDdkMDQ1ZjZmZWNhZTk0ZTkmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0JmFjdG9yX2lkPTAma2V5X2lkPTAmcmVwb19pZD0wIn0.HCW8QQr1tobxM0Ln0U0Sw_xNbrOscNnDe_tS0e5uMvk\"\n", + " )" ] }, { @@ -1137,7 +1152,7 @@ "source": [ "First we access the metadata on `recharge` and we see its description, dimension, type, and units. The we look at the dimension names of the PRMSSoilzone process in whith it is found. We see the length of the `nhru` dimension and that this is the only dimension on `recharge`. We also see that `recharge` is a `numpy.ndarray` with data type `float64`.\n", "\n", - "So recharge only has spatial dimension. It is written to file with each timestep (or periodically). However, the last timestep is still in memory (even though we've finalized the run) and we can visualize it. This time the data are on the unstructured/polygon grid of Hydrologic Response Units (HRUs) instead of the streamflow network plotted above." + "So recharge only has spatial dimension. It is written to file with each timestep (or periodically). However, the last timestep is still in memory (even though we've finalized the run) and we can visualize it. The data are on the unstructured/polygon grid of Hydrologic Response Units (HRUs), we'll visualize the spatial distribution at this final time." 
] }, { @@ -1172,8 +1187,8 @@ "outputs": [], "source": [ "var = \"recharge\"\n", - "nhm_ds = xr.open_dataset(yaml_output_dir / f\"{var}.nc\")\n", - "sub_ds = xr.open_dataset(run_dir / f\"{var}.nc\")" + "nhm_da = xr.open_dataarray(yaml_output_dir / f\"{var}.nc\")\n", + "sub_da = xr.open_dataarray(run_dir / f\"{var}.nc\")" ] }, { @@ -1183,8 +1198,8 @@ "metadata": {}, "outputs": [], "source": [ - "display(nhm_ds)\n", - "display(sub_ds)" + "display(nhm_da)\n", + "display(sub_da)" ] }, { @@ -1192,9 +1207,7 @@ "id": "4f317363-c765-4dda-a972-92f5265abd47", "metadata": {}, "source": [ - "If you expand the metadata (the little folded page icon on the right side of the recharge line), you get a variable description, dimension, type, and units. \n", - "\n", - "Now compare all output variables common to both runs, asserting that the two runs gave equal output." + "Now we can compare all output variables common to both runs, asserting that the two runs gave equal output." ] }, { @@ -1205,8 +1218,8 @@ "outputs": [], "source": [ "for var in submodel_variables:\n", - " nhm_da = xr.open_dataset(yaml_output_dir / f\"{var}.nc\")[var]\n", - " sub_da = xr.open_dataset(run_dir / f\"{var}.nc\")[var]\n", + " nhm_da = xr.open_dataarray(yaml_output_dir / f\"{var}.nc\")\n", + " sub_da = xr.open_dataarray(run_dir / f\"{var}.nc\")\n", " xr.testing.assert_equal(nhm_da, sub_da)" ] }, @@ -1218,8 +1231,8 @@ "outputs": [], "source": [ "# var_name = \"dprst_seep_hru\"\n", - "nhm_da = xr.open_dataset(yaml_output_dir / f\"{var_name}.nc\")[var_name]\n", - "sub_da = xr.open_dataset(run_dir / f\"{var_name}.nc\")[var_name]\n", + "nhm_da = xr.open_dataarray(yaml_output_dir / f\"{var_name}.nc\")\n", + "sub_da = xr.open_dataarray(run_dir / f\"{var_name}.nc\")\n", "scat = xr.merge(\n", " [nhm_da.rename(f\"{var_name}_yaml\"), sub_da.rename(f\"{var_name}_subset\")]\n", ")\n", diff --git a/examples/02_prms_legacy_models.ipynb b/examples/02_prms_legacy_models.ipynb index 313a8a29..988153e9 100644 --- 
a/examples/02_prms_legacy_models.ipynb +++ b/examples/02_prms_legacy_models.ipynb @@ -12,9 +12,9 @@ "source": [ "# Multi-process models in pywatershed: PRMS-legacy instantiation\n", - "Because `pywatershed` has its roots in the Precipitation-Runoff Modeling System (PRMS, Regan et al., 2015), `pywatershed` supports PRMS-model instantation from legacy PRMS input files. The traditional PRMS input files are the control file, the parameter file, and the climate-by-hru (CBH) files (typically daily precipitation and maximum and minimum temperatures). While the CBH files need to be pre-processed to NetCDF format, native PRMS control and parameter files are supported. \n", + "Because pywatershed has its roots in the Precipitation-Runoff Modeling System (PRMS, Regan et al., 2015), pywatershed supports PRMS-model instantiation from legacy PRMS input files. The traditional PRMS input files are the control file, the parameter file, and the climate-by-hru (CBH) files (typically daily precipitation and maximum and minimum temperatures). While the CBH files need to be pre-processed to NetCDF format, native PRMS control and parameter files are supported. \n", "\n", - "Below we'll show how to preprocess the CBH files to NetCDF and how to instantiate a `Model` using PRMS-native files. In this notebook we'll reproduce the basic results from the previous notebook (01) for the NHM full model and its submodel. As in the previous notebooks, we'll look at running PRMS processes moels on the Delaware River Basin (DRB) subdomain of the NHM for a 2 year period using `pywatershed`.\n", + "Below we'll show how to preprocess the CBH files to NetCDF and how to instantiate a pywatershed `Model` using PRMS-native files. In this notebook we'll reproduce the basic results from the previous notebook (`01_multi-process_models.ipynb`) for the NHM full model and its submodel. 
As in the previous notebooks, we'll run PRMS process models on the Delaware River Basin (DRB) subdomain of the NHM for a 2 year period using `pywatershed`.\n", "\n", "## Prerequisites" ] @@ -90,7 +90,8 @@ "id": "b45c9899-3c4d-4cc9-b493-572d31a88426", "metadata": {}, "source": [ - "Since we need to preprocess CBH to netcdf files, let's see how to do it" + "## Preprocess CBH files to NetCDF format\n", + "We need to preprocess CBH to NetCDF files, let's see how to do it! We'll create a directory to hold these files and then we'll process precipitation (prcp), maximum temperature (tmax), and minimum temperature (tmin) files from CBH to NetCDF using a utility from pywatershed." ] }, { @@ -125,7 +126,7 @@ "source": [ "## An NHM multi-process model: PRMS-legacy instantation\n", "\n", - "The 8 conceptual `Process` classes that comprise the NHM are, in order:" + "The 8 conceptual `Process` classes that comprise the NHM in pywatershed are, in order:" ] }, { @@ -154,7 +155,7 @@ "id": "c460bc54-bba5-4493-9d1f-e2fe6672b186", "metadata": {}, "source": [ - "A multi-process model, comprised of the above `Process`es (see notebook 00), is assembled by the `Model` class. We can take a quick look at the first 22 lines of help on `Model`:" + "A multi-process model, comprised of the above `Process`es (see notebook `00_processes.ipynb`), is assembled by the `Model` class. We can take a quick look at the first 22 lines of help on `Model`:" ] }, { @@ -177,7 +178,7 @@ "source": [ "The `help()` mentions that there are 2 distinct ways of instantiating a `Model` class. In this notebook, we focus on the PRMS-legacy instantiation (see previous notebook for the pywatershed-centric way).\n", "\n", - "With the PRMS-legacy approach, the first argument is a \"process list\", which is what we defined with the 8 NHM classes above. In addition, we'll supply the `control` and `parameters` arguments. The full `help()` describes PRMS-legacy instantation and provides examples. 
Please use it for reference and more details. Here we'll give an extended concrete example. \n", + "With the PRMS-legacy approach, the first argument is a \"process list\", which is what we defined with the 8 NHM classes above. In addition, we must also supply the `control` and `parameters` arguments. The full `help()` describes PRMS-legacy instantiation and provides examples. Please use it for reference and more details. Here we'll give an extended concrete example. \n", "\n", "We already loaded a `PrmsParameters` object from a PRMS-native file when we converted the CBH files. We'll just check that it is an instance/subclass of the `Parameters` class. Then we'll return the object (which invokes the `__repr__` method on the object, just giving information about it) to see what we got." ] @@ -198,7 +199,7 @@ "id": "360ac849-9093-4b34-af70-e5e56988f298", "metadata": {}, "source": [ - "We got a `PrmsParameters` object which is a subclass of `Parameters`. We now load the control file into a `Control` object, " + "We now load the PRMS-native control file into a `Control` object, " ] }, { @@ -210,7 +211,7 @@ "source": [ "with warnings.catch_warnings():\n", " warnings.simplefilter(\"ignore\")\n", - " control = pws.Control.load(domain_dir / \"control.test\")\n", + " control = pws.Control.load_prms(domain_dir / \"control.test\")\n", "\n", "control" ] @@ -225,9 +226,9 @@ } }, "source": [ - "We suppress warnings when loading legacy PRMS parameter control files indicating which options are not being used. \n", + "When loading this PRMS-native parameter control file, we suppress warnings that indicate which PRMS options are not being used by pywatershed. For complete discussion of these see the help on `Control.load_prms()` in the documentation. The documentation covers what we also see in the above output, that the `netcdf_output_var_names` in `control.options` is the combination of `nhruOutVar_names` and `nsegmentOutVar_names` from the PRMS-native `control.test` file. 
The requested output is different in this example than in the previous notebook, where all variables were output, for this reason. We'll keep all these variables for this model run and reduce the requested output later on in this notebook. However we'll need to add two pywatershed variables to this list to be able to run the sub-model below. \n", "\n", - "Now we'll edit this control object. First we'll reduce the total simulation time to six months for the purposes of this demonstration (but feel free to increase this to the full 2 years available, if you like). Next we'll specify several global options, including the location of the atmospheric forcing/input data, the budget type, and the calculation method." + "Now we'll edit this control instance. First, we'll add the additional two output variables necessary to provide boundary conditions to the sub-model below. Next, we'll reduce the total simulation time to six months for the purposes of this demonstration (but feel free to increase this to the full 2 years available, if you like). Then, we'll specify several global options, including the location of the atmospheric forcing/input data, the budget type, and the calculation method." ] }, { @@ -242,6 +243,7 @@ }, "outputs": [], "source": [ + "control.options[\"netcdf_output_var_names\"] += [\"infil_hru\", \"sroff_vol\"]\n", "control.edit_end_time(np.datetime64(\"1979-07-01T00:00:00\"))\n", "run_dir = nb_output_dir / \"nhm\"\n", "if run_dir.exists():\n", @@ -254,44 +256,6 @@ "}" ] }, - { - "cell_type": "markdown", - "id": "ed82f8d1-8bfc-469e-a968-f86e029c7a5f", - "metadata": {}, - "source": [ - "We note that the `netcdf_output_var_names` in `control.options` is the combination of `nhruOutVar_names` and `nsegmentOutVar_names` from the PRMS-native `control.test` file. In the next section we'll customize this list of variables names, but here we list what we'll output with our current simulation." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e28f2df1-df17-451f-87ed-5d8d1e9d8b7e", - "metadata": {}, - "outputs": [], - "source": [ - "control.options[\"netcdf_output_var_names\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d35aa7f6-0d3a-425f-8db9-1b846675cc5c", - "metadata": {}, - "outputs": [], - "source": [ - "submodel_input_names = pws.Model(\n", - " submodel_processes,\n", - " control=control,\n", - " parameters=params,\n", - " find_input_files=False,\n", - ")._file_input_names\n", - "print(submodel_input_names - set(control.options[\"netcdf_output_var_names\"]))\n", - "control.options[\"netcdf_output_var_names\"] = set(\n", - " control.options[\"netcdf_output_var_names\"] + list(submodel_input_names)\n", - ")\n", - "control.options[\"netcdf_output_var_names\"]" - ] - }, { "cell_type": "markdown", "id": "0b46e9ca-e84b-40b3-bdc5-179fd6c85555", @@ -379,7 +343,7 @@ "source": [ "Above we see the spatial domain, the outline of its extent, the network on which streamflow is calculated, and the final simulated values of streamflow. \n", "\n", - "Let us turn towards the model structure in more detail: how do atmospheric inputs/forcings result in the simulated streamflow above? We will produce the model graph which shows the flow of information from the input files through all the process representations, all the way down to the channel streamflow. First, we print a color legend for each represented process in the NHM. Each process is outlined by a box of this color and values/fluxes flowing from a process have the color of the originating process. Finally, a variable outlined (above and on the sides) participates in the mass budget of its process. This diagram gives some very specific information of the model conceptualization, how the processes relate to each other, and the complexity of the indivdual processes. 
(Note that the underlying graphviz/dot program that generates the plot is not fully working on Mac ARM/M1, so plots here and below are less detailed if you are are using such a machine, the notebooks in the repo will be complete for your reference.) Each process's data is placed into one of three categories: inputs(blue), parameters(orange), and variables(green). All of this information is public for each process (indeed in static methods) so we can produce these plots programatically without needing to run the `Model`. As discussed in previous notebooks, the `Model` object contains all the information needed to generate the plot when it is initialized." + "Let us turn towards the model structure in more detail: how do atmospheric inputs/forcings result in the simulated streamflow above? We will produce the model graph which shows the flow of information from the input files through all the process representations, all the way down to the channel streamflow. First, we print a color legend for each represented process in the NHM. Each process is outlined by a box of this color and values/fluxes flowing from a process have the color of the originating process. Finally, a variable outlined in blue (above and on the sides) participates in the mass budget of its process. This diagram gives some very specific information of the model conceptualization, how the processes relate to each other, and the complexity of the indivdual processes. (Note that the underlying graphviz/dot program that generates the plot is not fully working on Mac ARM/M1, so plots here and below are less detailed if you are are using such a machine, the notebooks in the repo will be complete for your reference.) Each process's data is placed into one of three categories: inputs(blue), parameters(orange), and variables(green). All of this information is public for each process (indeed in static methods) so we can produce these plots programatically without needing to run the `Model`. 
As discussed in the previous notebook, the `Model` object contains all the information needed to generate the plot when it is initialized." ] }, { @@ -400,7 +364,10 @@ " show_params=show_params,\n", " ).SVG(verbose=True, dpi=48)\n", "except:\n", - " print(\"In some cases, dot fails on Mac ARM machines.\")" + " print(\n", + " \"Dot fails on some machines. You can see the graph at this url:\"\n", + " \"https://private-user-images.githubusercontent.com/12465248/259783743-7813cba0-343c-4bf1-9b15-6ed92930f288.svg?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTEiLCJleHAiOjE3MDI0MjU1MzIsIm5iZiI6MTcwMjQyNTIzMiwicGF0aCI6Ii8xMjQ2NTI0OC8yNTk3ODM3NDMtNzgxM2NiYTAtMzQzYy00YmYxLTliMTUtNmVkOTI5MzBmMjg4LnN2Zz9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFJV05KWUFYNENTVkVINTNBJTJGMjAyMzEyMTIlMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjMxMjEyVDIzNTM1MlomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPTA1NmJjMTVkZDRlODk2ZDY4MGNlMWQyOGUzYjMzOTY0ODM5YTgwYTE0MzNhYjBmNzRjNDc5MWU0Yjc0YjM5MzQmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0JmFjdG9yX2lkPTAma2V5X2lkPTAmcmVwb19pZD0wIn0.QY2w3UvoE8hFjog03cztTggjZNmUhyYmoHj2ZuGTaGw\"\n", + " )" ] }, { @@ -437,7 +404,7 @@ }, "outputs": [], "source": [ - "control = pws.Control.load_prms(\n", + "control_sub = pws.Control.load_prms(\n", " domain_dir / \"control.test\", warn_unused_options=False\n", ")\n", "\n", @@ -445,16 +412,17 @@ "if run_dir_submodel.exists():\n", " rmtree(run_dir_submodel)\n", "\n", - "control.edit_end_time(np.datetime64(\"1979-07-01T00:00:00\"))\n", - "control.options = control.options | {\n", + "control_sub.edit_end_time(np.datetime64(\"1979-07-01T00:00:00\"))\n", + "control_sub.options = control_sub.options | {\n", " \"input_dir\": run_dir,\n", " \"budget_type\": \"warn\",\n", " \"calc_method\": \"numba\",\n", " \"netcdf_output_dir\": run_dir_submodel,\n", "}\n", "\n", - "\n", - "control.options[\"netcdf_output_var_names\"] 
= pws.PRMSChannel.get_variables()" + "control_sub.options[\n", + " \"netcdf_output_var_names\"\n", + "] = pws.PRMSChannel.get_variables()" ] }, { @@ -479,7 +447,7 @@ "source": [ "submodel = pws.Model(\n", " submodel_processes,\n", - " control=control,\n", + " control=control_sub,\n", " parameters=params,\n", ")" ] @@ -503,13 +471,60 @@ "submodel.run(finalize=True)" ] }, + { + "cell_type": "markdown", + "id": "2fca6466-cc1d-46ac-9b49-deb7f66b9414", + "metadata": {}, + "source": [ + "We can visualize the sub-model with its `ModelGraph`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14a0e229-8297-42af-8b07-aa416073d400", + "metadata": {}, + "outputs": [], + "source": [ + "show_params = not (platform == \"darwin\" and processor() == \"arm\")\n", + "try:\n", + " pws.analysis.ModelGraph(\n", + " submodel,\n", + " hide_variables=False,\n", + " process_colors=palette,\n", + " show_params=show_params,\n", + " ).SVG(verbose=True, dpi=48)\n", + "except:\n", + " print(\n", + " \"Dot fails on some machines. 
You can see the graph at this url:\"\n", + " \"https://private-user-images.githubusercontent.com/12465248/259783766-e0902e7f-ac69-488c-8719-858315d45e7c.svg?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTEiLCJleHAiOjE3MDI0MjU1MzIsIm5iZiI6MTcwMjQyNTIzMiwicGF0aCI6Ii8xMjQ2NTI0OC8yNTk3ODM3NjYtZTA5MDJlN2YtYWM2OS00ODhjLTg3MTktODU4MzE1ZDQ1ZTdjLnN2Zz9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFJV05KWUFYNENTVkVINTNBJTJGMjAyMzEyMTIlMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjMxMjEyVDIzNTM1MlomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPWJlMzE0Zjc0YmNlZDlkY2YwZWE0ZDhkMTBiYTgzZTFhNmNlODFkM2I2NzA0MjViNDdkMDQ1ZjZmZWNhZTk0ZTkmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0JmFjdG9yX2lkPTAma2V5X2lkPTAmcmVwb19pZD0wIn0.HCW8QQr1tobxM0Ln0U0Sw_xNbrOscNnDe_tS0e5uMvk\"\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "544c6f78-ade4-42d3-84fb-ceb71a462856", + "metadata": {}, + "source": [ + "Finally we can check that the output of the model and the submodel are identical. 
" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "9e6b4c56-bd36-4d5e-9a6c-4537e036c2bd", + "id": "63bc7082-d4fe-4237-8bd8-613dc692beeb", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "vars_output_both = set(control.options[\"netcdf_output_var_names\"]) & set(\n", + " control_sub.options[\"netcdf_output_var_names\"]\n", + ")\n", + "for var in vars_output_both:\n", + " print(var)\n", + " nhm_da = xr.open_dataarray(run_dir / f\"{var}.nc\")\n", + " sub_da = xr.open_dataarray(run_dir_submodel / f\"{var}.nc\")\n", + " xr.testing.assert_equal(nhm_da, sub_da)" + ] } ], "metadata": { diff --git a/examples/03_compare_pws_prms.ipynb b/examples/03_compare_pws_prms.ipynb index d3378332..ee89cbaf 100644 --- a/examples/03_compare_pws_prms.ipynb +++ b/examples/03_compare_pws_prms.ipynb @@ -7,12 +7,16 @@ "source": [ "# Compare pywatershed and PRMS\n", "\n", - "This notebook compares pywatershed (PWS) and PRMS outputs. As part of release of v1.0 this notebook is meant to give users insight on how wimilar results from pywatershed and PRMS are. They are identical or very close in the vast majority of cases. This notebook makes it easy to find when they are not by providing statistics at individual HRUs and timeseries for all HRUs.\n", + "This notebook compares pywatershed (PWS) and PRMS outputs. It is intended to give users insight on and tools to examine the similarity of pywatershed and PRMS simulations. While these are very close in most cases, this notebook makes it easy to find when they are not close by providing statistics at individual HRUs and timeseries plots for variables on all HRUs or stream segments.\n", "\n", - "Note that this notebook requires an editable install of pywatershed (`pip install -e` in the pywatershed repository root) for the requisite data. PRMS/NHM domains which may be used are in the `test_data/` directory of pywatershed (`hru_1`, `drb_2yr`, and `ucb_2yr`) but any other domain may be used. 
\n", + "This notebook requires an editable install of pywatershed (`pip install -e` in the pywatershed repository root) for the requisite data. PRMS/NHM domains which may be used are in the `test_data/` directory of pywatershed (`hru_1`, `drb_2yr`, and `ucb_2yr`) but any other domain may be used. See below for notes on setting up your own domains to work with this notebook.\n", + "\n", + "## Precision of PRMS\n", + "The runs of PRMS in this notebook use PRMS binaries where all floating point variables are double precision (64 bit). If you are running PRMS yourself, you are likely using a version\n", + "with a mix of single (32 bit) and double precision floating point variables. There are some differences between PRMS built in these two ways. If you want to adapt this notebook to compare against mixed precision PRMS, please contact the pywatershed developers. Because we do not want to implement mixed precision operations in pywatershed and it became necessary to compare its output to PRMS built with only double precision floating point variables. The PRMS binaries used in this notebook (platform dependent) are produced by the [`prms_src/prms5.2.1`](https://github.com/EC-USGS/pywatershed/tree/develop/prms_src/prms5.2.1) source code in the pywatershed repository compiled with the `DBL_PREC=true` flag. It is possible that this notebook will fail if the included binaries do not work on your system. You may need to compile the PRMS 5.2.1 binary in the repository on your system and move the executable in to the `bin/` in the root of the repository. See [`DEVELOPER.md`](https://github.com/EC-USGS/pywatershed/blob/develop/DEVELOPER.md) for more details in this case.\n", "\n", "## Notes on setting up other domains\n", - "You may want to supply your own domain and see how pywatershed works on it. Here are notes on doing so. 
Domains must supply the correct, required files in `test_data/your_domain` which are given in this listing:\n", + "If want to supply your own domain and see how pywatershed works on it, here are notes on doing so. Domains must supply the correct, required files in `test_data/your_domain` which are given in this listing:\n", "\n", "```\n", "control.test prcp.cbh sf_data tmax.nc tmin.nc\n", @@ -21,10 +25,10 @@ "\n", "The `*.cbh` files must be pre-converted to netcdf for `prcp`, `tmin`, and `tmax` and how to do this can be found near the top of notebook 02. The `control.test` and `myparam.param` files are used by both PRMS and PWS. The `control.test` files in the repo are specific for being able to run sub-models and include a nearly maximal amount of model output (time-inefficient for both PRMS and PWS). The stock control files can be found in `test_data/common` there is a file for single-hru domains and multi-hru domains and these are identical (as appropriate) for the domains included in the repository. For running a large domain, for example, it is desirable to reduce the total amount of output (but this may not allow for PWS sub-models to be run as PRMS dosent necessarily supply all the required fields). So you may modify the `control.test` file but take careful note of what options are available in pywatershed as currently only NHM configuration is available.\n", "\n", - "The runs of PRMS use double precision binaries produced by the `prms_src/prms5.2.1` source code in the pywatershed repository. 
The procedure used below is exactly as done in CI for running regression tests against PRMS.\n", - "\n", + "## Plots\n", "All of the code required for plotting below is included so that it can be further tailored to your tastes.\n", "\n", + "\n", "## Imports, etc" ] }, @@ -337,7 +341,8 @@ " if stat_name.lower() == \"rmse\":\n", " stat = np.sqrt((nhm_after - prms).mean(dim=time_dim) ** 2)\n", " elif stat_name.lower() == \"rrmse\":\n", - " stat = np.sqrt(((nhm_after - prms) / prms).mean(dim=time_dim) ** 2)\n", + " stat = np.sqrt(((nhm_after - prms)).mean(dim=time_dim) ** 2)\n", + " stat = stat / prms.mean(dim=time_dim) # rrmse\n", " return stat.to_dataframe().melt(ignore_index=False)\n", "\n", "\n", @@ -588,22 +593,6 @@ "if pws.PRMSChannel in nhm_processes:\n", " compare_var_timeseries(\"seg_outflow\") # , rmse_min=0.01)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c9b6a847-e7e8-412c-adea-fd63a516279b", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1d152230-b6cc-434a-9331-90799083dab7", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/examples/04_preprocess_atm.ipynb b/examples/04_preprocess_atm.ipynb new file mode 100644 index 00000000..f216f1d7 --- /dev/null +++ b/examples/04_preprocess_atm.ipynb @@ -0,0 +1,341 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2aa72049-ae70-41ca-8720-5c7091b4dcf0", + "metadata": {}, + "source": [ + "# Preprocess PRMSAtmosphere.\n", + "\n", + "This notebook demonstrates how to pre-process the atmospheric forcings used by the hydrology. If you are not varying parameters in PRMSAtmosphere over your model runs, this preprocessing can save considerable time. Certain workflows, like calibration, vary parameters in PRMSAtmosphere and therefore can not preprocess its output. 
\n", + "\n", + "Preprocessing the PRMSAtmoshpere is just one example of how you can preprocess all the inputs up to your process of interest, provided they do not vary in any for your problem of interest. For example, the sub-models demonstrated in notebooks `01_multi-process_models.ipynb` and `02_prms_legacy_models.ipynb` have all thier inputs pre-processed. But because `PRMSSolarGeom` and `PRMSAtmosphere` behave silghtly different, this example of how to pre-process their outputs can be helpful. \n", + "\n", + "This example further illustrates the flexible nature of how input data are handled by pywatershed. Below we'll run the atmopshere using an active `PRMSSolarGeom` instance and also use the static output of `PRMSSolarGeom` to drive `PRMSAtmosphere`. We'll start by preprocessing CBH files from PRMS-native format to NetCDF (as was previously demonstrated in `02_prms_legacy_models.ipynb`.\n", + "\n", + "This notebook assumes you have an editable install of pywatershed (`pip install -e .` from the root of the cloned repository), to get necessary domain information. See [this section of the DEVELOPER.md](https://github.com/EC-USGS/pywatershed/blob/develop/DEVELOPER.md#installing-pywatershed-in-development-mode) for additional details." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7275fe95-8e0b-45f9-837c-ad6f7d38ced7", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + } + }, + "outputs": [], + "source": [ + "# auto-format the code in this notebook\n", + "%load_ext jupyter_black" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b4d1ec1c-2526-4ccd-a8e5-3169c898cf68", + "metadata": {}, + "outputs": [], + "source": [ + "from copy import deepcopy\n", + "import pathlib as pl\n", + "from pprint import pprint\n", + "import shutil\n", + "\n", + "import numpy as np\n", + "import pywatershed as pws\n", + "import xarray as xr\n", + "\n", + "pws_repo_root = pws.constants.__pywatershed_root__.parent" + ] + }, + { + "cell_type": "markdown", + "id": "ce5f687e-90af-45e8-90c0-ecfc04f37c5a", + "metadata": {}, + "source": [ + "This is where we'll place all the output from this notebook. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19fda9b4-d194-4b47-a4c5-03f732a8d909", + "metadata": {}, + "outputs": [], + "source": [ + "nb_output_dir = pl.Path(\"./04_preprocess_atm\")\n", + "if nb_output_dir.exists():\n", + " shutil.rmtree(nb_output_dir)\n", + "nb_output_dir.mkdir()" + ] + }, + { + "cell_type": "markdown", + "id": "a340d6a2-a6de-4079-b9d6-dada3bdbe4b0", + "metadata": {}, + "source": [ + "This works with domains in the pywatershed repository, you can configure for your domains." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "462020a2-5519-4b0f-9144-dfda1039963d", + "metadata": {}, + "outputs": [], + "source": [ + "dom_name = \"drb_2yr\"\n", + "dom_dir = pws_repo_root / f\"test_data/{dom_name}\"\n", + "param_file = dom_dir / \"myparam.param\"\n", + "control_file = dom_dir / \"control.test\"\n", + "dis_file = dom_dir / \"parameters_dis_hru.nc\"" + ] + }, + { + "cell_type": "markdown", + "id": "e368cfc8-3a1d-4810-9a29-44dc3740c5ef", + "metadata": {}, + "source": [ + "## Convert CBH files to netcdf\n", + "For completeness sakes, we'll start with PRMS-native inputs and process those to the NetCDF files that pywatershed will use." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6333def3-918c-412a-9a0d-a3732fdbcc05", + "metadata": {}, + "outputs": [], + "source": [ + "params = pws.parameters.PrmsParameters.load(param_file)\n", + "\n", + "cbh_files = {\n", + " \"prcp\": dom_dir / \"prcp.cbh\",\n", + " \"tmax\": dom_dir / \"tmax.cbh\",\n", + " \"tmin\": dom_dir / \"tmin.cbh\",\n", + "}\n", + "\n", + "cbh_dir = nb_output_dir / f\"cbh\"\n", + "cbh_dir.mkdir(exist_ok=True)\n", + "\n", + "for kk, vv in cbh_files.items():\n", + " out_file = cbh_dir / f\"{kk}.nc\"\n", + " pws.utils.cbh_file_to_netcdf({kk: vv}, params, out_file)" + ] + }, + { + "cell_type": "markdown", + "id": "3733421a-652e-46b7-aa9e-c8967ef32f6b", + "metadata": {}, + "source": [ + "## Write solar geometry files\n", + "Below we'll demonstrated using an active instance of `PRMSSolarGeom` and also using its static output to drive `PRMSAtmosphere`. Here we create the static output that we need for `PRMSSolarGeom` in the second case." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d25797be-2a7a-4a98-9fd2-c953f48d482c", + "metadata": {}, + "outputs": [], + "source": [ + "solar_geom_dir = nb_output_dir / \"solar_geom\"\n", + "solar_geom_dir.mkdir(exist_ok=True)\n", + "\n", + "solar_geom_output_vars = [\"soltab_horad_potsw\", \"soltab_potsw\"]\n", + "\n", + "control = pws.Control.load_prms(control_file, warn_unused_options=False)\n", + "control.options = control.options | {\n", + " \"netcdf_output_dir\": solar_geom_dir,\n", + " \"netcdf_output_var_names\": [\n", + " \"soltab_horad_potsw\",\n", + " \"soltab_potsw\",\n", + " ],\n", + "}\n", + "\n", + "solar_geom = pws.PRMSSolarGeometry(control, None, params)\n", + "solar_geom.initialize_netcdf()\n", + "control.advance()\n", + "solar_geom.advance()\n", + "solar_geom.output()\n", + "del solar_geom" + ] + }, + { + "cell_type": "markdown", + "id": "8dbb1ea0-fd86-4f8f-aefa-9905b592b002", + "metadata": {}, + "source": [ + "We'll take a look at some of the data, particularly looking at the last time available in the file." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4b290462-4bfb-4297-be8e-88b496442c01", + "metadata": {}, + "outputs": [], + "source": [ + "var = \"soltab_potsw\"\n", + "da = xr.open_dataarray(solar_geom_dir / f\"{var}.nc\", decode_timedelta=False)\n", + "display(da)\n", + "print(da[-1, 0:100].values)\n", + "da.close()" + ] + }, + { + "cell_type": "markdown", + "id": "1e5fa092-7623-4ef2-8907-d9f0597f8271", + "metadata": {}, + "source": [ + "## Preprocess atmospheric forcings without solar geometry files present\n", + "\n", + "When a `PRMSAtmosphere` object is initalized with a `netcdf_output_dir` argument, the adjusted forcings \n", + "are written to this location. Unless one requests specific variables only, all variables are written. \n", + "\n", + "Typically, the `soltab_potsw.nc` and `soltab_horad_potsw.nc` input files are not available as inputs. 
\n", + "(These are only output in a fixed width format by a version of PRMS5.2.1 in the pynhm repository\n", + "that is translated to netCDF when setting up test data). Here we show how to get the CBH adjustments\n", + "to output files using PRMSSolarGeometry instead of soltab files. The next section will show how to use available soltab files we created above." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b86c8a5c-1d61-4961-804f-009dad45c7c4", + "metadata": {}, + "outputs": [], + "source": [ + "cbh_files_dict = {ff.with_suffix(\"\").name: ff for ff in cbh_dir.glob(\"*.nc\")}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b8a12348-378d-45e8-928b-bc938cf430c3", + "metadata": {}, + "outputs": [], + "source": [ + "atm_dir = nb_output_dir / \"atm_without_solar_files\"\n", + "atm_dir.mkdir(exist_ok=True)\n", + "\n", + "control = pws.Control.load_prms(control_file, warn_unused_options=False)\n", + "control.options[\"netcdf_output_dir\"] = atm_dir\n", + "\n", + "solar_geom = pws.PRMSSolarGeometry(control, None, params)\n", + "\n", + "atm = pws.PRMSAtmosphere(\n", + " control,\n", + " None,\n", + " params,\n", + " **cbh_files_dict,\n", + " soltab_horad_potsw=solar_geom.soltab_horad_potsw,\n", + " soltab_potsw=solar_geom.soltab_potsw,\n", + ")\n", + "atm.initialize_netcdf()\n", + "control.advance()\n", + "solar_geom.advance()\n", + "atm.advance()\n", + "atm.calculate(1)\n", + "atm.output()\n", + "del atm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a318e7da-f904-4312-9e7a-aff5b2991043", + "metadata": {}, + "outputs": [], + "source": [ + "var = \"potet\"\n", + "da = xr.open_dataarray(atm_dir / f\"{var}.nc\")\n", + "display(da)\n", + "print(da[-1, 0:100].values)\n", + "da.close()" + ] + }, + { + "cell_type": "markdown", + "id": "d72cfda1-9957-4283-a35d-f3f8f595a139", + "metadata": {}, + "source": [ + "## Preprocess atmospheric forcings with solar geometry files present\n", + "We repeat the above, 
dropping the `PRMSSolarGeometry` object as its information is now coming from the soltab files. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1696ae3-28e4-4a20-a811-68b58158f09c", + "metadata": {}, + "outputs": [], + "source": [ + "cbh_files_dict = {ff.with_suffix(\"\").name: ff for ff in cbh_dir.glob(\"*.nc\")}\n", + "solar_files_dict = {\n", + " ff.with_suffix(\"\").name: ff for ff in solar_geom_dir.glob(\"*.nc\")\n", + "}\n", + "atm_input_files_dict = cbh_files_dict | solar_files_dict" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9836e071-2754-4230-b880-f2ea8dca3eb0", + "metadata": {}, + "outputs": [], + "source": [ + "atm_solar_files_dir = nb_output_dir / \"atm_without_solar_files\"\n", + "atm_solar_files_dir.mkdir(exist_ok=True)\n", + "\n", + "control = pws.Control.load_prms(control_file, warn_unused_options=False)\n", + "control.options[\"netcdf_output_dir\"] = atm_solar_files_dir\n", + "\n", + "solar_geom = pws.PRMSSolarGeometry(control, None, params)\n", + "\n", + "atm = pws.PRMSAtmosphere(\n", + " control,\n", + " None,\n", + " params,\n", + " **atm_input_files_dict,\n", + ")\n", + "atm.initialize_netcdf()\n", + "control.advance()\n", + "atm.advance()\n", + "atm.calculate(1)\n", + "atm.output()\n", + "del atm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "386822e6-c13d-4203-afb1-baa6cd2501ab", + "metadata": {}, + "outputs": [], + "source": [ + "var = \"potet\"\n", + "da = xr.open_dataarray(atm_dir / f\"{var}.nc\")\n", + "display(da)\n", + "print(da[-1, 0:100].values)\n", + "da.close()" + ] + } + ], + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython" + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/05_parameter_ensemble.ipynb b/examples/05_parameter_ensemble.ipynb new file mode 100644 index 00000000..f2a713df 
--- /dev/null +++ b/examples/05_parameter_ensemble.ipynb @@ -0,0 +1,481 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "f932e1c0-24ac-4be8-a886-8554cd1e4412", + "metadata": {}, + "source": [ + "# Parameter Ensemble\n", + "This notebook shows how to edit and work with parameters in pywatershed. First we look at the data model used by the `Parameter` class to build a small ensemble of parameters for the `PRMSChannel` hydrologic process. Then we do a little bit of (embarassingly) parallel programming using Python's [concurrent.futures](https://docs.python.org/3/library/concurrent.futures.html) to run this ensemble in parallel (in addition to serial). This provides a skeleton recipe for how to do calibration or sensitivity analysis with pywatershed. \n", + "\n", + "It is a design feature that the `Parameter` class is read-only. This is because we dont want the code and developers modifying parameters opaquely under the hood. While this practice is commonplace, it undermines the idea of reproducible research and causes more headaches than it sovles. So we guard against this with software design. The trick is that we need to make the `Parameter` object editable, but that means we have to change it to another class first. \n", + "\n", + "Let's get started. \n", + "\n", + "Note this notebook needs notebooks 0-1 to have been run in advance." 
+ ] + }, + { + "cell_type": "markdown", + "id": "d2458336-67f7-4a2d-b486-8dbb99389854", + "metadata": {}, + "source": [ + "## Preliminaries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "73380b42-3336-4c6c-99cb-f797841a132a", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + } + }, + "outputs": [], + "source": [ + "# auto-format the code in this notebook\n", + "%load_ext jupyter_black" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "769aa543-dfa5-43e9-84f6-0830ad0cb1d0", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + } + }, + "outputs": [], + "source": [ + "import os\n", + "import pathlib as pl\n", + "from pprint import pprint\n", + "import shutil\n", + "\n", + "import numpy as np\n", + "import pywatershed as pws\n", + "import xarray as xr" + ] + }, + { + "cell_type": "markdown", + "id": "c73e4353-26c7-40c1-a952-9739f52c8198", + "metadata": {}, + "source": [ + "We'll use a PRMS-native parameter file from one of the domains supplied with pywatershed on install." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "01da8dca-622f-44c2-9041-8a43826d5d6f", + "metadata": {}, + "outputs": [], + "source": [ + "domain_dir = pws.constants.__pywatershed_root__ / \"data/drb_2yr\"\n", + "nb_output_dir = pl.Path(\"./05_parameter_ensemble\")\n", + "nb_output_dir.mkdir(exist_ok=True)\n", + "(nb_output_dir / \"params\").mkdir(exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b08b96b-9bcc-4b1b-a2e9-231949b2dd0c", + "metadata": {}, + "outputs": [], + "source": [ + "params = pws.parameters.PrmsParameters.load(domain_dir / \"myparam.param\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c7dd6bc3-18aa-4358-9426-1515c1d0f884", + "metadata": {}, + "outputs": [], + "source": [ + "print(params)\n", + "isinstance(params, pws.Parameters)" + ] + }, + { + "cell_type": "markdown", + "id": "21bae789-3c33-4ff7-a0c2-e60afccaefbb", + "metadata": {}, + "source": [ + "## Create an ensemble or parameters\n", + "Now that we have the PRMS parameters as a `pws.Parameters` object, actually as its subclass `PrmsParameters`, we'll conduct a simple demonstration of how to generate an ensemble of values for `K_coef`, the Muskingum storage coefficient which affects the travel time of waves in the `PRMSChannel` representation.\n", + "\n", + "As mentioned above, we have to get the parameter data in to a different class to be able to edit. Here we have two options: 1) an `xarray.Dataset` 2) a `pywatershed.DatasetDict`. These two options have invertible mappings provided by `pywatershed.DatasetDict`. \n", + "\n", + "First we'll deomonstrate the approach with `xarray.Dataset`. We'll create an ensemble with 11 members and we'll write the new parameter datasets, including all the variables, out to disk as separate NetCDF files. Note we could do this in memory and not write to disk, but generally it is favorable to have a record of inputs. 
This also demonstrates how one can quite easily convert a native PRMS parameter file to a NetCDF file.
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "146516e8-bd2e-4587-ac8b-167d34e84a96", + "metadata": {}, + "outputs": [], + "source": [ + "for ff in param_files:\n", + " new_params = xr.open_dataset(\n", + " ff, decode_times=False, decode_timedelta=False\n", + " )\n", + " k_coef = new_params[\"K_coef\"]\n", + " # new_params = pws.parameters.PrmsParameters.from_netcdf(ff)\n", + " # k_coef = new_params.data_vars[\"K_coef\"]\n", + " multipliers = k_coef / params.data_vars[\"K_coef\"]\n", + " assert (multipliers - multipliers[0] < 1e-15).all()\n", + " print(multipliers[0].values)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cf869257-d4e5-4b36-93e8-deada428a81e", + "metadata": {}, + "outputs": [], + "source": [ + "del param_files" + ] + }, + { + "cell_type": "markdown", + "id": "c075c737-9157-449c-886b-7d07ebe59486", + "metadata": {}, + "source": [ + "Now to demonstrate the use of a `pywatershed.DatasetDict` which you can read about in the [documentation](https://pywatershed.readthedocs.io/en/main/api/generated/pywatershed.base.DatasetDict.html#pywatershed.base.DatasetDict). Note that the edited `DatasetDict` can be made a `Parameters` object again by `Parameters(**param_dict.data)`, but we'll just write directly to file and then load as a `Parameters` object. These are slightly different choices from above, show additional flexibility. 
We still choose to write the parameter ensemble to disk, however.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd179039-a91d-4057-8e11-4ba49cb387bd", + "metadata": {}, + "outputs": [], + "source": [ + "param_files = []\n", + "for ii in range(11):\n", + " param_dict = params.to_dd() # copies by default\n", + " multiplier = ii * 0.05 + 0.75\n", + " print(\"multiplier = \", multiplier)\n", + " param_dict.data_vars[\"K_coef\"] *= multiplier\n", + " param_file_name = (\n", + " nb_output_dir / f\"params/perturbed_params_{str(ii).zfill(3)}.nc\"\n", + " )\n", + " param_files += [param_file_name]\n", + " param_dict.to_netcdf(\n", + " param_file_name, use_xr=True\n", + " ) # using xarray, more work necessary for nc4 export" + ] + }, + { + "cell_type": "markdown", + "id": "ef9a8074-d0ef-4afb-8f15-1b3ec726fbe5", + "metadata": {}, + "source": [ + "Same check as above, but this time we read the NetCDF file into a `PrmsParameters` object rather than an `xarray.Dataset`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "04cb7d5a-4979-4859-872c-6ccd6f6f1fc3", + "metadata": {}, + "outputs": [], + "source": [ + "for ff in param_files:\n", + " # the problem arises on the read with xarray default decoding\n", + " # but we can just open the netcdf file as Parameters\n", + " # ds = xr.open_dataset(ff, decode_times=False, decode_timedelta=False)\n", + " # k_coef = ds[\"K_coef\"]\n", + " new_params = pws.parameters.PrmsParameters.from_netcdf(ff)\n", + " k_coef = new_params.data_vars[\"K_coef\"]\n", + " multipliers = k_coef / params.data_vars[\"K_coef\"]\n", + " assert (multipliers - multipliers[0] < 1e-15).all()\n", + " print(multipliers[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2ae2d078-4d36-4db0-b824-aa09da6b9585", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "cad8c37c-ddbc-4c0f-868e-c0f9fdce6e78", + "metadata": {}, + "source": [ + "## Run the parameter 
ensemble\n", + "We'll write a helper function for running the parameters through the model. Note comments on details around concurrent.futures." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "97cc4ba6-9f81-4c6a-8a1f-d2e2a169c986", + "metadata": {}, + "outputs": [], + "source": [ + "def run_channel_model(output_dir_parent, param_file):\n", + " # for concurrent.futures we have to write this function to file/module\n", + " # so we have to import things that wont be in scope in that case.\n", + " import numpy as np\n", + " import pywatershed as pws\n", + "\n", + " domain_dir = pws.constants.__pywatershed_root__ / \"data/drb_2yr\"\n", + "\n", + " params = pws.parameters.PrmsParameters.from_netcdf(param_file)\n", + "\n", + " param_id = param_file.with_suffix(\"\").name.split(\"_\")[-1]\n", + " nc_output_dir = output_dir_parent / f\"run_params_{param_id}\"\n", + " nc_output_dir.mkdir(parents=True, exist_ok=True)\n", + "\n", + " control = pws.Control.load_prms(\n", + " domain_dir / \"control.test\", warn_unused_options=False\n", + " )\n", + " control.edit_end_time(np.datetime64(\"1979-07-01T00:00:00\"))\n", + " control.options = control.options | {\n", + " \"input_dir\": \"01_multi-process_models/nhm_memory\",\n", + " \"budget_type\": \"warn\",\n", + " \"calc_method\": \"numba\",\n", + " \"netcdf_output_dir\": nc_output_dir,\n", + " }\n", + "\n", + " model = pws.Model(\n", + " [pws.PRMSChannel],\n", + " control=control,\n", + " parameters=params,\n", + " )\n", + "\n", + " model.run(finalize=True)\n", + " return nc_output_dir" + ] + }, + { + "cell_type": "markdown", + "id": "bf2cb631-91da-4488-962c-ed5c80378049", + "metadata": {}, + "source": [ + "### Serial run\n", + "We'll perform serial execution of the model over the parameter files." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8dc58917-2b29-474a-bb56-1d401c68b4ef", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "serial_output_dirs = []\n", + "serial_output_parent = nb_output_dir / \"serial\"\n", + "if serial_output_parent.exists():\n", + " shutil.rmtree(serial_output_parent)\n", + "serial_output_parent.mkdir()\n", + "for ff in param_files:\n", + " serial_output_dirs += [run_channel_model(serial_output_parent, ff)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "81841620-fbc1-49cf-a699-ccad3b2db051", + "metadata": {}, + "outputs": [], + "source": [ + "serial_output_dirs" + ] + }, + { + "cell_type": "markdown", + "id": "e3ccefe5-2b6e-40e7-afee-fd6718a3af38", + "metadata": {}, + "source": [ + "### concurrent.futures run\n", + "For [concurrent futures](https://docs.python.org/3/library/concurrent.futures.html) to work in an interactive setting, we have to import the iterated/mapped function from a module, the function can not be defined in the notebook/interactive setting. We can easily just write the function out to file (ensure above that everything is in scope when imported, as noted in the function)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f2188410-5d3e-40b0-a539-b26292ed0e17", + "metadata": {}, + "outputs": [], + "source": [ + "# the name of the nb_output_dir can not be imported from so\n", + "# we'll create another directory to import from and delete it later\n", + "import inspect\n", + "\n", + "import_dir = pl.Path(\"param_ensemble_tmp\")\n", + "import_dir.mkdir(exist_ok=True)\n", + "with open(import_dir / \"run_channel_model_mod.py\", \"w\") as the_file:\n", + " the_file.write(inspect.getsource(run_channel_model))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7913ba4b-3a46-432d-abb5-a86a11f6e02c", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "import time\n", + "from concurrent.futures import ProcessPoolExecutor as PoolExecutor\n", + "from concurrent.futures import as_completed\n", + "from functools import partial\n", + "from param_ensemble_tmp.run_channel_model_mod import run_channel_model\n", + "\n", + "parallel_output_parent = pl.Path(\"parallel\")\n", + "if parallel_output_parent.exists():\n", + " shutil.rmtree(parallel_output_parent)\n", + "parallel_output_parent.mkdir()\n", + "\n", + "# you can set your choice of max_workers\n", + "with PoolExecutor(max_workers=4) as executor:\n", + " parallel_output_dirs = executor.map(\n", + " partial(run_channel_model, parallel_output_parent), param_files\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7cea730c-4f0a-4720-8dee-90c463a7a702", + "metadata": {}, + "outputs": [], + "source": [ + "shutil.rmtree(import_dir)" + ] + }, + { + "cell_type": "markdown", + "id": "34d68668-dac4-4f77-9917-ea72074f3dad", + "metadata": {}, + "source": [ + "### Check serial and parallel \n", + "See that these runs gave the same results." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dc4d7600-8488-44f4-81fb-893cb0876c29", + "metadata": {}, + "outputs": [], + "source": [ + "# check serial == parallel\n", + "serial_runs = sorted(serial_output_parent.glob(\"*\"))\n", + "parallel_runs = sorted(parallel_output_parent.glob(\"*\"))\n", + "\n", + "for ss, pp in zip(serial_runs, parallel_runs):\n", + " serial_files = sorted(ss.glob(\"*.nc\"))\n", + " parallel_files = sorted(pp.glob(\"*.nc\"))\n", + " for sf, pf in zip(serial_files, parallel_files):\n", + " s_ds = xr.open_dataset(sf)\n", + " p_ds = xr.open_dataset(pf)\n", + " xr.testing.assert_allclose(s_ds, p_ds)" + ] + }, + { + "cell_type": "markdown", + "id": "c30e9294-0682-46f7-bc63-5bb7c407dce8", + "metadata": {}, + "source": [ + "Can also check that the original parameters give the same results as in notebook `01_multi-process_models.ipynb`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cca4dc2f-c91b-475d-9021-c52261c0c31e", + "metadata": {}, + "outputs": [], + "source": [ + "run_005 = serial_output_parent / \"run_params_005\"\n", + "files_005 = sorted(run_005.glob(\"*.nc\"))\n", + "for ff in files_005:\n", + " if ff.name == \"PRMSChannel_budget.nc\":\n", + " continue\n", + " ds_005 = xr.open_dataset(ff)\n", + " ds_01 = xr.open_dataset(\n", + " pl.Path(\"01_multi-process_models/nhm_memory\") / ff.name\n", + " )\n", + " xr.testing.assert_allclose(ds_005, ds_01)" + ] + } + ], + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython" + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/param_edits.ipynb b/examples/param_edits.ipynb deleted file mode 100644 index 0c5bcc7e..00000000 --- a/examples/param_edits.ipynb +++ /dev/null @@ -1,320 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "f932e1c0-24ac-4be8-a886-8554cd1e4412", - 
"metadata": {}, - "source": [ - "# Parameter Editing\n", - "This notebook gives a quick recipe for how to do calibration or sensitivity analysis with pywatershed. \n", - "It is a design feature that parameters, more specifically the Parameter class, are read-only because \n", - "it should be the case that parameters supplied are used and the code is not opaquely modifying these.\n", - "\n", - "As a consequence, one has to make the Parameter class editable. Below this is accomplished by doing\n", - "`the_parameters.to_dd()` which returns a DatasetDict which is editable. One has to know something about\n", - "how DatasetDicts are constructed to edit effectively, information can be found in the [documentation](https://pywatershed.readthedocs.io/en/main/api/generated/pywatershed.base.DatasetDict.html#pywatershed.base.DatasetDict). \n", - "The edited DatasetDict can be made a Parameters object again by `Parameters(**param_dict.data)`, as shown below. \n", - "\n", - "Note this notebook needs notebooks 0-1 to have been run in advance." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "73380b42-3336-4c6c-99cb-f797841a132a", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - } - }, - "outputs": [], - "source": [ - "# auto-format the code in this notebook\n", - "%load_ext jupyter_black" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "769aa543-dfa5-43e9-84f6-0830ad0cb1d0", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - } - }, - "outputs": [], - "source": [ - "import pathlib as pl\n", - "from pprint import pprint\n", - "import shutil\n", - "\n", - "import numpy as np\n", - "import pywatershed as pws\n", - "import xarray as xr" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "01da8dca-622f-44c2-9041-8a43826d5d6f", - "metadata": {}, - "outputs": [], - "source": [ - "domain_dir = pws.constants.__pywatershed_root__ / \"data/drb_2yr\"\n", - "nb_output_dir = pl.Path(\"./param_edits\")\n", - "nb_output_dir.mkdir(exist_ok=True)\n", - "(nb_output_dir / \"params\").mkdir(exist_ok=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8b08b96b-9bcc-4b1b-a2e9-231949b2dd0c", - "metadata": {}, - "outputs": [], - "source": [ - "# A legacy PRMS parameter file\n", - "params = pws.parameters.PrmsParameters.load(domain_dir / \"myparam.param\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cd179039-a91d-4057-8e11-4ba49cb387bd", - "metadata": {}, - "outputs": [], - "source": [ - "param_list = []\n", - "param_files = []\n", - "for ii in range(11):\n", - " param_dict = params.to_dd() # copies by default\n", - " multiplier = ii * 0.05 + 0.75\n", - " print(\"multiplier = \", multiplier)\n", - " param_dict.data_vars[\"K_coef\"] *= multiplier\n", - " param_file_name = (\n", - " nb_output_dir / f\"params/perturbed_params_{str(ii).zfill(3)}.nc\"\n", - " )\n", - " param_files += [param_file_name]\n", - " # These could avoid export to netcdf4 if just using in 
memory\n", - " # could store in a list like: param_list.append(pws.Parameters(**param_dict.data))\n", - " pws.Parameters(**param_dict.data).to_netcdf(\n", - " param_file_name, use_xr=True\n", - " ) # using xarray, more work necessary for nc4 export" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "04cb7d5a-4979-4859-872c-6ccd6f6f1fc3", - "metadata": {}, - "outputs": [], - "source": [ - "# this provides a check that the values from file are what we expect\n", - "for ff in param_files:\n", - " # the problem arises on the read with xarray default decoding\n", - " # but we can just open the netcdf file as Parameters\n", - " # ds = xr.open_dataset(ff, decode_times=False, decode_timedelta=False)\n", - " # k_coef = ds[\"K_coef\"]\n", - " new_params = pws.parameters.PrmsParameters.from_netcdf(ff)\n", - " k_coef = new_params.data_vars[\"K_coef\"]\n", - " multipliers = k_coef / params.data_vars[\"K_coef\"]\n", - " assert (multipliers - multipliers[0] < 1e-15).all()\n", - " print(multipliers[0])" - ] - }, - { - "cell_type": "markdown", - "id": "cad8c37c-ddbc-4c0f-868e-c0f9fdce6e78", - "metadata": {}, - "source": [ - "## A helper function for running the parameters through the model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "97cc4ba6-9f81-4c6a-8a1f-d2e2a169c986", - "metadata": {}, - "outputs": [], - "source": [ - "def run_channel_model(output_dir_parent, param_file):\n", - " # for concurrent.futures we have to write this function to file/module\n", - " # so we have to import things that wont be in scope in that case.\n", - " import numpy as np\n", - " import pywatershed as pws\n", - "\n", - " domain_dir = pws.constants.__pywatershed_root__ / \"data/drb_2yr\"\n", - "\n", - " params = pws.parameters.PrmsParameters.from_netcdf(param_file)\n", - "\n", - " param_id = param_file.with_suffix(\"\").name.split(\"_\")[-1]\n", - " nc_output_dir = output_dir_parent / f\"run_params_{param_id}\"\n", - " nc_output_dir.mkdir(parents=True, 
exist_ok=True)\n", - "\n", - " control = pws.Control.load(domain_dir / \"control.test\")\n", - " control.edit_end_time(np.datetime64(\"1979-07-01T00:00:00\"))\n", - " control.options = control.options | {\n", - " \"input_dir\": \"01_multi-process_models/nhm_memory\",\n", - " \"budget_type\": \"warn\",\n", - " \"calc_method\": \"numba\",\n", - " \"netcdf_output_dir\": nc_output_dir,\n", - " }\n", - "\n", - " model = pws.Model(\n", - " [pws.PRMSChannel],\n", - " control=control,\n", - " parameters=params,\n", - " )\n", - "\n", - " model.run(finalize=True)\n", - " return nc_output_dir" - ] - }, - { - "cell_type": "markdown", - "id": "bf2cb631-91da-4488-962c-ed5c80378049", - "metadata": {}, - "source": [ - "## Serial execution of the model over the parameter files" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8dc58917-2b29-474a-bb56-1d401c68b4ef", - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "serial_output_dirs = []\n", - "serial_output_parent = nb_output_dir / \"serial\"\n", - "if serial_output_parent.exists():\n", - " shutil.rmtree(serial_output_parent)\n", - "serial_output_parent.mkdir()\n", - "for ff in param_files:\n", - " serial_output_dirs += [run_channel_model(serial_output_parent, ff)]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "81841620-fbc1-49cf-a699-ccad3b2db051", - "metadata": {}, - "outputs": [], - "source": [ - "serial_output_dirs" - ] - }, - { - "cell_type": "markdown", - "id": "e3ccefe5-2b6e-40e7-afee-fd6718a3af38", - "metadata": {}, - "source": [ - "## concurrent.futures approach\n", - "For concurrent futures to work in an interactive setting, we have to import the iterated/mapped function from a module, the function can not be defined in the notebook/interactive setting. We can easily just write the function out to file (ensure above that everything is in scope when imported, as noted in the function)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f2188410-5d3e-40b0-a539-b26292ed0e17", - "metadata": {}, - "outputs": [], - "source": [ - "import inspect\n", - "\n", - "with open(\"param_edits/run_channel_model.py\", \"w\") as the_file:\n", - " the_file.write(inspect.getsource(run_channel_model))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7913ba4b-3a46-432d-abb5-a86a11f6e02c", - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "import time\n", - "from concurrent.futures import ProcessPoolExecutor as PoolExecutor\n", - "from concurrent.futures import as_completed\n", - "from functools import partial\n", - "from param_edits.run_channel_model import run_channel_model\n", - "\n", - "parallel_output_parent = nb_output_dir / \"parallel\"\n", - "if parallel_output_parent.exists():\n", - " shutil.rmtree(parallel_output_parent)\n", - "parallel_output_parent.mkdir()\n", - "\n", - "# you can set your choice of max_workers\n", - "with PoolExecutor(max_workers=11) as executor:\n", - " parallel_output_dirs = executor.map(\n", - " partial(run_channel_model, parallel_output_parent), param_files\n", - " )" - ] - }, - { - "cell_type": "markdown", - "id": "34d68668-dac4-4f77-9917-ea72074f3dad", - "metadata": {}, - "source": [ - "# Checks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dc4d7600-8488-44f4-81fb-893cb0876c29", - "metadata": {}, - "outputs": [], - "source": [ - "# check serial == parallel\n", - "serial_runs = sorted(serial_output_parent.glob(\"*\"))\n", - "parallel_runs = sorted(parallel_output_parent.glob(\"*\"))\n", - "\n", - "for ss, pp in zip(serial_runs, parallel_runs):\n", - " serial_files = sorted(ss.glob(\"*.nc\"))\n", - " parallel_files = sorted(pp.glob(\"*.nc\"))\n", - " for sf, pf in zip(serial_files, parallel_files):\n", - " s_ds = xr.open_dataset(sf)\n", - " p_ds = xr.open_dataset(pf)\n", - " xr.testing.assert_allclose(s_ds, p_ds)" - ] - }, - { - "cell_type": "code", 
- "execution_count": null, - "id": "cca4dc2f-c91b-475d-9021-c52261c0c31e", - "metadata": {}, - "outputs": [], - "source": [ - "# check serial 5 is the same as in notebook 02\n", - "run_005 = serial_output_parent / \"run_params_005\"\n", - "files_005 = sorted(run_005.glob(\"*.nc\"))\n", - "for ff in files_005:\n", - " if ff.name == \"PRMSChannel_budget.nc\":\n", - " continue\n", - " ds_005 = xr.open_dataset(ff)\n", - " ds_02 = xr.open_dataset(\n", - " pl.Path(\"01_multi-process_models/nhm_memory\") / ff.name\n", - " )\n", - " xr.testing.assert_allclose(ds_005, ds_02)" - ] - } - ], - "metadata": { - "language_info": { - "codemirror_mode": { - "name": "ipython" - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/preprocess_cbh_adj.ipynb b/examples/preprocess_cbh_adj.ipynb deleted file mode 100644 index 3ddc5249..00000000 --- a/examples/preprocess_cbh_adj.ipynb +++ /dev/null @@ -1,355 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "2aa72049-ae70-41ca-8720-5c7091b4dcf0", - "metadata": {}, - "source": [ - "# Preprocess CBH forcing files.\n", - "\n", - "This notebook demonstrates 2 steps in preprocessing CBH file: 1) conversion to netCDF, 2) applying parameter adjustments (with and without soltab input files)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b4d1ec1c-2526-4ccd-a8e5-3169c898cf68", - "metadata": {}, - "outputs": [], - "source": [ - "import pathlib as pl\n", - "from pprint import pprint\n", - "import shutil\n", - "\n", - "import numpy as np" - ] - }, - { - "cell_type": "markdown", - "id": "e368cfc8-3a1d-4810-9a29-44dc3740c5ef", - "metadata": {}, - "source": [ - "## 1. 
Convert CBH files to netcdf" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6333def3-918c-412a-9a0d-a3732fdbcc05", - "metadata": {}, - "outputs": [], - "source": [ - "from pywatershed.parameters import PrmsParameters\n", - "from pywatershed.utils.cbh_utils import cbh_files_to_netcdf\n", - "\n", - "dom_name = 'drb_2yr'\n", - "dom_dir = pl.Path(f'../test_data/{dom_name}')\n", - "cbh_nc_dir = pl.Path('.') / f\"{dom_name}_cbh_files\"\n", - "\n", - "param_file = dom_dir / 'myparam.param'\n", - "control_file = dom_dir / 'control.test'\n", - "\n", - "params = PrmsParameters.load(dom_dir / 'myparam.param')\n", - "\n", - "cbh_files = {\n", - " 'prcp': dom_dir / 'prcp.cbh',\n", - " 'tmax': dom_dir / 'tmax.cbh',\n", - " 'tmin': dom_dir / 'tmin.cbh',\n", - "}\n", - "\n", - "if cbh_nc_dir.exists(): \n", - " shutil.rmtree(cbh_nc_dir)\n", - "cbh_nc_dir.mkdir() # this should not exist, it should be deleted a the end\n", - "\n", - "for kk, vv in cbh_files.items():\n", - " out_file = (cbh_nc_dir / f\"{kk}.nc\")\n", - " cbh_files_to_netcdf({kk: vv}, params, out_file)\n", - " \n", - "print(sorted(cbh_nc_dir.glob('*.nc')))" - ] - }, - { - "cell_type": "markdown", - "id": "1e5fa092-7623-4ef2-8907-d9f0597f8271", - "metadata": {}, - "source": [ - "## 2. Apply PRMS parameters to input CBH data to get forcings used by the hydrologic components of the model.\n", - "\n", - "When a `PRMSAtmosphere` object is initalized with a `netcdf_output_dir` argument, the adjusted forcings \n", - "are written to this location. Unless one requests specific variables only, all variables are written. \n", - "\n", - "Typically, the `soltab_potsw.nc` and `soltab_horad_potsw.nc` input files are not available as inputs. \n", - "(These are only output in a fixed width format by a version of PRMS5.2.1 in the pynhm repository\n", - "that is translated to netCDF when setting up test data). 
First it is shown how to get the CBH adjustments\n", - "to output files using PRMSSolarGeometry instead of soltab files. Second is shown how to use available\n", - "soltab files." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7c809c84-ec8c-4f7f-8a3b-1f167af384a6", - "metadata": {}, - "outputs": [], - "source": [ - "from pywatershed import Control, PRMSAtmosphere, PRMSSolarGeometry" - ] - }, - { - "cell_type": "markdown", - "id": "6193aa78-3cdc-46c6-afde-e681a395df30", - "metadata": {}, - "source": [ - "From `help(PRMSAtmosphere)`:\n", - "\n", - "```\n", - "Help on class PRMSAtmosphere in module pywatershed.atmosphere.PRMSAtmosphere:\n", - "\n", - "class PRMSAtmosphere(pywatershed.base.storageUnit.StorageUnit)\n", - " | PRMSAtmosphere(\n", - " control: pywatershed.base.control.Control, \n", - " prcp: Union[str, pathlib.Path], \n", - " tmax: Union[str, pathlib.Path], \n", - " tmin: Union[str, pathlib.Path], \n", - " soltab_potsw: Union[str, numpy.ndarray, pywatershed.base.adapter.Adapter], \n", - " soltab_horad_potsw: Union[str, numpy.ndarray, pywatershed.base.adapter.Adapter], \n", - " budget_type: str = None, \n", - " verbose: bool = False, \n", - " netcdf_output_dir: Union[str, pathlib.Path] = None, \n", - " netcdf_output_vars: list = None, \n", - " n_time_chunk: int = -1, \n", - " load_n_time_batches: int = 1)\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "1be5ec8a-47f1-42b5-ada7-5f7d6ba2088d", - "metadata": {}, - "source": [ - "Thus the required inputs are `control`, `prcp`, `tmax`, `tmin`, `soltab_potsw`, `soltab_horad_potsw`. All but control can be specified as files and the `prcp`, `tmax`, and `tmin` must be specified as files. " - ] - }, - { - "cell_type": "markdown", - "id": "59213c1b-61c9-4fe5-80ad-8f117a9f6e18", - "metadata": {}, - "source": [ - "### 2a. If soltab output files are not present\n", - "For a test domain, specified above, we can generate a dict to pass as arguments specifying the required files." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "15fa6b5d-fadb-4336-a9d8-a14dd125f160", - "metadata": {}, - "outputs": [], - "source": [ - "input_vars = PRMSAtmosphere.get_inputs()\n", - "input_files_w_solar_geom = {}\n", - "for var in input_vars:\n", - " if \"soltab\" in var:\n", - " continue\n", - " nc_pth = cbh_nc_dir / f\"{var}.nc\"\n", - " input_files_w_solar_geom[var] = nc_pth\n", - "pprint(input_files_w_solar_geom)" - ] - }, - { - "cell_type": "markdown", - "id": "7e5a04da-d5c4-4f6e-a6c7-b3c928c58951", - "metadata": {}, - "source": [ - "We can query `PRMSAtmosphere` about its outputs and later we'll use this list to confirm that we get all of its variables as netCDF outputs when None was requested." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8ff025e9-e441-4273-ade0-b94f1a739012", - "metadata": {}, - "outputs": [], - "source": [ - "output_vars = PRMSAtmosphere.get_variables()\n", - "output_dir = pl.Path('preprocess_cbh_adj_files')\n", - "output_files = [output_dir / f\"{vv}.nc\" for vv in output_vars]\n", - "pprint(output_files)" - ] - }, - { - "cell_type": "markdown", - "id": "2601f9cb-26a3-4d07-83f5-fa6060c6307b", - "metadata": {}, - "source": [ - "Establish a file checking function and functions for achieving initialization of `PRMSAtmosphere` specifing a `netcdf_output_dir`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "69a37053-ce54-4dad-87d8-58b944a01089", - "metadata": {}, - "outputs": [], - "source": [ - "def check_output_files(netcdf_output_vars):\n", - " if netcdf_output_vars is None:\n", - " check_files = output_files\n", - " else: \n", - " check_files = [output_dir / f\"{vv}.nc\" for vv in netcdf_output_vars]\n", - " \n", - " for ff in check_files: \n", - " print(f\"checking {ff} file exists\")\n", - " assert ff.exists()\n", - " ff.unlink()\n", - " assert not ff.exists()\n", - " \n", - " return True\n", - "\n", - "\n", - "def preprocess_w_solar_geom(input_dict, netcdf_output_vars):\n", - " output_dir.mkdir(exist_ok=True)\n", - " solar_geom = PRMSSolarGeometry(control)\n", - " atm = PRMSAtmosphere(\n", - " control=control,\n", - " **input_dict,\n", - " soltab_horad_potsw=solar_geom.soltab_horad_potsw,\n", - " soltab_potsw=solar_geom.soltab_potsw,\n", - " budget_type=None,\n", - " netcdf_output_dir=output_dir,\n", - " netcdf_output_vars=netcdf_output_vars,\n", - " )\n", - " del atm\n", - " assert check_output_files(netcdf_output_vars)\n", - " shutil.rmtree(output_dir)\n", - " return None \n" - ] - }, - { - "cell_type": "markdown", - "id": "adfd07a4-4b9b-4eb5-9a40-76a3fd167b80", - "metadata": {}, - "source": [ - "We may also want to exercise control over the variables output to netCDF files. Here we'll speficy two options including the default which is all variables written to netcdf when `None` is used. 
(An empty list would give the same effect as not specifying a `netcdf_output_dir`.)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1b71a090-6935-443c-849a-6232546a77fc", - "metadata": {}, - "outputs": [], - "source": [ - "netcdf_output_vars_dict = {\n", - " 'Reduced output set': ['tmaxc', 'tminc', 'pptmix'], \n", - " 'Full/Default output set': None\n", - "}" - ] - }, - { - "cell_type": "markdown", - "id": "b7406e3d-f1dd-4fb1-9bb3-6f299f207781", - "metadata": {}, - "source": [ - "Run both output variable sets when only the typical CBH files are available." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e27f7e40-f5ac-4061-b98a-e223b881c68f", - "metadata": {}, - "outputs": [], - "source": [ - "# Preprocess using PRMSSolarGeom (soltab netCDF files not available)\n", - "control = Control.load(control_file, params=params)\n", - "for desc, netcdf_output_vars in netcdf_output_vars_dict.items():\n", - " print(f\"{desc}:\")\n", - " preprocess_w_solar_geom(input_files_w_solar_geom, netcdf_output_vars)\n", - " print('')" - ] - }, - { - "cell_type": "markdown", - "id": "d72cfda1-9957-4283-a35d-f3f8f595a139", - "metadata": {}, - "source": [ - "### 2b. If soltab output files are present\n", - "We repeat the above, dropping the `PRMSSolarGeometry` object as its information is now coming from the soltab files. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d54fd129-085c-498c-a90d-d752e89c9b8a", - "metadata": {}, - "outputs": [], - "source": [ - "input_vars = PRMSAtmosphere.get_inputs()\n", - "input_files = {}\n", - "for var in input_vars:\n", - " if \"soltab\" in var:\n", - " # These are in dom_dir/output for the test datasets\n", - " nc_pth = dom_dir / f\"output/{var}.nc\"\n", - " else:\n", - " nc_pth = cbh_nc_dir / f\"{var}.nc\"\n", - " \n", - " input_files[var] = nc_pth\n", - " \n", - "pprint(input_files)\n", - "\n", - "def preprocess_w_soltab(input_dict, netcdf_output_vars):\n", - " output_dir.mkdir(exist_ok=True)\n", - " atm = PRMSAtmosphere(\n", - " control=control,\n", - " **input_dict,\n", - " budget_type=None,\n", - " netcdf_output_dir=output_dir,\n", - " netcdf_output_vars=netcdf_output_vars,\n", - " )\n", - " del atm\n", - " assert check_output_files(netcdf_output_vars)\n", - " shutil.rmtree(output_dir)\n", - " return None\n", - "\n", - "# Preprocess when soltab netCDF files are available (not typical)\n", - "for desc, netcdf_output_vars in netcdf_output_vars_dict.items():\n", - " print(f\"{desc}:\") \n", - " preprocess_w_soltab(input_files, netcdf_output_vars)\n", - " print('')" - ] - }, - { - "cell_type": "markdown", - "id": "09b646b5-9e52-4c27-847d-85ac324f88dd", - "metadata": {}, - "source": [ - "## Clean up" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "05ad9f0c-f550-4ee1-a50b-e39f7498b4c7", - "metadata": {}, - "outputs": [], - "source": [ - "# Clean up the cbh netcdf files that were created at the very beginning\n", - "shutil.rmtree(cbh_nc_dir)" - ] - } - ], - "metadata": { - "language_info": { - "codemirror_mode": { - "name": "ipython" - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/pyproject.toml b/pyproject.toml index 19d5394e..ff0bc244 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -85,6 +85,7 @@ doc = [ "jinja2<3.1", "sphinx", "sphinx-book-theme>=0.3.3", + "sphinx-autodoc-typehints", "sphinx-autosummary-accessors", "sphinx-copybutton" ] diff --git a/pywatershed/__init__.py b/pywatershed/__init__.py index d47f0389..df881eb7 100644 --- a/pywatershed/__init__.py +++ b/pywatershed/__init__.py @@ -3,7 +3,7 @@ from .atmosphere.prms_atmosphere import PRMSAtmosphere from .atmosphere.prms_solar_geometry import PRMSSolarGeometry from .base import meta -from .base.adapter import Adapter +from .base.adapter import Adapter, AdapterNetcdf, adapter_factory from .base.budget import Budget from .base.control import Control from .base.model import Model @@ -27,5 +27,6 @@ "atmosphere", "base", "hydrology", + "meta", "utils", ] diff --git a/pywatershed/atmosphere/prms_atmosphere.py b/pywatershed/atmosphere/prms_atmosphere.py index ce016d80..69df37c3 100644 --- a/pywatershed/atmosphere/prms_atmosphere.py +++ b/pywatershed/atmosphere/prms_atmosphere.py @@ -27,14 +27,14 @@ def tile_time_to_space(arr: np.ndarray, n_space) -> np.ndarray: class PRMSAtmosphere(Process): """PRMS atmospheric boundary layer model. - Implementation based on PRMS 5.2.1 with theoretical documentation based on - PRMS-IV: + Implementation based on PRMS 5.2.1 with theoretical documentation given in + the PRMS-IV documentation: - Markstrom, S. L., Regan, R. S., Hay, L. E., Viger, R. J., Webb, R. M., + `Markstrom, S. L., Regan, R. S., Hay, L. E., Viger, R. J., Webb, R. M., Payn, R. A., & LaFontaine, J. H. (2015). PRMS-IV, the precipitation-runoff modeling system, version 4. US Geological Survey Techniques and Methods, 6, B7. - https://pubs.usgs.gov/tm/6b7/pdf/tm6-b7.pdf + `__ This representation uses precipitation and temperature inputs. Relative humidity could be added as well. 
@@ -42,25 +42,26 @@ class PRMSAtmosphere(Process): The boundary layer calculates and manages the following variables (given by PRMSAtmosphere.get_variables()): - *tmaxf, tminf, prmx, hru_ppt, hru_rain, hru_snow, swrad, potet, transp_on* + * tmaxf, tminf, prmx, hru_ppt, hru_rain, hru_snow, swrad, potet, transp_on PRMS adjustments to temperature and precipitation are applied here to the inputs. Shortwave radiation (using degree day method) and potential evapotranspiration (Jensen and Haise ,1963) and a temperature based transpiration flag (transp_on) are also calculated. - Note that all variables are calculated for all time upon initialization and - that all calculated variables are written to netcdf (when netcdf output is - requested) prior to the first model advance. This is effectively a complete - preprocessing of the input CBH files to the fields the model actually uses - on initialization. If you just want to preprocess these variables, see - `this notebook `_. + Note that all variables are calculated for all time upon the first advance + and that all calculated variables are written to NetCDF (when netcdf output + is requested) the first time output is requested. This is effectively a + complete preprocessing of the input CBH files to the fields the model + actually uses on initialization. For an example of preprocessing the + variables in PRMSAtmosphere, see + `this notebook `_. The full time version of a variable is given by the "private" version of the variable which is named with a single-leading underscore (eg tmaxf for all time is _tmaxf). - This full-time initialization ma not be tractable for large domains and/or + This full-time initialization may not be tractable for large domains and/or long periods of time and require changes to batch the processing of the variables. 
The benefits of full-time initialization are 1) the code is vectorized and fast for such a large calculation, 2) the initialization of @@ -81,12 +82,6 @@ class PRMSAtmosphere(Process): radiation on a horizontal plane verbose: Print extra information or not? - netcdf_output_dir: A directory to write netcdf outpuf files - netcdf_output_vars: A list of variables to output via netcdf. - netcdf_separate_files: Separate or a single netcdf output file - load_n_time_batches: How often to load from disk (not-implemented?) - n_time_chunk: the inverse of load_n_time_batches, the number of - times in a chunk/batch (implemented?) """ @@ -101,20 +96,14 @@ def __init__( soltab_potsw: adaptable, soltab_horad_potsw: adaptable, verbose: bool = False, - netcdf_output_dir: [str, pl.Path] = None, - netcdf_output_vars: list = None, - netcdf_separate_files: bool = None, - # from_file_dir: [str, pl.Path] = None, - n_time_chunk: int = -1, - load_n_time_batches: int = 1, ): # Defering handling batch handling of time chunks but self.n_time_chunk # is a dimension used in the metadata/variables dimensions. 
# TODO: make time chunking options work (esp with output) - if n_time_chunk <= 0: - self.n_time_chunk = control.n_times - else: - self.n_time_chunk = n_time_chunk + # if n_time_chunk <= 0: + # self.n_time_chunk = control.n_times + # else: + # self.n_time_chunk = n_time_chunk # Initialize full time with nans self._time = np.full(control.n_times, nan, dtype="datetime64[s]") @@ -775,11 +764,11 @@ def _write_netcdf_timeseries(self) -> None: if not self._netcdf_initialized: return - if self.netcdf_separate_files: + if self._netcdf_separate: for var in self.variables: if var not in self._netcdf_output_vars: continue - nc_path = self.netcdf_output_dir / f"{var}.nc" + nc_path = self._netcdf_output_dir / f"{var}.nc" nc = NetCdfWrite( nc_path, @@ -797,7 +786,7 @@ print(f"Wrote file: {nc_path}") else: - nc_path = self.netcdf_output_dir / f"{self.name}.nc" + nc_path = self._netcdf_output_dir / f"{self.name}.nc" nc = NetCdfWrite( nc_path, self.params.coords, @@ -822,8 +811,8 @@ def _write_netcdf_timeseries(self) -> None: def initialize_netcdf( self, - output_dir: [str, pl.Path], - separate_files: bool = True, + output_dir: [str, pl.Path] = None, + separate_files: bool = None, output_vars: list = None, **kwargs, ): @@ -839,18 +828,44 @@ def initialize_netcdf( warn(msg) return + if ( + "verbosity" in self.control.options.keys() + and self.control.options["verbosity"] > 5 + ): + print(f"initializing netcdf output for: {self.name}") + + ( + output_dir, + output_vars, + separate_files, + ) = self._reconcile_nc_args_w_control_opts( + output_dir, output_vars, separate_files + ) + + # apply defaults if necessary + if output_dir is None: + msg = ( + "An output directory is required to be specified for netcdf " + "initialization."
+ ) + raise ValueError(msg) + + if separate_files is None: + separate_files = True + + self._netcdf_separate = separate_files + self._netcdf_initialized = True - self.netcdf_separate_files = separate_files - self.netcdf_output_dir = output_dir + self._netcdf_output_dir = pl.Path(output_dir) + if output_vars is None: self._netcdf_output_vars = self.variables else: self._netcdf_output_vars = list( set(output_vars).intersection(set(self.variables)) ) - - if self._netcdf_output_vars is None: - self._netcdf_initialized = False + if len(self._netcdf_output_vars) == 0: + self._netcdf_initialized = False return diff --git a/pywatershed/atmosphere/prms_solar_geometry.py b/pywatershed/atmosphere/prms_solar_geometry.py index e776e4e1..99bc9587 100644 --- a/pywatershed/atmosphere/prms_solar_geometry.py +++ b/pywatershed/atmosphere/prms_solar_geometry.py @@ -31,7 +31,16 @@ def tile_space_to_time(arr: np.ndarray) -> np.ndarray: class PRMSSolarGeometry(Process): """PRMS solar geometry. - Swift's daily potential solar radiation and number of hours + Implementation based on PRMS 5.2.1 with theoretical documentation given in + the PRMS-IV documentation: + + `Markstrom, S. L., Regan, R. S., Hay, L. E., Viger, R. J., Webb, R. M., + Payn, R. A., & LaFontaine, J. H. (2015). PRMS-IV, the + precipitation-runoff modeling system, version 4. US Geological Survey + Techniques and Methods, 6, B7. + `__ + + Implements Swift's daily potential solar radiation and number of hours of duration on a sloping surface in [cal/cm2/day]. Swift, 1976, equation 6. @@ -45,10 +54,6 @@ class PRMSSolarGeometry(Process): verbose: Print extra information or not? from_prms_file: Load from a PRMS output file? from_nc_files_dir: [str, pl.Path] = None, - load_n_time_batches: How often to load from disk (not-implemented?) - netcdf_output_dir: A directory to write netcdf outpuf files - netcdf_separate_files: Separate or a single netcdf output file - netcdf_output_vars: A list of variables to output via netcdf. 
""" @@ -60,13 +65,7 @@ def __init__( verbose: bool = False, from_prms_file: [str, pl.Path] = None, from_nc_files_dir: [str, pl.Path] = None, - load_n_time_batches: int = 1, - netcdf_output_dir: [str, pl.Path] = None, - netcdf_separate_files: bool = True, - netcdf_output_vars: list = None, ): - self.netcdf_output_dir = netcdf_output_dir - # self._time is needed by Process for timeseries arrays # TODO: this is redundant because the parameter doy is set # on load of prms file. Could pass the name to use for @@ -92,8 +91,8 @@ def __init__( elif from_nc_files_dir: raise NotImplementedError() - self._calculated = False self._netcdf_initialized = False + self._calculated = False return @@ -412,11 +411,11 @@ def _write_netcdf_timeseries(self) -> None: if not self._netcdf_initialized: return - if self.netcdf_separate_files: + if self._netcdf_separate: for var in self.variables: if var not in self._netcdf_output_vars: continue - nc_path = self.netcdf_output_dir / f"{var}.nc" + nc_path = self._netcdf_output_dir / f"{var}.nc" nc = NetCdfWrite( nc_path, @@ -435,7 +434,7 @@ def _write_netcdf_timeseries(self) -> None: print(f"Wrote file: {nc_path}") else: - nc_path = self.netcdf_output_dir / f"{self.name}.nc" + nc_path = self._netcdf_output_dir / f"{self.name}.nc" nc = NetCdfWrite( nc_path, self.params.coords, @@ -465,8 +464,8 @@ def _finalize_netcdf(self) -> None: def initialize_netcdf( self, - output_dir: [str, pl.Path], - separate_files: bool = True, + output_dir: [str, pl.Path] = None, + separate_files: bool = None, output_vars: list = None, **kwargs, ): @@ -482,18 +481,44 @@ def initialize_netcdf( warnings.warn(msg) return + if ( + "verbosity" in self.control.options.keys() + and self.control.options["verbosity"] > 5 + ): + print(f"initializing netcdf output for: {self.name}") + + ( + output_dir, + output_vars, + separate_files, + ) = self._reconcile_nc_args_w_control_opts( + output_dir, output_vars, separate_files + ) + + # apply defaults if necessary + if output_dir is 
None: + msg = ( + "An output directory is required to be specified for netcdf " + "initialization." + ) + raise ValueError(msg) + + if separate_files is None: + separate_files = True + + self._netcdf_separate = separate_files + self._netcdf_initialized = True - self.netcdf_separate_files = separate_files - self.netcdf_output_dir = output_dir + self._netcdf_output_dir = pl.Path(output_dir) + if output_vars is None: self._netcdf_output_vars = self.variables else: - self.netcdf__output_vars = list( + self._netcdf_output_vars = list( set(output_vars).intersection(set(self.variables)) ) - - if self._netcdf_output_vars is None: - self._netcdf_initialized = False + if len(self._netcdf_output_vars) == 0: + self._netcdf_initialized = False return diff --git a/pywatershed/base/accessor.py b/pywatershed/base/accessor.py index 63b38448..5cc31f42 100644 --- a/pywatershed/base/accessor.py +++ b/pywatershed/base/accessor.py @@ -1,5 +1,10 @@ class Accessor: - """A base class for dict access on self.""" + """A base class for dict access on self. + + This class is used by most other classes to ``__setitem__``, + ``__getitem__``, and ``__delitem__`` in a ``dict`` style + + """ def __setitem__(self, name: str, value) -> None: setattr(self, name, value) diff --git a/pywatershed/base/adapter.py b/pywatershed/base/adapter.py index 1fe1ee61..3ecb79bc 100644 --- a/pywatershed/base/adapter.py +++ b/pywatershed/base/adapter.py @@ -1,3 +1,10 @@ +"""The adapter module. + +This module contains the Adapter base class, its several concrete subclasses +and an adapter factory to dispatch you the right subclass when you ask for it. + +""" + import pathlib as pl from typing import Union @@ -13,7 +20,7 @@ class Adapter: """Adapter base class for getting data from a variety of sources.
Args: - variable: string name ov variable + variable: string name of variable """ def __init__( @@ -24,17 +31,21 @@ def __init__( self._variable = variable return None - def advance(self): - "Advance the adapter in time" + def advance(self) -> None: + """Advance the adapter in time""" raise NotImplementedError("Must be overridden") @property def current(self): + """Current time of the Adapter instance.""" return self._current_value class AdapterNetcdf(Adapter): - """Adapter class for a netcdf file + """Adapter subclass for a NetCDF file + + This requires that the NetCDF file have a time dimension named "time" or + "doy" (day of year) to be properly handled as a timeseries for input, etc. Args: fname: filename of netcdf as string or Path @@ -95,12 +106,22 @@ def advance(self): return None @property - def data(self): + def data(self) -> np.array: + """Return the data for the current time.""" # TODO JLM: seems like we'd want to cache this data if we invoke once return self._nc_read.all_time(self._variable).data class AdapterOnedarray(Adapter): + """Adapter subclass for an invariant 1-D numpy.array + + The data are constant and do not advance in time. 
+ + Args: + data: the data to be adapted + variable: variable name string + """ + def __init__( self, data: np.ndarray, @@ -111,7 +132,8 @@ def __init__( self._current_value = data return - def advance(self, *args): + def advance(self, *args) -> None: + """A dummy method for compliance.""" return None @@ -125,13 +147,24 @@ def adapter_factory( variable_dim_sizes: tuple = None, variable_type: str = None, load_n_time_batches: int = 1, -): +) -> "Adapter": + """A function to return the appropriate subclass of Adapter + + Args: + var: the quantity to be adapted + variable_name: what you call the above var + control: a Control object + variable_dim_sizes: for an AdapterNetcdf + variable_type: for an AdapterNetcdf + load_n_time_batches: for an AdapterNetcdf + + """ if isinstance(var, Adapter): - """Adapt an adapter""" + # Adapt an adapter. return var elif isinstance(var, (str, pl.Path)): - """Paths and strings are considered paths to netcdf files""" + # Paths and strings are considered paths to netcdf files if pl.Path(var).suffix == ".nc": return AdapterNetcdf( var, @@ -143,15 +176,15 @@ def adapter_factory( ) elif isinstance(var, np.ndarray) and len(var.shape) == 1: - """Adapt 1-D np.ndarrays""" + # Adapt 1-D np.ndarrays return AdapterOnedarray(var, variable=variable_name) elif isinstance(var, TimeseriesArray): - """Adapt TimeseriesArrays as is.""" + # Adapt TimeseriesArrays as is. return var elif var is None: - """var is specified as None so return None""" + # var is specified as None so return None return None else: diff --git a/pywatershed/base/conservative_process.py b/pywatershed/base/conservative_process.py index a10590cb..7f2b22d6 100644 --- a/pywatershed/base/conservative_process.py +++ b/pywatershed/base/conservative_process.py @@ -11,32 +11,54 @@ class ConservativeProcess(Process): - """ConservativeProcess base class + """Base class for representation of conservative physical processes. - ConservativeProcess is a base class for mass and energy conservation. 
This - class extends the Process class with a budget on mass (energy in the - future). - - It has budgets that can optionally be established for mass an energy and - these can be enforced or simply diagnosed with the model run. + ConservativeProcess is a base class for mass and energy conservation which + extends the :func:`~pywatershed.base.Process` class with a budget on + mass (energy in the future). Please see :func:`~pywatershed.base.Process` + for many details on the design of this parent class. In ConservativeProcess + only mass conservation is currently implemented. Budgets can optionally be + established for mass (and eventually energy) and these can be enforced or + simply diagnosed with the model run. Conventions are adopted through the use of the following properties/methods: - mass_budget_terms/get_mass_budget_terms(): - These terms must all in in the same units across all components of - the budget (inputs, outputs, storage_changes). Diagnostic variables - should not appear in the budget terms, only prognostic variables - should. - - _calculate(): - This method is to be overridden by the subclass. Near the end of - the method, the subclass should calculate its changes in mass and - energy storage in an obvious way. As commented for - mass_budget_terms, storage changes should only be tracked for - prognostic variables. (For example is snow_water_equiv = snow_ice + - snow_liquid, then storage changes for snow_ice and snow_liquid - should be tracked and not for snow_water_equiv). + mass_budget_terms/get_mass_budget_terms(): + These terms must all be in the same units across all components of + the budget (inputs, outputs, storage_changes). Diagnostic variables + should not appear in the budget terms, only prognostic variables + should. + + _calculate(): + This method is to be overridden by the subclass. Near the end of + the method, the subclass should calculate its changes in mass and + energy storage in an obvious way.
As commented for + mass_budget_terms, storage changes should only be tracked for + prognostic variables. (For example is snow_water_equiv = snow_ice + + snow_liquid, then storage changes for snow_ice and snow_liquid + should be tracked and not for snow_water_equiv). + + See Also + -------- + pywatershed.base.Process + pywatershed.base.Budget + + Args + ---- + control: + A Control object + discretization: + A discretization object + parameters: + The parameters for this object + budget_type: + Use a budget and what action to take on budget imbalance. + metadata_patches: + Override static metadata for any public parameter or variable -- + experimental. + metadata_patch_conflicts: + How to handle metadata_patches conflicts. Experimental. """ @@ -164,8 +186,8 @@ def calculate(self, time_length: float, **kwargs) -> None: def initialize_netcdf( self, - output_dir: [str, pl.Path], - separate_files: bool = True, + output_dir: [str, pl.Path] = None, + separate_files: bool = None, budget_args: dict = None, output_vars: list = None, ) -> None: @@ -186,7 +208,7 @@ def initialize_netcdf( if self.budget is not None: if budget_args is None: budget_args = {} - budget_args["output_dir"] = output_dir + budget_args["output_dir"] = self._netcdf_output_dir budget_args["params"] = self.params self.budget.initialize_netcdf(**budget_args) diff --git a/pywatershed/base/control.py b/pywatershed/base/control.py index 6aa0805b..1c3c9241 100644 --- a/pywatershed/base/control.py +++ b/pywatershed/base/control.py @@ -80,7 +80,6 @@ class Control(Accessor): time_step: the length fo the time step options: a dictionary of global Process options. 
- Available pywatershed options: * budget_type: one of [None, "warn", "error"] * calc_method: one of ["numpy", "numba", "fortran"] @@ -110,6 +109,52 @@ class Control(Accessor): "netcdf_output_var_names" * print_debug: translates to "verbosity" + + Examples: + --------- + + >>> import pathlib as pl + >>> + >>> import numpy as np + >>> import pywatershed as pws + >>> + >>> control = pws.Control( + ... start_time=np.datetime64("2023-01-01T00:00:00"), + ... end_time=np.datetime64("2023-01-02T00:00:00"), + ... time_step=np.timedelta64(24, "h"), + ... options={"input_dir": pl.Path("./input")}, + ... ) + >>> + >>> # A more interesting example reads from a PRMS control file + >>> pws_root = pws.constants.__pywatershed_root__ + >>> drb_control_file = pws_root / "data/drb_2yr/control.test" + >>> control_drb = pws.Control.load_prms( + ... drb_control_file, + ... warn_unused_options=False, + ... ) + >>> control_drb.current_time + numpy.datetime64('1978-12-31T00:00:00') + >>> control_drb.previous_time + >>> control_drb.init_time + numpy.datetime64('1978-12-31T00:00:00') + >>> control_drb.time_step + numpy.timedelta64(24,'h') + >>> control_drb.time_step_seconds + 86400.0 + >>> control_drb.start_time + numpy.datetime64('1979-01-01T00:00:00') + >>> control_drb.advance() + >>> control_drb.current_time + numpy.datetime64('1979-01-01T00:00:00') + >>> control_drb.current_doy + 1 + >>> control_drb.current_dowy + 93 + >>> control_drb.previous_time + numpy.datetime64('1978-12-31T00:00:00') + + + """ def __init__( @@ -140,7 +185,7 @@ def __init__( else: self._init_time = self._start_time - time_step - self._current_time = None + self._current_time = self._init_time self._previous_time = None self._itime_step = -1 @@ -169,7 +214,7 @@ def load_prms( control_file: fileish, warn_unused_options: bool = True, ) -> "Control": - """Initialize a control object from a PRMS control file + """Initialize a control object from a PRMS control file. 
Args: control_file: PRMS control file @@ -178,15 +223,7 @@ def load_prms( default. See below for a list of used/available legacy options. Returns: - Time: Time object initialized from a PRMS control file - - - - Available PRMS legacy options : - nhruOutVar_names: mapped to netcdf_output_var_names - nsegmentOutVar_names: mapped to netcdf_output_var_names - - + An instance of a Control object. """ control = ControlVariables.load(control_file) @@ -276,91 +313,92 @@ def __deepcopy__(self, memo): return result @property - def current_time(self): + def current_time(self) -> np.datetime64: """Get the current time.""" return self._current_time @property - def current_datetime(self): + def current_datetime(self) -> datetime.datetime: """Get the current time as a datetime.datetime object""" return self._current_time.astype(datetime.datetime) @property - def current_year(self): + def current_year(self) -> int: """Get the current year.""" return datetime_year(self._current_time) @property - def current_month(self, zero_based: bool = False): + def current_month(self) -> int: """Get the current month in 1-12 (unless zero based).""" - return datetime_month(self._current_time, zero_based=zero_based) + return datetime_month(self._current_time) @property - def current_doy(self, zero_based: bool = False): + def current_doy(self) -> int: """Get the current day of year in 1-366 (unless zero based).""" - return datetime_doy(self._current_time, zero_based=zero_based) + return datetime_doy(self._current_time) @property - def current_dowy(self, zero_based: bool = False): + def current_dowy(self) -> int: """Get the current day of water year in 1-366 (unless zero-based).""" - return datetime_dowy(self._current_time, zero_based=zero_based) + return datetime_dowy(self._current_time) @property - def current_epiweek(self): + def current_epiweek(self) -> int: """Get the current epiweek [1, 53].""" return datetime_epiweek(self._current_time) @property - def previous_time(self): + def 
previous_time(self) -> np.datetime64: """The previous time.""" return self._previous_time @property - def itime_step(self): + def itime_step(self) -> int: """The counth of the current time [0, self.n_times-1]""" return self._itime_step @property - def time_step(self): + def time_step(self) -> int: """The time step""" return self._time_step @property - def init_time(self): + def init_time(self) -> np.datetime64: """Get the simulation initialization time""" return self._init_time @property - def start_time(self): + def start_time(self) -> np.datetime64: """The simulation start time""" return self._start_time @property - def start_doy(self): - """The simulation start day of year""" + def start_doy(self) -> int: + """The simulation start day of year.""" return datetime_doy(self._start_time) @property - def start_month(self): - """The simulation start month""" + def start_month(self) -> int: + """The simulation start month.""" return datetime_month(self._start_time) @property - def end_time(self): - """The simulation end time""" + def end_time(self) -> np.datetime64: + """The simulation end time.""" return self._end_time @property - def n_times(self): - """The number of time steps""" + def n_times(self) -> int: + """The number of time steps.""" return self._n_times @property - def time_step_seconds(self): + def time_step_seconds(self) -> np.float64: + """The timestep length in units of seconds.""" return self.time_step / np.timedelta64(1, "s") - def advance(self): - """Advance time""" + def advance(self) -> None: + """Advance time.""" if self._current_time == self._end_time: raise ValueError("End of time reached") @@ -375,23 +413,31 @@ def advance(self): return None - def edit_end_time(self, new_end_time: np.datetime64): - "Supply a new end time for the simulation." + def edit_end_time(self, new_end_time: np.datetime64) -> None: + """Supply a new end time for the simulation. + + Args: + new_end_time: the new time at which to end the simulation. 
+ """ self._end_time = new_end_time assert self._end_time - self._start_time > 0 self._n_times = ( int((self._end_time - self._start_time) / self._time_step) + 1 ) - return + return None + + def edit_n_time_steps(self, new_n_time_steps: int) -> None: + """Supply a new number of timesteps to change the simulation end time. - def edit_n_time_steps(self, new_n_time_steps: int): - "Supply a new number of timesteps to change the simulation end time." + Args: + new_n_time_steps: The new number of timesteps. + """ self._n_times = new_n_time_steps self._end_time = ( self._start_time + (self._n_times - 1) * self._time_step ) - return + return None def __str__(self): from pprint import pformat @@ -402,11 +448,11 @@ def __repr__(self): # TODO: this is not really an object representation return self.__str__() - def to_dict(self, deep_copy=True): + def to_dict(self, deep_copy=True) -> dict: """Export a control object to a dictionary Args: - None. + deep_copy: If the dictionary should be a deep copy or not. """ control_dict = {} @@ -430,9 +476,12 @@ def to_dict(self, deep_copy=True): return control_dict - def to_yaml(self, yaml_file: Union[pl.Path, str]): + def to_yaml(self, yaml_file: Union[pl.Path, str]) -> None: """Export to a yaml file + Args: + yaml_file: The file to write to. + Note: This flattens .options to the top level of the yaml/dict so that option keys are all at the same level as "start_time", "end_time", "time_step", and "time_step_units". Using .from_yaml @@ -459,34 +508,33 @@ def to_yaml(self, yaml_file: Union[pl.Path, str]): return None @staticmethod - def from_yaml(yaml_file): + def from_yaml(yaml_file: Union[str, pl.Path]) -> "Control": """Instantate a Control object from a yaml file + Args: + yaml_file: a yaml file to parse. + + Required key:value pairs: - start_time: ISO8601 string for numpy datetime64, - e.g. 1979-01-01T00:00:00 - end_time: ISO8601 string for numpy datetime64, - e.g. 
1980-12-31T00:00:00 - time_step: The first argument to get a numpy.timedelta64, e.g. 24 - time_step_units: The second argument to get a numpy.timedelta64, - e.g. 'h' - verbosity: integer 0-10 - input_dir: path relative to this file. - budget_type: None | "warn" | "error" - calc_method: None | "numpy" | "numba" | "fortran" ( - depending on availability) - load_n_time_batches: integer < total number of timesteps ( - optionalize?) - init_vars_from_file: False (optionalize, rename restart) - dprst_flag: True (optionalize) only for PRMSSoilzone (should - be supplied in its process dictionary) + * budget_type: None | "warn" | "error" + * calc_method: None | "numpy" | "numba" | "fortran" (depending on + availability) + * end_time: ISO8601 string for numpy datetime64, e.g. + 1980-12-31T00:00:00. + * input_dir: path relative to this file. + * start_time: ISO8601 string for numpy datetime64, e.g. + 1979-01-01T00:00:00. + * time_step: The first argument to get a numpy.timedelta64, e.g. 24 + * time_step_units: The second argument to get a numpy.timedelta64, + e.g. 'h'. + * verbosity: integer 0-10 Optional key, value pairs: - netcdf_output: boolean - netcdf_output_var_names: list of variable names to output, e.g. - - albedo - - cap_infil_tot - - contrib_fraction + * netcdf_output: boolean + * netcdf_output_var_names: list of variable names to output, e.g. + - albedo + - cap_infil_tot + - contrib_fraction Returns: Control object diff --git a/pywatershed/base/data_model.py b/pywatershed/base/data_model.py index e8ade17c..e5e611ee 100644 --- a/pywatershed/base/data_model.py +++ b/pywatershed/base/data_model.py @@ -46,15 +46,23 @@ class DatasetDict(Accessor): - """DatasetDict class maps between netcdf conventions + """DatasetDict: a data model following NetCDF-like conventions This is the core class in the data model adopted by pywatershed. - Where typically metadata is stored on a variable, we maintain metadata - on a collocated dictionary. 
The data model is a DatasetDict with dims, - coords, data_vars, and metadata keys. The dims track the length of each - dimension. The coordinates are the discrete locations along dimensions or - sets of dimensions. The data_vars contain the data located on dims and + The DatasetDict handles dimensions, coordinates, data, and metadata in + a way like `NetCDF `__ and + `xarray `__ and provides invertible + mappings between both the + `netCDF4 `__ and + `xarray `__ Python packages. + + Where metadata is typically stored on a variable in NetCDF and in xarray, + a DatasetDict maintains metadata in dictionary collocated with coordinate + and data variables. The data model is a DatasetDict with dims, coords, + data_vars, and metadata keys. The dims track the length of each dimension. + The coordinates are the discrete locations along dimensions or sets of + dimensions. The data_vars contain the data located on dims and coordinates. The metadata describes the relationship between both coords and data_vars and their dims. Together the coords and data_vars are the variables of the DatasetDict. All keys in the variables must be present @@ -62,7 +70,7 @@ class DatasetDict(Accessor): attrs. The dims is a tuple of the variable's dimensions and attrs are more general attributes. - When a netcdf file is read from disk, it has encoding properties that may + When a NetCDF file is read from disk, it has encoding properties that may come along. Alternatively, encodings may be specified before writing to file. @@ -87,7 +95,13 @@ class DatasetDict(Accessor): of the supplied dictionaries - Examples: + See Also + -------- + pywatershed.Parameters + + + Examples + --------- .. 
# This code is commented, copy and paste in to python, then paste the diff --git a/pywatershed/base/model.py b/pywatershed/base/model.py index 520491e8..3f07c5f7 100644 --- a/pywatershed/base/model.py +++ b/pywatershed/base/model.py @@ -2,7 +2,6 @@ from copy import deepcopy from datetime import datetime from typing import Union -from warnings import warn from tqdm.auto import tqdm @@ -35,8 +34,9 @@ class Model: below: 1) PRMS-legacy instantation, 2) pywatershed-centric instatiation. Args: - process_list_or_model_dict: a process list or a model dictionary, - see ways of instatiation below. + process_list_or_model_dict: a process list (for PRMS-legacy + instantiation) or a model dictionary (for pywatershed style + instatiation), see ways of instatiation below. control: Control object or None, see below. parameters: A Parameters object of a dictionary of Parameters objects. Default is None. See ways of instatnation below. @@ -52,35 +52,30 @@ class Model: PRMS-legacy instantiation ----------------------------- - This method of instantiation may be deprecated in the future. It is - intended to support PRMS files with few modifications. + This method of instantiation supports PRMS files with few modifications. + The first three arguments to instantiate a Model this way are: - Note: Pywatershed v0.2.0 has a minor break to backwards compatibility (pre - v0.2.0) where the list of processes is not passed as a list rather than an - upacked list or a number of arguments. + * process_list_or_model_dict: A process list of PRMS model components. + * control: A Control object. + * parameters: A PrmsParameters object. - Old way, first three args: - - - **process_list_or_model_dict** - A process list of PRMS model components - - **control** - A control object - - **parameters** - A PrmsParameters object - - See first example below for more details. - There are variations of this where + The first example below provides details. 
An extended example is given by + `examples/02_prms_legacy_models.ipynb `__. pywatershed-centric instatiation ------------------------------------ - The new/pywatershed way was introduced to handle more aribtrary and subdry - models. It is loosely based on how MF6 structures its inputs. All of the + The pywatershed style instantiation handles arbitrary and sundry + processes. It is loosely based on how MF6 structures its inputs. All of the work is contained in the first argument, the model dictionary, and the control and parameters are both None. - New way, only one of first three args: + Instantiating a Model this way, only the first of the first three args is + used: - - **process_list_or_model_dict** - a "model dictionary", detailed below. - - (*control* - None is default) - - (*parameters* - None is default) + - process_list_or_model_dict: a "model dictionary", detailed below. + - control: Do not pass, None is default. + - parameters: Do not pass, None is default. This means that the user must understand the model dictionary supplied. A model dictionary has aribtrary keys but prescribed values. Values may be @@ -91,6 +86,10 @@ class Model: requirements for the values are described as they are (necessarily) different. + + See the second and third examples below for more details and see + `examples/01_multi-process_models.ipynb `__ + for an extended example. + Model dictionary values description: ==================================== @@ -134,25 +133,6 @@ class Model: Construct a PRMS-legacy based model: - ..
- import pywatershed as pws - test_data_dir = pws.constants.__pywatershed_root__ / "../test_data" - domain_dir = test_data_dir / "drb_2yr" - # A PRMS-native control file - control_file = domain_dir / "control.test" - # PRMS-native parameter file - parameter_file = domain_dir / "myparam.param" - control = pws.Control.load(control_file) - control.options['input_dir'] = domain_dir / "output" - params = pws.parameters.PrmsParameters.load(parameter_file) - model_procs = [pws.PRMSGroundwater, pws.PRMSChannel,] - model = pws.Model( - model_procs, - control=control, - parameters=params, - ) - model.run() - >>> import pywatershed as pws >>> test_data_dir = pws.constants.__pywatershed_root__ / "../test_data" >>> domain_dir = test_data_dir / "drb_2yr" @@ -160,7 +140,9 @@ class Model: >>> control_file = domain_dir / "control.test" >>> # PRMS-native parameter file >>> parameter_file = domain_dir / "myparam.param" - >>> control = pws.Control.load(control_file) + >>> control = pws.Control.load_prms( + ... control_file, warn_unused_options=False + ... ) >>> control.options["input_dir"] = domain_dir / "output" >>> params = pws.parameters.PrmsParameters.load(parameter_file) >>> model_procs = [ @@ -181,53 +163,6 @@ class Model: Construct a model the pywatershed-centric way, in memory: - .. 
- import pywatershed as pws - test_data_dir = pws.constants.__pywatershed_root__ / "../test_data" - domain_dir = test_data_dir / "drb_2yr" - dis_hru = pws.Parameters.from_netcdf( - domain_dir / "parameters_dis_hru.nc", encoding=False - ) - control_file = domain_dir / "control.test" - control = pws.Control.load(control_file) - control.options['input_dir'] = domain_dir - params = {} - for proc in ["SolarGeometry", "Atmosphere", "Canopy", "Snow"]: - param_file = domain_dir / f"parameters_PRMS{proc}.nc" - params[proc.lower()] = pws.Parameters.from_netcdf(param_file) - model_dict = { - 'control': control, - 'dis_hru': dis_hru, - 'model_order': [ - 'prmssolargeometry', - 'prmsatmosphere', - 'prmscanopy', - 'prmssnow', - ], - 'prmssolargeometry': { - 'class': pws.PRMSSolarGeometry, - 'parameters': params['solargeometry'], - 'dis': 'dis_hru', - }, - 'prmsatmosphere': { - 'class': pws.PRMSAtmosphere, - 'parameters': params['atmosphere'], - 'dis': 'dis_hru', - }, - 'prmscanopy': { - 'class': pws.PRMSCanopy, - 'parameters': params['canopy'], - 'dis': 'dis_hru', - }, - 'prmssnow': { - 'class': pws.PRMSSnow, - 'parameters': params['snow'], - 'dis': 'dis_hru', - } - } - model = pws.Model(model_dict) - model.run() - >>> import pywatershed as pws >>> test_data_dir = pws.constants.__pywatershed_root__ / "../test_data" >>> domain_dir = test_data_dir / "drb_2yr" @@ -235,7 +170,9 @@ class Model: ... domain_dir / "parameters_dis_hru.nc", encoding=False ... ) >>> control_file = domain_dir / "control.test" - >>> control = pws.Control.load(control_file) + >>> control = pws.Control.load_prms( + ... control_file, warn_unused_options=False + ... 
) >>> control.options["input_dir"] = domain_dir >>> params = {} >>> for proc in ["SolarGeometry", "Atmosphere", "Canopy", "Snow"]: @@ -276,68 +213,11 @@ class Model: PRMSCanopy jit compiling with numba PRMSSnow jit compiling with numba >>> model.run() - 0%| | 0/731 [00:00>> import yaml >>> import pywatershed as pws >>> test_data_dir = pws.constants.__pywatershed_root__ / "../test_data" @@ -349,10 +229,9 @@ class Model: ... "time_step_units": "h", ... "verbosity": 0, ... "budget_type": "warn", - ... "init_vars_from_file": 0, ... "input_dir": str(domain_dir), ... } - >>> control_file = domain_dir / "example_control.yml" + >>> control_file = domain_dir / "example_control.yaml" >>> model_dict = { ... "control": str(control_file), ... "dis_hru": "parameters_dis_hru.nc", @@ -379,23 +258,17 @@ class Model: ... }, ... "model_order": ["solargeometry", "atmosphere", "canopy", "snow"], ... } - >>> model_dict_file = domain_dir / "example_model_dict.yml" + >>> model_dict_file = domain_dir / "example_model_dict.yaml" >>> dump_dict = {control_file: control, model_dict_file: model_dict} >>> for key, val in dump_dict.items(): ... with open(key, "w") as file: ... documents = yaml.dump(val, file) ... - >>> model = pws.Model.from_yml(model_dict_file) + >>> model = pws.Model.from_yaml(model_dict_file) PRMSCanopy jit compiling with numba PRMSSnow jit compiling with numba >>> model.run() - 0%| | 0/731 [00:00>> control_file.unlink() >>> model_dict_file.unlink() @@ -404,7 +277,7 @@ class Model: def __init__( self, - process_list_or_model_dict, + process_list_or_model_dict: Union[list, dict], control: Control = None, parameters: Union[Parameters, dict[Parameters]] = None, find_input_files: bool = True, @@ -725,30 +598,34 @@ def from_yaml(yaml_file: Union[str, pl.Path]): Yaml file structure (strict order not required, but suggested): - Control object: Any name can be used but the value must be a control - yaml file specified with the suffix ".yaml". 
E.g - "name: control.yaml" - would appear in the passed yaml file. Only one control - specification is allowed in the yaml_file. For details on the - requirements of the control.yaml file see `Control.from_yaml` - Discretization objects: Any number of discretization objects can be - supplied with arbitrary (though unique) names. The values supplied - for each discretization must be a valid netcdf file with suffix - ".nc". These can generally be obtained by calling - `parameters.to_netcdf()` on subclasses of Parameters. - Process objects: Any number of processes may be specified with - arbitrary (though unique) names. Processes are specified as - dictionaries in the yaml file, they have additonal key:value pairs. - Required key:value pairs are: - class: a class that can be `from pywatershed import class` - parameters: a netcdf file that specifies the parameters for - the class - dis: the name of the discretization for the class as given by - a discretization object speficication above. - Optional key:value pairs: - TBD - Model order list: a list supplying the order in which the processes are - to be executed. + * Control object: Any name can be used but the value must be a control + yaml file specified with the suffix ".yaml". E.g + "name: control.yaml" + would appear in the passed yaml file. Only one control + specification is allowed in the yaml_file. For details on the + requirements of the control.yaml file see `Control.from_yaml` + + * Discretization objects: Any number of discretization objects can be + supplied with arbitrary (though unique) names. The values supplied + for each discretization must be a valid netcdf file with suffix + ".nc". These can generally be obtained by calling + `parameters.to_netcdf()` on subclasses of Parameters. + + * Process objects: Any number of processes may be specified with + arbitrary (though unique) names. Processes are specified as + dictionaries in the yaml file, they have additonal key:value pairs. 
+ Required key:value pairs are: + + * class: a class that can be `from pywatershed import class` + * parameters: a netcdf file that specifies the parameters for + the class + * dis: the name of the discretization for the class as given by + a discretization object specification above. + + Optional key:value pairs: TBD + + * Model order list: a list supplying the order in which the processes + are to be executed. Note: To get a model_dict specfied by the yaml_file, call `model_dict_from_yaml` instead. @@ -775,40 +652,14 @@ def initialize_netcdf( names are silently skipped. Defaults to None which writes all variables for all Processes. """ - if self._netcdf_initialized: - msg = ( - "Model class previously initialized netcdf output " - f"in {self._netcdf_dir}" - ) - raise RuntimeError(msg) - print("model initializing NetCDF output") if not self._found_input_files: self._find_input_files() - ( - output_dir, - output_vars, - separate_files, - ) = self._reconcile_nc_args_w_control_opts( - output_dir, output_vars, separate_files - ) - - # apply defaults if necessary - if output_dir is None: - msg = ( - "An output directory is required to be specified for netcdf" - "initialization." - ) - raise ValueError(msg) - if separate_files is None: - separate_files = True - - self._netcdf_dir = pl.Path(output_dir) for cls in self.process_order: self.processes[cls].initialize_netcdf( - output_dir=self._netcdf_dir, + output_dir=output_dir, separate_files=separate_files, budget_args=budget_args, output_vars=output_vars, @@ -841,9 +692,6 @@ def run( n_time_steps: the number of timesteps to run output_vars: the vars to output to the netcdf_dir """ - # Can supply options ton initialize netcdf on .run but not with - # .advance. However, the first advance takes care of finding - # the input files.
if netcdf_dir or ( not self._netcdf_initialized and self._default_nc_out_dir is not None @@ -897,50 +745,3 @@ def finalize(self): for cls in self.process_order: self.processes[cls].finalize() return - - def _reconcile_nc_args_w_control_opts( - self, output_dir, output_vars, separate_files - ): - # can treat the other args but they are not yet in the available opts - arg_opt_name_map = { - "output_dir": "netcdf_output_dir", - "output_vars": "netcdf_output_var_names", - "separate_files": "netcdf_output_separate_files", - } - - args = { - "output_dir": output_dir, - "output_vars": output_vars, - "separate_files": separate_files, - } - - for vv in args.keys(): - arg_val = args[vv] - opt_name = arg_opt_name_map[vv] - opts = self.control.options - if opt_name in opts.keys(): - opt_val = opts[opt_name] - else: - opt_val = None - - # set the arg vals to return - - if opt_val is None and arg_val is None: - pass - - elif opt_val is None: - pass - - elif arg_val is None: - args[vv] = opt_val - - else: - msg = ( - f"control.option '{opt_name}' being superceeded by " - f"model.initialize_netcdf argument {vv}" - ) - # TODO: should this edit control? and then model writes control - # at the end of run to the output dir? - warn(msg) - - return args["output_dir"], args["output_vars"], args["separate_files"] diff --git a/pywatershed/base/parameters.py b/pywatershed/base/parameters.py index 1d45b083..10a3d712 100644 --- a/pywatershed/base/parameters.py +++ b/pywatershed/base/parameters.py @@ -15,12 +15,11 @@ class Parameters(DatasetDict): """Parameter base class - This is a subclass of data_model.DatasetDict, but all the data are - read-only by design. - Parameters has all the same methods as DatasetDict plus several new - ones that map to DatasetDict as follows: + This is a subclass of :func:`~pywatershed.base.DatasetDict` where data are + read-only by design. 
Parameters has all the same methods as DatasetDict + plus several new ones that map to DatasetDict as follows: - * parameters: dd.variables + * parameters: dd.variables (dd.coords + dd.data) * get_param_values: get values from dd.variables * get_dim_values: get values from dd.dims @@ -40,9 +39,76 @@ class Parameters(DatasetDict): The metadata argument may also contain a special `global` key paired with a dictionary of global metadata of arbitrary name and values of string, integer, or float types. + encoding: The encoding attributes to/from file when reading/writing. validate: A bool that defaults to True, enforcing the consistency - of the supplied dictionaries + of the supplied dictionaries. + + See Also + -------- + pywatershed.base.DatasetDict + + Examples + -------- + + See :func:`~pywatershed.base.DatasetDict` for more examples. + + .. + >>> from pprint import pprint + >>> import numpy as np + >>> import pywatershed as pws + >>> nreach = 3 + >>> params = pws.parameters.PrmsParameters( + ... dims={ + ... "nsegment": nreach, + ... }, + ... coords={ + ... "nsegment": np.array(range(nreach)), + ... }, + ... data_vars={ + ... "tosegment": np.array( + ... [2, 3, 0] + ... ), # one-based index, 0 is outflow + ... "seg_length": np.ones(nreach) * 1.0e3, + ... }, + ... metadata={ + ... "nsegment": {"dims": ["nsegment"]}, + ... "tosegment": {"dims": ["nsegment"]}, + ... "seg_length": {"dims": ["nsegment"]}, + ... }, + ... validate=True, + ... 
) + >>> params + + >>> params.dims + mappingproxy({'nsegment': 3}) + >>> params.coords + mappingproxy({'nsegment': array([0, 1, 2])}) + >>> pprint(params.data) + {'coords': mappingproxy({'nsegment': array([0, 1, 2])}), + 'data_vars': mappingproxy({'seg_length': array([1000., 1000., 1000.]), + 'tosegment': array([2, 3, 0])}), + 'dims': mappingproxy({'nsegment': 3}), + 'encoding': mappingproxy({}), + 'metadata': mappingproxy({'global': mappingproxy({}), + 'nsegment': mappingproxy({'dims': ['nsegment']}), + 'seg_length': mappingproxy({'dims': ['nsegment']}), + 'tosegment': mappingproxy({'dims': ['nsegment']})})} + >>> pprint(params.metadata) + mappingproxy({'global': mappingproxy({}), + 'nsegment': mappingproxy({'dims': ['nsegment']}), + 'seg_length': mappingproxy({'dims': ['nsegment']}), + 'tosegment': mappingproxy({'dims': ['nsegment']})}) + >>> xrds = params.to_xr_ds() + >>> xrds + + Dimensions: (nsegment: 3) + Coordinates: + * nsegment (nsegment) int64 0 1 2 + Data variables: + tosegment (nsegment) int64 2 3 0 + seg_length (nsegment) float64 1e+03 1e+03 1e+03 + """ def __init__( diff --git a/pywatershed/base/process.py b/pywatershed/base/process.py index c90f29db..f43c9436 100644 --- a/pywatershed/base/process.py +++ b/pywatershed/base/process.py @@ -17,63 +17,75 @@ class Process(Accessor): - """Process base class + """Base class for physical process representation. - Process is a base class for physical processes. - - The class aims to describe itself through it staticmethods and + The class aims to describe itself through its staticmethods and properties. Conventions are adopted through the use of the following properties/methods: - inputs/get_inputs(): - List the names of variables required from external sources. - Still working on conventions if these are to be modified. - For an input to be successfully inicluded, - that variable must be defined in the metadata - (pywatershed/static/metadata/variables.yaml). 
- Efforts should be made to not use diagnostic variables as input - as much as possible. - variables/get_variables(): - List the names of internal public variables. If not necessary, - to be public, variables should be made private with a single, - leading underscore and not maintained in this list. For an input - to be successfully inicluded, that variable must be defined in the - metadata (pywatershed/static/metadata/variables.yaml). - Efforts should be made not to track diagnostic variables in this - public variable set, as much as possible. - parameters/get_parameters(): - List the names of parameters used by the subclass. - description: - Return a dictionary of with the process subclass name and its - metadata for all variables for each of inputs, variables, and - parameters. - get_init_values: - Return a dictionary of initialization values for variables. - Note that these may be overridden by subclass initialization - routines (e.g. using parameters) or by restart values. So these - are not "initial values", they are initialization values that - are set when the variable is declared from metadata in - _initialize_var(). Initization values should be nan as much as - possible. - _advance_variables(): - This advance should exactly specify the prognostic variables in - setting previous values to current values. When/if necessary to - keep previous diagnostic variables, those must not appear here but - in _calculate(). - _calculate(): - This method is to be overridden by the subclass. Near the end of - the method, the subclass should calculate its changes in mass and - energy storage in an obvious way. As commented for - mass_budget_terms, storage changes should only be tracked for - prognostic variables. (For example is snow_water_equiv = snow_ice + - snow_liquid, then storage changes for snow_ice and snow_liquid - should be tracked and not for snow_water_equiv). 
- - Args: - control: a Control object - + inputs/get_inputs(): + List the names of variables required from external sources. + Still working on conventions if these are to be modified. + For an input to be successfully inicluded, + that variable must be defined in the metadata + (pywatershed/static/metadata/variables.yaml). + Efforts should be made to not use diagnostic variables as input + as much as possible. + variables/get_variables(): + List the names of internal public variables. If not necessary, + to be public, variables should be made private with a single, + leading underscore and not maintained in this list. For an input + to be successfully inicluded, that variable must be defined in the + metadata (pywatershed/static/metadata/variables.yaml). + Efforts should be made not to track diagnostic variables in this + public variable set, as much as possible. + parameters/get_parameters(): + List the names of parameters used by the subclass. + description: + Return a dictionary of with the process subclass name and its + metadata for all variables for each of inputs, variables, and + parameters. + get_init_values: + Return a dictionary of initialization values for variables. + Note that these may be overridden by subclass initialization + routines (e.g. using parameters) or by restart values. So these + are not "initial values", they are initialization values that + are set when the variable is declared from metadata in + _initialize_var(). Initization values should be nan as much as + possible. + _advance_variables(): + This advance should exactly specify the prognostic variables in + setting previous values to current values. When/if necessary to + keep previous diagnostic variables, those must not appear here but + in _calculate(). + _calculate(): + This method is to be overridden by the subclass. Near the end of + the method, the subclass should calculate its changes in mass and + energy storage in an obvious way. 
As commented for + mass_budget_terms, storage changes should only be tracked for + prognostic variables. (For example is snow_water_equiv = snow_ice + + snow_liquid, then storage changes for snow_ice and snow_liquid + should be tracked and not for snow_water_equiv). + + See Also + -------- + pywatershed.base.ConservativeProcess + + Args + ---- + control: + A Control object + discretization: + A discretization object + parameters: + The parameters for this object + metadata_patches: + Override static metadata for any public parameter or variable -- + experimental. + metadata_patch_conflicts: + How to handle metadata_patches conflicts. Experimental. """ def __init__( @@ -98,9 +110,6 @@ def __init__( # netcdf output variables self._netcdf_initialized = False - self._netcdf_output_dir = None - self._netcdf = None - self._separate_netcdf = True self._itime_step = -1 @@ -328,7 +337,6 @@ def _set_options(self, init_locals): inputs_args = self.inputs non_option_args = process_init_args.union(inputs_args) option_names = init_arg_names.difference(non_option_args) - for opt in option_names: if opt in init_locals.keys() and init_locals[opt] is not None: setattr(self, f"_{opt}", init_locals[opt]) @@ -437,8 +445,8 @@ def output_to_csv(self, pth): def initialize_netcdf( self, - output_dir: [str, pl.Path], - separate_files: bool = True, + output_dir: [str, pl.Path] = None, + separate_files: bool = None, output_vars: list = None, ) -> None: """Initialize NetCDF output. @@ -466,28 +474,46 @@ def initialize_netcdf( if self._verbose: print(f"initializing netcdf output for: {self.name}") + ( + output_dir, + output_vars, + separate_files, + ) = self._reconcile_nc_args_w_control_opts( + output_dir, output_vars, separate_files + ) + + # apply defaults if necessary + if output_dir is None: + msg = ( + "An output directory is required to be specified for netcdf" + "initialization." 
+ ) + raise ValueError(msg) + + if separate_files is None: + separate_files = True + self._netcdf_separate = separate_files + self._netcdf_initialized = True self._netcdf_output_dir = pl.Path(output_dir) if output_vars is None: - self._output_vars = self.variables + self._netcdf_output_vars = self.variables else: - self._output_vars = list( + self._netcdf_output_vars = list( set(output_vars).intersection(set(self.variables)) ) - - if self._output_vars is None: - self._netcdf_initialized = False - return + if len(self._netcdf_output_vars) == 0: + self._netcdf_initialized = False + return self._netcdf = {} - if separate_files: - self._separate_netcdf = True + if self._netcdf_separate: # make working directory self._netcdf_output_dir.mkdir(parents=True, exist_ok=True) for variable_name in self.variables: - if (self._output_vars is not None) and ( - variable_name not in self._output_vars + if (self._netcdf_output_vars is not None) and ( + variable_name not in self._netcdf_output_vars ): continue nc_path = self._netcdf_output_dir / f"{variable_name}.nc" @@ -498,17 +524,23 @@ def initialize_netcdf( {variable_name: self.meta[variable_name]}, {"process class": self.name}, ) + else: - initial_variable = self.variables[0] + if self._netcdf_output_vars is None: + the_out_vars = self.variables + else: + the_out_vars = self._netcdf_output_vars + + initial_variable = the_out_vars[0] self._netcdf_output_dir.mkdir(parents=True, exist_ok=True) self._netcdf[initial_variable] = NetCdfWrite( self._netcdf_output_dir / f"{self.name}.nc", self.params.coords, - self.variables, + self._netcdf_output_vars, self.meta, {"process class": self.name}, ) - for variable in self.variables[1:]: + for variable in the_out_vars[1:]: self._netcdf[variable] = self._netcdf[initial_variable] return @@ -523,11 +555,11 @@ def _output_netcdf(self) -> None: if self._netcdf_initialized: time_added = False for variable in self.variables: - if (self._output_vars is not None) and ( - variable not in 
self._output_vars + if (self._netcdf_output_vars is not None) and ( + variable not in self._netcdf_output_vars ): continue - if not time_added or self._separate_netcdf: + if not time_added or self._netcdf_separate: time_added = True self._netcdf[variable].add_simulation_time( self.control.itime_step, self.control.current_datetime @@ -548,12 +580,64 @@ def _finalize_netcdf(self) -> None: """ if self._netcdf_initialized: for idx, variable in enumerate(self.variables): - if (self._output_vars is not None) and ( - variable not in self._output_vars + if (self._netcdf_output_vars is not None) and ( + variable not in self._netcdf_output_vars ): continue self._netcdf[variable].close() - if not self._separate_netcdf: + if not self._netcdf_separate: break return + + def _reconcile_nc_args_w_control_opts( + self, output_dir, output_vars, separate_files + ): + # can treat the other args but they are not yet in the available opts + arg_opt_name_map = { + "output_dir": "netcdf_output_dir", + "output_vars": "netcdf_output_var_names", + "separate_files": "netcdf_output_separate_files", + } + + args = { + "output_dir": output_dir, + "output_vars": output_vars, + "separate_files": separate_files, + } + + for vv in args.keys(): + arg_val = args[vv] + opt_name = arg_opt_name_map[vv] + opts = self.control.options + if opt_name in opts.keys(): + opt_val = opts[opt_name] + else: + opt_val = None + + # if options is a list (output_vars), take it's intersection with + # self.variables + + # set value into args to return to return + + # the 4 cases: + if opt_val is None and arg_val is None: + pass + + elif opt_val is None: + pass + + elif arg_val is None: + args[vv] = opt_val + + elif opt_val is not None and arg_val is not None: + if opt_val == arg_val: + pass + else: + msg = ( + f"control.option '{opt_name}' conflicts with " + f"initialize_netcdf() argument {vv}" + ) + raise ValueError(msg) + + return args["output_dir"], args["output_vars"], args["separate_files"] diff --git 
a/pywatershed/hydrology/prms_canopy.py b/pywatershed/hydrology/prms_canopy.py index 3681b616..2622ae4d 100644 --- a/pywatershed/hydrology/prms_canopy.py +++ b/pywatershed/hydrology/prms_canopy.py @@ -40,6 +40,18 @@ class PRMSCanopy(ConservativeProcess): """PRMS canopy class. + A canopy or vegetation representation from PRMS. + + Implementation based on PRMS 5.2.1 with theoretical documentation given in + the PRMS-IV documentation: + + `Markstrom, S. L., Regan, R. S., Hay, L. E., Viger, R. J., Webb, R. M., + Payn, R. A., & LaFontaine, J. H. (2015). PRMS-IV, the + precipitation-runoff modeling system, version 4. US Geological Survey + Techniques and Methods, 6, B7. + `__ + + Args: control: a Control object discretization: a discretization of class Parameters diff --git a/pywatershed/hydrology/prms_channel.py b/pywatershed/hydrology/prms_channel.py index 3447e8bb..791b30ca 100644 --- a/pywatershed/hydrology/prms_channel.py +++ b/pywatershed/hydrology/prms_channel.py @@ -21,6 +21,17 @@ class PRMSChannel(ConservativeProcess): """PRMS channel flow (muskingum_mann). + A representation of channel flow from PRMS. + + Implementation based on PRMS 5.2.1 with theoretical documentation given in + the PRMS-IV documentation: + + `Markstrom, S. L., Regan, R. S., Hay, L. E., Viger, R. J., Webb, R. M., + Payn, R. A., & LaFontaine, J. H. (2015). PRMS-IV, the + precipitation-runoff modeling system, version 4. US Geological Survey + Techniques and Methods, 6, B7. + `__ + The muskingum module was originally developed for the Precipitation Runoff Modeling System (PRMS) by Mastin and Vaccaro (2002) and developed further by Markstrom and others (2008). This module has been modified from past @@ -52,11 +63,11 @@ class PRMSChannel(ConservativeProcess): using mann_n, seg_length, seg_depth (bank full), and seg_slope. 
The velocity at bank full segment depth is calculated using Manning's equation - velocity = ((1/n) sqrt(seg_slope) seg_depth**(2/3) + ``velocity = ((1/n) sqrt(seg_slope) seg_depth**(2/3)`` K_coef ,in hours, is then calculated using - K_coef = seg_length / (velocity * 60 * 60) + ``K_coef = seg_length / (velocity * 60 * 60)`` K_coef values computed greater than 24.0 are set to 24.0, values computed less than 0.01 are set to 0.01, and the value for lake HRUs is set to 24.0. diff --git a/pywatershed/hydrology/prms_groundwater.py b/pywatershed/hydrology/prms_groundwater.py index d2e69c98..8e7ca191 100644 --- a/pywatershed/hydrology/prms_groundwater.py +++ b/pywatershed/hydrology/prms_groundwater.py @@ -20,6 +20,15 @@ class PRMSGroundwater(ConservativeProcess): """PRMS groundwater reservoir. + Implementation based on PRMS 5.2.1 with theoretical documentation given in + the PRMS-IV documentation: + + `Markstrom, S. L., Regan, R. S., Hay, L. E., Viger, R. J., Webb, R. M., + Payn, R. A., & LaFontaine, J. H. (2015). PRMS-IV, the + precipitation-runoff modeling system, version 4. US Geological Survey + Techniques and Methods, 6, B7. + `__ + Args: control: a Control object discretization: a discretization of class Parameters diff --git a/pywatershed/hydrology/prms_runoff.py b/pywatershed/hydrology/prms_runoff.py index 301bd4f3..356c29b8 100644 --- a/pywatershed/hydrology/prms_runoff.py +++ b/pywatershed/hydrology/prms_runoff.py @@ -35,6 +35,18 @@ class PRMSRunoff(ConservativeProcess): """PRMS surface runoff. + A surface runoff representation from PRMS. + + Implementation based on PRMS 5.2.1 with theoretical documentation given in + the PRMS-IV documentation: + + `Markstrom, S. L., Regan, R. S., Hay, L. E., Viger, R. J., Webb, R. M., + Payn, R. A., & LaFontaine, J. H. (2015). PRMS-IV, the + precipitation-runoff modeling system, version 4. US Geological Survey + Techniques and Methods, 6, B7. 
+ `__ + + Args: control: a Control object discretization: a discretization of class Parameters diff --git a/pywatershed/hydrology/prms_snow.py b/pywatershed/hydrology/prms_snow.py index d1a37bf3..1bdbce52 100644 --- a/pywatershed/hydrology/prms_snow.py +++ b/pywatershed/hydrology/prms_snow.py @@ -75,6 +75,17 @@ class PRMSSnow(ConservativeProcess): """PRMS snow pack. + A snow representation from PRMS. + + Implementation based on PRMS 5.2.1 with theoretical documentation given in + the PRMS-IV documentation: + + `Markstrom, S. L., Regan, R. S., Hay, L. E., Viger, R. J., Webb, R. M., + Payn, R. A., & LaFontaine, J. H. (2015). PRMS-IV, the + precipitation-runoff modeling system, version 4. US Geological Survey + Techniques and Methods, 6, B7. + `__ + Args: control: a Control object discretization: a discretization of class Parameters diff --git a/pywatershed/hydrology/prms_soilzone.py b/pywatershed/hydrology/prms_soilzone.py index 732ed7fa..6de65784 100644 --- a/pywatershed/hydrology/prms_soilzone.py +++ b/pywatershed/hydrology/prms_soilzone.py @@ -26,6 +26,15 @@ class PRMSSoilzone(ConservativeProcess): """PRMS soil zone. + Implementation based on PRMS 5.2.1 with theoretical documentation given in + the PRMS-IV documentation: + + `Markstrom, S. L., Regan, R. S., Hay, L. E., Viger, R. J., Webb, R. M., + Payn, R. A., & LaFontaine, J. H. (2015). PRMS-IV, the + precipitation-runoff modeling system, version 4. US Geological Survey + Techniques and Methods, 6, B7. 
+ `__ + Args: control: a Control object discretization: a discretization of class Parameters diff --git a/pywatershed/parameters/prms_parameters.py b/pywatershed/parameters/prms_parameters.py index 74573a9b..6d26d98d 100644 --- a/pywatershed/parameters/prms_parameters.py +++ b/pywatershed/parameters/prms_parameters.py @@ -49,23 +49,11 @@ def _json_load(json_filename): class PrmsParameters(Parameters): - """ - PRMS parameter class - - Args: - parameter_dict : dict - parameters dictionary: either structure - - * param: value - * process: {param: value ... } + """A parameter class with methods for native PRMS files. - where the later is a parameter dictionary grouped by process. - The keys for process should be either the class itself, class.name, - or type(class.__name__). - parameter_dimensions_dict : dict - parameters dimensions dictionary with a structure mirring the - parameter dict as described above but with shape tuples in place - of parameter value data. + See Also + -------- + pywatershed.Parameters """ @@ -77,6 +65,7 @@ def __init__( metadata: dict = None, encoding: dict = None, validate: bool = True, + copy: bool = True, ) -> None: if dims is None: dims = {} @@ -96,6 +85,7 @@ def __init__( metadata=metadata, encoding=encoding, validate=validate, + copy=copy, ) return @@ -114,7 +104,7 @@ def dimensions(self) -> dict: dimensions[key] = value return dimensions - def parameters_to_json(self, json_filename): + def parameters_to_json(self, json_filename) -> None: """write the parameters dictionary out to a json file""" json.dump( {**self.dims, **self.parameters}, @@ -122,8 +112,7 @@ def parameters_to_json(self, json_filename): indent=4, cls=JSONParameterEncoder, ) - - return + return None @staticmethod def _from_dict(param_dict: dict) -> "PrmsParameters": @@ -132,14 +121,9 @@ def _from_dict(param_dict: dict) -> "PrmsParameters": @staticmethod def load_from_json(json_filename: fileish) -> "PrmsParameters": - """Load parameters from a json file - + """Load 
parameters from a json file. Args: - : json file path - - Returns: - PrmsParameters: full PRMS parameter dictionary - + json_filename: json file path """ pars = _json_load(json_filename) params = PrmsParameters._process_file_input(pars) diff --git a/pywatershed/utils/time_utils.py b/pywatershed/utils/time_utils.py index d2f0b7bd..05c0d7f4 100644 --- a/pywatershed/utils/time_utils.py +++ b/pywatershed/utils/time_utils.py @@ -20,19 +20,28 @@ def datetime_year(dt64: np.datetime64) -> int: def datetime_month(dt64: np.datetime64, zero_based=False) -> int: - """Get the month from np.datetime64""" + """Get the month from np.datetime64 + Args: + zero_based: count as a zero-based index. + """ return dt64.astype("datetime64[M]").astype(int) % 12 + _offset(zero_based) def datetime_doy(dt64: np.datetime64, zero_based=False) -> int: - """Get day of year from np.datetime64""" + """Get day of year from np.datetime64 + Args: + zero_based: count as a zero-based index. + """ return (dt64 - dt64.astype("datetime64[Y]")).astype( "timedelta64[D]" ).astype(int) + _offset(zero_based) def datetime_dowy(dt64: np.datetime64, zero_based=False) -> int: - """Get day of water year from np.datetime64""" + """Get day of water year from np.datetime64 + Args: + zero_based: count as a zero-based index. + """ year_start = datetime_year(dt64) if datetime_month(dt64) < 10: year_start -= 1