From 554899cf161e4d7848db2288caeb39429cd3b93d Mon Sep 17 00:00:00 2001 From: James McCreight Date: Tue, 10 Oct 2023 14:10:07 -0600 Subject: [PATCH] Control .to_yaml and .to_dict methods --- autotest/test_control.py | 8 +++ autotest/test_model.py | 9 +-- examples/00_processes.ipynb | 15 +---- examples/01_multi-process_models.ipynb | 87 +++++++++++--------------- examples/02_prms_legacy_models.ipynb | 18 ++++++ pywatershed/base/control.py | 79 +++++++++++++++++++++-- pywatershed/base/model.py | 41 ++++++------ pywatershed/utils/path.py | 27 +++++--- pywatershed/utils/utils.py | 4 +- 9 files changed, 185 insertions(+), 103 deletions(-) diff --git a/autotest/test_control.py b/autotest/test_control.py index 851a22a3..49e1631e 100644 --- a/autotest/test_control.py +++ b/autotest/test_control.py @@ -193,3 +193,11 @@ def test_setitem_setattr(domain): # The value for options must be a dictionary with pytest.raises(ValueError): ctl.options = None + + +def test_yaml_roundtrip(domain, tmp_path): + ctl = Control.load_prms(domain["control_file"], warn_unused_options=False) + yml_file = tmp_path / "control.yaml" + ctl.to_yaml(yml_file) + ctl_2 = Control.from_yaml(yml_file) + np.testing.assert_equal(ctl.to_dict(), ctl_2.to_dict()) diff --git a/autotest/test_model.py b/autotest/test_model.py index 4312c89c..3a2ef04b 100644 --- a/autotest/test_model.py +++ b/autotest/test_model.py @@ -31,7 +31,7 @@ } -invoke_style = ("prms", "model_dict", "model_dict_from_yml") +invoke_style = ("prms", "model_dict", "model_dict_from_yaml") @pytest.fixture(scope="function") @@ -106,9 +106,9 @@ def model_args(domain, control, discretization, request): "parameters": None, } - elif invoke_style == "model_dict_from_yml": - yml_file = domain["dir"] / "nhm_model.yml" - model_dict = Model.model_dict_from_yml(yml_file) + elif invoke_style == "model_dict_from_yaml": + yaml_file = domain["dir"] / "nhm_model.yml" + model_dict = Model.model_dict_from_yaml(yaml_file) args = { "process_list_or_model_dict": model_dict, @@ -143,6 +143,7 @@ def test_model(domain, model_args, tmp_path): control = model_args["control"] control.options["input_dir"] = input_dir + control.options["netcdf_output_dir"] = tmp_path / "output" model = Model(**model_args) diff --git a/examples/00_processes.ipynb b/examples/00_processes.ipynb index 6ada3abb..58d17fc8 100644 --- a/examples/00_processes.ipynb +++ b/examples/00_processes.ipynb @@ -54,8 +54,7 @@ "editable": true, "slideshow": { "slide_type": "" - }, - "tags": [] + } }, "outputs": [], "source": [ @@ -545,22 +544,14 @@ } ], "metadata": { - "kernelspec": { - "display_name": "Python [conda env:pws2] *", - "language": "python", - "name": "conda-env-pws2-py" - }, "language_info": { "codemirror_mode": { - "name": "ipython", - "version": 3 + "name": "ipython" }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" + "nbconvert_exporter": "python" } }, "nbformat": 4, diff --git a/examples/01_multi-process_models.ipynb b/examples/01_multi-process_models.ipynb index b1cf7bfe..c3442c65 100644 --- a/examples/01_multi-process_models.ipynb +++ b/examples/01_multi-process_models.ipynb @@ -351,15 +351,8 @@ "source": [ "run_dir = pl.Path(nb_output_dir / \"nhm_yaml\")\n", "run_dir.mkdir(exist_ok=True)\n", - "control_dict = control.options | {\n", - " \"start_time\": str(control.start_time),\n", - " \"end_time\": str(control.end_time),\n", - " \"time_step\": str(control.time_step)[0:2],\n", - " \"time_step_units\": 
str(control.time_step)[3:4],\n", - " \"netcdf_output_dir\": run_dir,\n", - "}\n", - "\n", - "pprint(control_dict, sort_dicts=False)" + "control_yaml_file = run_dir / \"control.yml\"\n", + "control.to_yaml(control_yaml_file)" ] }, { @@ -389,11 +382,7 @@ " elif isinstance(val, pl.Path):\n", " the_dict[key] = str(val)\n", "\n", - " return the_dict\n", - "\n", - "\n", - "control_dict = dict_pl_to_str(control_dict)\n", - "pprint(control_dict, sort_dicts=False)" + " return the_dict" ] }, { @@ -411,7 +400,6 @@ "metadata": {}, "outputs": [], "source": [ - "control_yaml_file = run_dir / \"control.yml\"\n", "model_dict = {\n", " \"control\": control_yaml_file.resolve(),\n", " \"dis_hru\": domain_dir / \"parameters_dis_hru.nc\",\n", @@ -486,11 +474,8 @@ "outputs": [], "source": [ "model_dict_yaml_file = run_dir / \"model_dict.yml\"\n", - "# the control yaml file was given above and is in the model_dict\n", - "dump_dict = {control_yaml_file: control_dict, model_dict_yaml_file: model_dict}\n", - "for key, val in dump_dict.items():\n", - " with open(key, \"w\") as file:\n", - " documents = yaml.dump(val, file)" + "with open(model_dict_yaml_file, \"w\") as file:\n", + " _ = yaml.dump(model_dict, file)" ] }, { @@ -536,7 +521,7 @@ "metadata": {}, "outputs": [], "source": [ - "model_yml = pws.Model.from_yml(model_dict_yaml_file)\n", + "model_yml = pws.Model.from_yaml(model_dict_yaml_file)\n", "model_yml" ] }, @@ -698,8 +683,8 @@ "metadata": {}, "outputs": [], "source": [ - "control_dict_copy = deepcopy(control_dict)\n", - "model_dict_copy = deepcopy(model_dict)" + "run_dir = pl.Path(nb_output_dir / \"yml_less_output\").resolve()\n", + "run_dir.mkdir(exist_ok=True)" ] }, { @@ -709,12 +694,9 @@ "metadata": {}, "outputs": [], "source": [ - "run_dir = pl.Path(nb_output_dir / \"yml_less_output\").resolve()\n", - "run_dir.mkdir(exist_ok=True)\n", - "\n", - "control_dict_copy[\"netcdf_output_dir\"] = str(run_dir.resolve())\n", - "control_yaml_file = run_dir / \"control.yml\"\n", - "control_dict_copy[\"netcdf_output_var_names\"] = [\n", + "control_cp = deepcopy(control)\n", + "control_cp.options[\"netcdf_output_dir\"] = str(run_dir.resolve())\n", + "control_cp.options[\"netcdf_output_var_names\"] = [\n", " var\n", " for ll in [\n", " pws.PRMSGroundwater.get_variables(),\n", @@ -722,7 +704,10 @@ " ]\n", " for var in ll\n", "]\n", - "pprint(control_dict_copy, sort_dicts=False)" + "pprint(control_cp.to_dict(), sort_dicts=False)\n", + "\n", + "control_yaml_file = run_dir / \"control.yml\"\n", + "control_cp.to_yaml(control_yaml_file)" ] }, { @@ -740,6 +725,7 @@ "metadata": {}, "outputs": [], "source": [ + "model_dict_copy = deepcopy(model_dict)\n", "model_dict_copy[\"control\"] = str(control_yaml_file)\n", "model_dict_yaml_file = run_dir / \"model_dict.yml\"" ] @@ -759,13 +745,8 @@ "metadata": {}, "outputs": [], "source": [ - "dump_dict = {\n", - " control_yaml_file: control_dict_copy,\n", - " model_dict_yaml_file: model_dict_copy,\n", - "}\n", - "for key, val in dump_dict.items():\n", - " with open(key, \"w\") as file:\n", - " documents = yaml.dump(val, file)" + "with open(model_dict_yaml_file, \"w\") as file:\n", + " _ = yaml.dump(model_dict_copy, file)" ] }, { @@ -783,7 +764,7 @@ "metadata": {}, "outputs": [], "source": [ - "submodel = pws.Model.from_yml(model_dict_yaml_file)\n", + "submodel = pws.Model.from_yaml(model_dict_yaml_file)\n", "submodel" ] }, @@ -915,9 +896,9 @@ }, "outputs": [], "source": [ - "yml_output_dir = pl.Path(control_dict[\"netcdf_output_dir\"])\n", + "yaml_output_dir = 
pl.Path(control.options[\"netcdf_output_dir\"])\n", "for ii in submodel_file_inputs:\n", - " input_file = yml_output_dir / f\"{ii}.nc\"\n", + " input_file = yaml_output_dir / f\"{ii}.nc\"\n", " assert input_file.exists()\n", " print(input_file)" ] @@ -946,9 +927,12 @@ "run_dir.mkdir(exist_ok=True)\n", "\n", "# key that inputs exist from previous full-model run\n", - "control_dict[\"input_dir\"] = str(yml_output_dir.resolve())\n", - "control_dict[\"netcdf_output_dir\"] = str(run_dir.resolve())\n", - "control_yaml_file = run_dir / \"control.yml\"" + "control_cp = deepcopy(control)\n", + "control_cp.options[\"input_dir\"] = yaml_output_dir.resolve()\n", + "control_cp.options[\"netcdf_output_dir\"] = run_dir.resolve()\n", + "control_yaml_file = run_dir / \"control.yml\"\n", + "control_cp.to_yaml(control_yaml_file)\n", + "pprint(control.to_dict(), sort_dicts=False)" ] }, { @@ -973,7 +957,8 @@ "for kk in list(model_dict.keys()):\n", " if isinstance(model_dict[kk], dict) and kk not in keep_procs:\n", " del model_dict[kk]\n", - "pprint(control_dict, sort_dicts=False)\n", + "\n", + "\n", "pprint(model_dict, sort_dicts=False)" ] }, @@ -992,10 +977,8 @@ "metadata": {}, "outputs": [], "source": [ - "dump_dict = {control_yaml_file: control_dict, model_dict_yaml_file: model_dict}\n", - "for key, val in dump_dict.items():\n", - " with open(key, \"w\") as file:\n", - " documents = yaml.dump(val, file)" + "with open(model_dict_yaml_file, \"w\") as file:\n", + " _ = yaml.dump(model_dict, file)" ] }, { @@ -1013,7 +996,7 @@ "metadata": {}, "outputs": [], "source": [ - "submodel = pws.Model.from_yml(model_dict_yaml_file)\n", + "submodel = pws.Model.from_yaml(model_dict_yaml_file)\n", "submodel" ] }, @@ -1154,7 +1137,7 @@ "outputs": [], "source": [ "var = \"recharge\"\n", - "nhm_ds = xr.open_dataset(yml_output_dir / f\"{var}.nc\")\n", + "nhm_ds = xr.open_dataset(yaml_output_dir / f\"{var}.nc\")\n", "sub_ds = xr.open_dataset(run_dir / f\"{var}.nc\")" ] }, @@ -1187,7 +1170,7 @@ "outputs": [], "source": [ "for var in submodel_variables:\n", - " nhm_da = xr.open_dataset(yml_output_dir / f\"{var}.nc\")[var]\n", + " nhm_da = xr.open_dataset(yaml_output_dir / f\"{var}.nc\")[var]\n", " sub_da = xr.open_dataset(run_dir / f\"{var}.nc\")[var]\n", " xr.testing.assert_equal(nhm_da, sub_da)" ] @@ -1200,7 +1183,7 @@ "outputs": [], "source": [ "# var_name = \"dprst_seep_hru\"\n", - "nhm_da = xr.open_dataset(yml_output_dir / f\"{var_name}.nc\")[var_name]\n", + "nhm_da = xr.open_dataset(yaml_output_dir / f\"{var_name}.nc\")[var_name]\n", "sub_da = xr.open_dataset(run_dir / f\"{var_name}.nc\")[var_name]\n", "scat = xr.merge(\n", " [nhm_da.rename(f\"{var_name}_yaml\"), sub_da.rename(f\"{var_name}_subset\")]\n", diff --git a/examples/02_prms_legacy_models.ipynb b/examples/02_prms_legacy_models.ipynb index c45adca9..04be7707 100644 --- a/examples/02_prms_legacy_models.ipynb +++ b/examples/02_prms_legacy_models.ipynb @@ -250,6 +250,24 @@ "}" ] }, + { + "cell_type": "markdown", + "id": "ed82f8d1-8bfc-469e-a968-f86e029c7a5f", + "metadata": {}, + "source": [ + "We note that the `netcdf_output_var_names` in `control.options` is the combination of `nhruOutVar_names` and `nsegmentOutVar_names` from the PRMS-native `control.test` file. In the next section we'll customize this list of variables names, but here we list what we'll output with our current simulation." 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e28f2df1-df17-451f-87ed-5d8d1e9d8b7e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "control.options[\"netcdf_output_var_names\"]"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "0b46e9ca-e84b-40b3-bdc5-179fd6c85555",
diff --git a/pywatershed/base/control.py b/pywatershed/base/control.py
index 0153f54b..a0b8d618 100644
--- a/pywatershed/base/control.py
+++ b/pywatershed/base/control.py
@@ -2,14 +2,16 @@
 import pathlib as pl
 from collections import UserDict
 from copy import deepcopy
+from typing import Union
 from warnings import warn
 
 import numpy as np
+import yaml
 
 from ..base import meta
 from ..constants import fileish
 from ..utils import ControlVariables
-from ..utils.path import assert_exists, path_rel_to_yml
+from ..utils.path import assert_exists, dict_pl_to_str, path_rel_to_yaml
 from ..utils.time_utils import (
     datetime_dowy,
     datetime_doy,
@@ -360,9 +362,74 @@ def edit_n_time_steps(self, new_n_time_steps: int):
         )
         return
 
+    def __str__(self):
+        from pprint import pformat
+
+        return pformat(self.to_dict())
+
+    def __repr__(self):
+        # TODO: this is not really an object representation
+        return self.__str__()
+
+    def to_dict(self, deep_copy=True):
+        """Export a Control object to a dictionary.
+
+        Args:
+            deep_copy: If True (default), deep copy the options into the result.
+        """
+
+        control_dict = {}
+
+        # These are currently the only non-option items in __dict__
+        # required to reconstitute a Control instance, though the list
+        # could grow with time.
+        control_dict["start_time"] = str(self.start_time)
+        control_dict["end_time"] = str(self.end_time)
+        control_dict["time_step"] = str(self.time_step)[0:2]
+        control_dict["time_step_units"] = str(self.time_step)[3:4]
+
+        if deep_copy:
+            control = deepcopy(self)
+        else:
+            control = self
+
+        control_dict["options"] = {}
+        for kk, vv in control.options.items():
+            control_dict["options"][kk] = control.options[kk]
+
+        return control_dict
+
+    def to_yaml(self, yaml_file: Union[pl.Path, str]):
+        """Export to a yaml file
+
+        Note: This flattens .options to the top level of the yaml/dict
+        so that option keys are all at the same level as "start_time",
+        "end_time", "time_step", and "time_step_units". Using .from_yaml
+        will restore options to a nested dictionary.
+
+        Args:
+            yaml_file: pl.Path or str to designate the output path/file.
+        """
+        control_dict = dict_pl_to_str(self.to_dict())
+        opts = control_dict["options"]
+        for kk, vv in opts.items():
+            if kk in control_dict.keys():
+                msg = "Control option keys collide with non-option keys"
+                raise ValueError(msg)
+            control_dict[kk] = vv
+
+        del control_dict["options"]
+
+        yaml_file = pl.Path(yaml_file)
+        with open(yaml_file, "w") as file:
+            _ = yaml.dump(control_dict, file)
+
+        assert yaml_file.exists()
+        return None
+
     @staticmethod
-    def from_yml(yml_file):
-        """Instantate a Control object from a yml file
+    def from_yaml(yaml_file):
+        """Instantiate a Control object from a yaml file
 
         Required key:value pairs:
             start_time: ISO8601 string for numpy datetime64,
@@ -395,7 +462,7 @@ def from_yml(yml_file):
         """
         import yaml
 
-        with pl.Path(yml_file).open("r") as file_stream:
+        with pl.Path(yaml_file).open("r") as file_stream:
             control_dict = yaml.load(file_stream, Loader=yaml.Loader)
 
         start_time = np.datetime64(control_dict["start_time"])
@@ -411,8 +478,8 @@ def from_yml(yml_file):
         paths_to_convert = ["input_dir"]
         for path_name in paths_to_convert:
             if path_name in control_dict.keys():
-                control_dict[path_name] = path_rel_to_yml(
-                    control_dict[path_name], yml_file
+                control_dict[path_name] = path_rel_to_yaml(
+                    control_dict[path_name], yaml_file
                 )
                 assert_exists(control_dict[path_name])
 
diff --git a/pywatershed/base/model.py b/pywatershed/base/model.py
index de0dc8fb..66771d23 100644
--- a/pywatershed/base/model.py
+++ b/pywatershed/base/model.py
@@ -10,7 +10,7 @@
 from ..base.control import Control
 from ..constants import fileish
 from ..parameters import Parameters, PrmsParameters
-from ..utils.path import path_rel_to_yml
+from ..utils.path import path_rel_to_yaml
 
 # This is a convenience
 process_order_nhm = [
@@ -95,7 +95,7 @@ class Model:
     Only one control object can be included in the model dictionary. Though
     the key for the control can be arbitrary, the value is either an instance
     of class Control or, in the case of a yaml model dictionary, a control
-    yaml file to be loaded by Control.from_yml() (todo: link to this
+    yaml file to be loaded by Control.from_yaml() (todo: link to this
     staticmethod).
     - **discretizations** - Multiple discretizations may be supplied to the
       model dictionary, each with arbitrary names. These provide spatial
@@ -328,7 +328,7 @@ class Model:
                 with open(key, "w") as file:
                     documents = yaml.dump(val, file)
 
-            model = pws.Model.from_yml(model_dict_file)
+            model = pws.Model.from_yaml(model_dict_file)
             model.run()
             control_file.unlink()
             model_dict_file.unlink()
@@ -643,14 +643,14 @@ def _find_input_files(self) -> None:
         return
 
     @staticmethod
-    def model_dict_from_yml(yml_file: Union[str, pl.Path]) -> dict:
+    def model_dict_from_yaml(yaml_file: Union[str, pl.Path]) -> dict:
        """Generate a model dictionary from a yaml file.
 
-        Instead of Model.from_yml() it can be useful to get the model
+        Instead of Model.from_yaml() it can be useful to get the model
         dictionary before passing it to Model.
 
         Args:
-            yml_file: a yml file
+            yaml_file: a yaml file
 
         Returns:
             A model dictionary.
@@ -659,19 +659,19 @@ def model_dict_from_yml(yml_file: Union[str, pl.Path]) -> dict: import pywatershed - with pl.Path(yml_file).open("r") as file_stream: + with pl.Path(yaml_file).open("r") as file_stream: model_dict = yaml.load(file_stream, Loader=yaml.Loader) for key, val in model_dict.items(): if isinstance(val, str): - val_pl = path_rel_to_yml(val, yml_file) - if val.endswith(".yml"): - model_dict[key] = Control.from_yml(val_pl) + val_pl = path_rel_to_yaml(val, yaml_file) + if (val.endswith(".yml")) or (val.endswith(".yaml")): + model_dict[key] = Control.from_yaml(val_pl) elif val.endswith(".nc"): model_dict[key] = Parameters.from_netcdf(val_pl) else: msg = ( - "Unsupported file extension for control (.yml)" + "Unsupported file extension for control (.yml/.yaml)" "and parameter (.nc) file paths in model yaml file" ) raise ValueError(msg) @@ -684,7 +684,7 @@ def model_dict_from_yml(yml_file: Union[str, pl.Path]) -> dict: cls = val["class"] val["class"] = getattr(pywatershed, cls) par = val["parameters"] - par_pl = path_rel_to_yml(par, yml_file) + par_pl = path_rel_to_yaml(par, yaml_file) val["parameters"] = Parameters.from_netcdf( par_pl, encoding=False ) @@ -697,13 +697,13 @@ def model_dict_from_yml(yml_file: Union[str, pl.Path]) -> dict: return model_dict @staticmethod - def from_yml(yml_file: Union[str, pl.Path]): + def from_yaml(yaml_file: Union[str, pl.Path]): """Instantiate a Model from a yaml file A yaml file that specifies a model_dict as the first argument of Model. Args: - yml_file: str or pathlib.Path + yaml_file: str or pathlib.Path Returns: An instance of Model. @@ -711,10 +711,11 @@ def from_yml(yml_file: Union[str, pl.Path]): Yaml file structure (strict order not required, but suggested): Control object: Any name can be used but the value must be a control - yaml file specified with the suffix ".yml". E.g "name: control.yml" + yaml file specified with the suffix ".yaml". E.g + "name: control.yaml" would appear in the passed yaml file. Only one control - specification is allowed in the yml_file. For details on the - requirements of the control.yml file see `Control.from_yml` + specification is allowed in the yaml_file. For details on the + requirements of the control.yaml file see `Control.from_yaml` Discretization objects: Any number of discretization objects can be supplied with arbitrary (though unique) names. The values supplied for each discretization must be a valid netcdf file with suffix @@ -734,11 +735,11 @@ def from_yml(yml_file: Union[str, pl.Path]): Model order list: a list supplying the order in which the processes are to be executed. - Note: To get a model_dict specfied by the yml_file, call - `model_dict_from_yml` instead. + Note: To get a model_dict specfied by the yaml_file, call + `model_dict_from_yaml` instead. 
""" - return Model(Model.model_dict_from_yml(yml_file)) + return Model(Model.model_dict_from_yaml(yaml_file)) def initialize_netcdf( self, diff --git a/pywatershed/utils/path.py b/pywatershed/utils/path.py index 50a28244..e8fa16ee 100644 --- a/pywatershed/utils/path.py +++ b/pywatershed/utils/path.py @@ -4,29 +4,40 @@ # A module for path/file utilities -def path_rel_to_yml( - file_in_yml: Union[pl.Path, str], yml: Union[pl.Path, str] +def path_rel_to_yaml( + file_in_yaml: Union[pl.Path, str], yaml: Union[pl.Path, str] ): """Resolve a path from a yaml file - Given a yaml file (yml) and a file specified within that yaml file, + Given a yaml file (yaml) and a file specified within that yaml file, if the file is an absolute path, return it as a pathlib.Path object, otherwise resolve the file path relative to the location of the yaml file. Args: - file_in_yml: a str or pathlib.Path from within a yaml file - yml: the path of the yaml file. + file_in_yaml: a str or pathlib.Path from within a yaml file + yaml: the path of the yaml file. Return: pathlib.Path object with resolved/absolute path """ - yml_pl = pl.Path(yml) - file_pl = pl.Path(file_in_yml) + yaml_pl = pl.Path(yaml) + file_pl = pl.Path(file_in_yaml) if not file_pl.is_absolute(): - file_pl = (yml_pl.parent / file_pl).resolve() + file_pl = (yaml_pl.parent / file_pl).resolve() return file_pl def assert_exists(path): assert pl.Path(path).exists() return + + +def dict_pl_to_str(the_dict): + """Convert dictionary items of pathlib.Path class to strings, recursively""" + for key, val in the_dict.items(): + if isinstance(val, dict): + the_dict[key] = dict_pl_to_str(val) + elif isinstance(val, pl.Path): + the_dict[key] = str(val) + + return the_dict diff --git a/pywatershed/utils/utils.py b/pywatershed/utils/utils.py index 2c10c35b..014f3e4c 100644 --- a/pywatershed/utils/utils.py +++ b/pywatershed/utils/utils.py @@ -1,9 +1,11 @@ import functools +import pathlib as pl from time import time def timer(func): - # Use as a decorator to print the execution time of the passed function + """Use as a decorator to print the execution time of the passed function""" + @functools.wraps(func) def wrap_func(*args, **kwargs): t1 = time()