From 8f045873027a9e1e501f67858892c0d029c1fbe0 Mon Sep 17 00:00:00 2001 From: James McCreight Date: Thu, 28 Sep 2023 17:20:02 -0600 Subject: [PATCH 01/14] Control refactor: 1. warn of unused PRMS legacy options 2. translate PRMS legacy options to PWS options in clear way, documentation WIP on that 3. #2 includes outputting the variables requuested in the PRMS control file --- autotest/test_control.py | 3 +- autotest/test_model.py | 5 +- autotest/test_netcdf_output.py | 33 +++--- autotest/test_nhm_self_drive.py | 14 ++- autotest/test_prms_atmosphere.py | 2 +- autotest/test_prms_canopy.py | 2 +- autotest/test_prms_canopy_runoff.py | 2 +- autotest/test_prms_channel.py | 2 +- autotest/test_prms_et.py | 2 +- autotest/test_prms_et_can_runoff.py | 2 +- autotest/test_prms_et_canopy.py | 2 +- autotest/test_prms_groundwater.py | 2 +- autotest/test_prms_runoff.py | 2 +- autotest/test_prms_snow.py | 2 +- autotest/test_prms_soilzone.py | 2 +- autotest/test_prms_solar_geom.py | 2 +- pywatershed/base/control.py | 127 +++++++++++++++++++++-- pywatershed/base/model.py | 134 ++++++++++++++++--------- pywatershed/base/process.py | 2 +- pywatershed/hydrology/prms_snow.py | 2 +- pywatershed/hydrology/prms_soilzone.py | 6 +- pywatershed/utils/prms_to_mf6.py | 6 +- test_data/drb_2yr/control.yml | 2 +- test_data/generate/conftest.py | 15 ++- test_data/hru_1/control.yml | 2 +- test_data/ucb_2yr/control.yml | 2 +- 26 files changed, 281 insertions(+), 96 deletions(-) diff --git a/autotest/test_control.py b/autotest/test_control.py index 946425c1..fb57aac9 100644 --- a/autotest/test_control.py +++ b/autotest/test_control.py @@ -152,5 +152,6 @@ def test_control_advance(control_simple, params_simple): def test_init_load(domain): - control = Control.load(domain["control_file"]) + with pytest.warns(RuntimeWarning): + _ = Control.load_prms(domain["control_file"]) return None diff --git a/autotest/test_model.py b/autotest/test_model.py index 8cc1bdd9..26eab017 100644 --- a/autotest/test_model.py +++ b/autotest/test_model.py @@ -36,11 +36,14 @@ @pytest.fixture(scope="function") def control(domain): - control = Control.load(domain["control_file"]) + control = Control.load_prms( + domain["control_file"], warn_unused_options=False + ) control.options["verbose"] = 10 control.options["budget_type"] = None control.options["calc_method"] = "fortran" control.options["load_n_time_batches"] = 1 + del control.options["netcdf_output_var_names"] return control diff --git a/autotest/test_netcdf_output.py b/autotest/test_netcdf_output.py index 91694435..3f00a6ad 100644 --- a/autotest/test_netcdf_output.py +++ b/autotest/test_netcdf_output.py @@ -26,9 +26,12 @@ def params(domain): @pytest.fixture(scope="function") def control(domain): - control = Control.load(domain["control_file"]) + control = Control.load_prms( + domain["control_file"], warn_unused_options=False + ) control.edit_n_time_steps(n_time_steps) control.options["budget_type"] = "error" + del control.options["netcdf_output_var_names"] return control @@ -96,12 +99,6 @@ def test_process_budgets(domain, control, params, tmp_path, budget_sum_param): ] output_vars = None - model.initialize_netcdf( - tmp_dir, - budget_args=budget_args, - output_vars=output_vars, - ) - with pytest.warns(UserWarning): model.initialize_netcdf( tmp_dir, @@ -109,6 +106,13 @@ def test_process_budgets(domain, control, params, tmp_path, budget_sum_param): output_vars=output_vars, ) + with pytest.raises(RuntimeError): + model.initialize_netcdf( + tmp_dir, + budget_args=budget_args, + output_vars=output_vars, + 
) + for tt in range(n_time_steps): model.advance() model.calculate() @@ -197,6 +201,7 @@ def test_separate_together_var_list( control.options["input_dir"] = input_dir control.options["netcdf_output_var_names"] = output_vars control.options["netcdf_output_separate_files"] = separate + del control.options["netcdf_output_dir"] # Could limit this to just the variables in model_procs for ff in domain_output_dir.resolve().glob("*.nc"): @@ -204,12 +209,14 @@ def test_separate_together_var_list( for ff in domain_output_dir.parent.resolve().glob("*.nc"): shutil.copy(ff, input_dir / ff.name) - with pytest.raises(RuntimeError): - model = Model( - model_procs, - control=control, - parameters=params, - ) + model = Model( + model_procs, + control=control, + parameters=params, + ) + with pytest.raises(ValueError): + # passing no output_dir arg and none in opts throws an error + model.initialize_netcdf() control.options["netcdf_output_dir"] = test_output_dir model = Model( diff --git a/autotest/test_nhm_self_drive.py b/autotest/test_nhm_self_drive.py index f8b734ba..b2e62c1d 100644 --- a/autotest/test_nhm_self_drive.py +++ b/autotest/test_nhm_self_drive.py @@ -1,5 +1,6 @@ import pathlib as pl +import pytest import xarray as xr import pywatershed as pws @@ -33,18 +34,23 @@ def test_drive_indiv_process(domain, tmp_path): nhm_output_dir = pl.Path(tmp_path) / "nhm_output" params = pws.parameters.PrmsParameters.load(domain["param_file"]) - control = pws.Control.load(domain["control_file"]) + control = pws.Control.load_prms( + domain["control_file"], warn_unused_options=False + ) control.edit_n_time_steps(n_time_steps) control.options["budget_type"] = "warn" control.options["calc_method"] = "numba" control.options["input_dir"] = domain["prms_run_dir"] + del control.options["netcdf_output_var_names"] nhm = pws.Model( nhm_processes, control=control, parameters=params, ) - nhm.initialize_netcdf(output_dir=nhm_output_dir) + with pytest.warns(UserWarning): + nhm.initialize_netcdf(output_dir=nhm_output_dir) + nhm.run(finalize=True) del nhm, params, control @@ -60,7 +66,9 @@ def test_drive_indiv_process(domain, tmp_path): proc_model_output_dir.mkdir() params = pws.parameters.PrmsParameters.load(domain["param_file"]) - control = pws.Control.load(domain["control_file"]) + control = pws.Control.load_prms( + domain["control_file"], warn_unused_options=False + ) control.edit_n_time_steps(n_time_steps) control.options["budget_type"] = "warn" control.options["calc_method"] = "numba" diff --git a/autotest/test_prms_atmosphere.py b/autotest/test_prms_atmosphere.py index 5874c7cc..8307587a 100644 --- a/autotest/test_prms_atmosphere.py +++ b/autotest/test_prms_atmosphere.py @@ -34,7 +34,7 @@ @pytest.fixture(scope="function") def control(domain): - return Control.load(domain["control_file"]) + return Control.load_prms(domain["control_file"], warn_unused_options=False) @pytest.fixture(scope="function") diff --git a/autotest/test_prms_canopy.py b/autotest/test_prms_canopy.py index f87bd2e1..45a6ee63 100644 --- a/autotest/test_prms_canopy.py +++ b/autotest/test_prms_canopy.py @@ -14,7 +14,7 @@ @pytest.fixture(scope="function") def control(domain): - return Control.load(domain["control_file"]) + return Control.load_prms(domain["control_file"], warn_unused_options=False) @pytest.fixture(scope="function") diff --git a/autotest/test_prms_canopy_runoff.py b/autotest/test_prms_canopy_runoff.py index 493e3b2c..7c658f0d 100644 --- a/autotest/test_prms_canopy_runoff.py +++ b/autotest/test_prms_canopy_runoff.py @@ -17,7 +17,7 @@ def 
params(domain): @pytest.fixture(scope="function") def control(domain): - return Control.load(domain["control_file"]) + return Control.load_prms(domain["control_file"], warn_unused_options=False) def test_canopy_runoff(domain, control, params, tmp_path): diff --git a/autotest/test_prms_channel.py b/autotest/test_prms_channel.py index 5160e816..251599b0 100644 --- a/autotest/test_prms_channel.py +++ b/autotest/test_prms_channel.py @@ -21,7 +21,7 @@ @pytest.fixture(scope="function") def control(domain): - return Control.load(domain["control_file"]) + return Control.load_prms(domain["control_file"], warn_unused_options=False) @pytest.fixture(scope="function") diff --git a/autotest/test_prms_et.py b/autotest/test_prms_et.py index 588a53b4..78bdf50a 100644 --- a/autotest/test_prms_et.py +++ b/autotest/test_prms_et.py @@ -16,7 +16,7 @@ def params(domain): @pytest.fixture(scope="function") def control(domain): - return Control.load(domain["control_file"]) + return Control.load_prms(domain["control_file"], warn_unused_options=False) class TestPRMSEt: diff --git a/autotest/test_prms_et_can_runoff.py b/autotest/test_prms_et_can_runoff.py index faea1ef1..cfea7030 100644 --- a/autotest/test_prms_et_can_runoff.py +++ b/autotest/test_prms_et_can_runoff.py @@ -18,7 +18,7 @@ def params(domain): @pytest.fixture(scope="function") def control(domain): - return Control.load(domain["control_file"]) + return Control.load_prms(domain["control_file"], warn_unused_options=False) def test_et_can_runoff(domain, control, params, tmp_path): diff --git a/autotest/test_prms_et_canopy.py b/autotest/test_prms_et_canopy.py index 2f493ee0..c8d1697c 100644 --- a/autotest/test_prms_et_canopy.py +++ b/autotest/test_prms_et_canopy.py @@ -17,7 +17,7 @@ def params(domain): @pytest.fixture(scope="function") def control(domain): - return Control.load(domain["control_file"]) + return Control.load_prms(domain["control_file"], warn_unused_options=False) def test_et(domain, control, params, tmp_path): diff --git a/autotest/test_prms_groundwater.py b/autotest/test_prms_groundwater.py index f0209d8d..2296abfe 100644 --- a/autotest/test_prms_groundwater.py +++ b/autotest/test_prms_groundwater.py @@ -18,7 +18,7 @@ @pytest.fixture(scope="function") def control(domain): - return Control.load(domain["control_file"]) + return Control.load_prms(domain["control_file"], warn_unused_options=False) @pytest.fixture(scope="function") diff --git a/autotest/test_prms_runoff.py b/autotest/test_prms_runoff.py index f0fe0de6..32d2843c 100644 --- a/autotest/test_prms_runoff.py +++ b/autotest/test_prms_runoff.py @@ -14,7 +14,7 @@ @pytest.fixture(scope="function") def control(domain): - return Control.load(domain["control_file"]) + return Control.load_prms(domain["control_file"], warn_unused_options=False) @pytest.fixture(scope="function") diff --git a/autotest/test_prms_snow.py b/autotest/test_prms_snow.py index 73c9a3e7..92ccbfa5 100644 --- a/autotest/test_prms_snow.py +++ b/autotest/test_prms_snow.py @@ -15,7 +15,7 @@ @pytest.fixture(scope="function") def control(domain): - return Control.load(domain["control_file"]) + return Control.load_prms(domain["control_file"], warn_unused_options=False) @pytest.fixture(scope="function") diff --git a/autotest/test_prms_soilzone.py b/autotest/test_prms_soilzone.py index a296bbd2..9bfc6212 100644 --- a/autotest/test_prms_soilzone.py +++ b/autotest/test_prms_soilzone.py @@ -14,7 +14,7 @@ @pytest.fixture(scope="function") def control(domain): - return Control.load(domain["control_file"]) + return 
Control.load_prms(domain["control_file"], warn_unused_options=False) @pytest.fixture(scope="function") diff --git a/autotest/test_prms_solar_geom.py b/autotest/test_prms_solar_geom.py index f2a4560c..4c95dfd7 100644 --- a/autotest/test_prms_solar_geom.py +++ b/autotest/test_prms_solar_geom.py @@ -11,7 +11,7 @@ @pytest.fixture(scope="function") def control(domain): - return Control.load(domain["control_file"]) + return Control.load_prms(domain["control_file"], warn_unused_options=False) @pytest.fixture(scope="function") diff --git a/pywatershed/base/control.py b/pywatershed/base/control.py index 3f3730cf..e2aacd61 100644 --- a/pywatershed/base/control.py +++ b/pywatershed/base/control.py @@ -1,6 +1,6 @@ -"""The control class.""" import datetime import pathlib as pl +from warnings import warn import numpy as np @@ -17,6 +17,56 @@ ) from .accessor import Accessor +# This is the list of control variables currently used by pywatershed +# It is important to maintain this list to issue warnings about what +# variables are unrecognized/ignored in legacy and non-legacy control +# files +# TODO: where should these be documented? +# TODO: identify which are PRMS-legacy? +pws_control_options_avail = [ + "budget_type", + "calc_method", + "dprst_flag", # to remove? + "restart", + "input_dir", + "load_n_time_batches", + "netcdf_output_dir", + "netcdf_output_var_names", + # "netcdf_output_separate_files", + # "netcdf_budget_args", + "start_time", + "time_step_units", + "verbosity", +] + +prms_legacy_options_avail = [ + "dprst_flag", + "end_time", + "init_vars_from_file", + "initial_deltat", + "nhruOutBaseFileName", + "nhruOutVar_names", + "nsegmentOutBaseFileName", + "nsegmentOutVar_names", + "start_time", + "print_debug", +] + +prms_to_pws_option_map = { + "init_vars_from_file": "restart", + "initial_deltat": "time_step", + "nhruOutBaseFileName": "netcdf_output_dir", + "nhruOutVar_names": "netcdf_output_var_names", + "nsegmentOutBaseFileName": "netcdf_output_dir", + "nsegmentOutVar_names": "netcdf_output_var_names", + "print_debug": "verbosity", +} + +assert ( + len(set(prms_to_pws_option_map.keys()) - set(prms_legacy_options_avail)) + == 0 +) + class Control(Accessor): """Control manages global time and options, and provides metadata. @@ -26,7 +76,7 @@ class Control(Accessor): time end_time: the last integration time time_step: the length fo the time step - options: a dictionary of global Process options + options: a dictionary of global Process options. """ @@ -64,7 +114,8 @@ def __init__( if options is None: options = {} - self.options = options + self.options = {} + self._set_options(options) self.meta = meta # This will have the time dimension name # This will have the time coordimate name @@ -73,25 +124,89 @@ def __init__( def load( cls, control_file: fileish, + ) -> "Control": + msg = "Control.load will be deprecated for Control.load_prms" + warn(msg, PendingDeprecationWarning) + return Control.load_prms(control_file) + + @classmethod + def load_prms( + cls, + control_file: fileish, + warn_unused_options: bool = True, ) -> "Control": """Initialize a control object from a PRMS control file Args: control_file: PRMS control file + warn_unused_options: bool if warnings are to be issued for unused + options from the PRMS control file. Recommended and True by + default. See below for a list of used/available legacy options. 
Returns: Time: Time object initialized from a PRMS control file + + + Available PRMS legacy options : + nhruOutVar_names: mapped to netcdf_output_var_names + nsegmentOutVar_names: mapped to netcdf_output_var_names + + """ control = ControlVariables.load(control_file) + if warn_unused_options: + for vv in control.control.keys(): + if vv not in prms_legacy_options_avail: + msg = ( + f"Option '{vv}' in supplied control file is not used " + "by pywatershed" + ) + warn(msg, RuntimeWarning) + + opts = control.control + opt_names = list(opts.keys()) + + for oo in opt_names: + if oo not in prms_legacy_options_avail: + del opts[oo] + if oo in prms_to_pws_option_map.keys(): + pws_option_key = prms_to_pws_option_map[oo] + val = opts[oo] + del opts[oo] + if pws_option_key in opts.keys(): + # combine to a list with only unique entries + # use value instead of list if only one value in list + opts[pws_option_key] = list( + set(opts[pws_option_key].tolist() + val.tolist()) + ) + if len(opts[pws_option_key]) == 1: + opts[pws_option_key] = opts[pws_option_key][0] + else: + opts[pws_option_key] = val + + start_time = control.control["start_time"] + end_time = control.control["end_time"] + time_step = control.control["time_step"] + del control.control["start_time"] + del control.control["end_time"] + del control.control["time_step"] + return cls( - control.control["start_time"], - control.control["end_time"], - control.control["initial_deltat"], + start_time=start_time, + end_time=end_time, + time_step=time_step, options=control.control, ) + def _set_options(self, options): + for okey, oval in options.items(): + if okey not in pws_control_options_avail: + msg = f"'{okey}' is not an available control option" + raise ValueError(msg) + self.options[okey] = oval + @property def current_time(self): """Get the current time.""" diff --git a/pywatershed/base/model.py b/pywatershed/base/model.py index a43cd56c..14096930 100644 --- a/pywatershed/base/model.py +++ b/pywatershed/base/model.py @@ -453,11 +453,12 @@ def __init__( if find_input_files: self._find_input_files() - # methodize this netcdf section - self._parse_netcdf_control_options() self._netcdf_initialized = False - if "netcdf_output_dir" in self.control.options.keys(): - self.initialize_netcdf(**self._netcdf_opts) + opts = self.control.options + if "netcdf_output_dir" in opts.keys(): + self._default_nc_out_dir = opts["netcdf_output_dir"] + else: + self._default_nc_out_dir = None return @@ -741,24 +742,50 @@ def from_yml(yml_file: Union[str, pl.Path]): def initialize_netcdf( self, - output_dir: str, - separate_files: bool = True, + output_dir: str = None, + separate_files: bool = None, budget_args: dict = None, output_vars: list = None, ): - """Initialize NetCDF output files for model (all processes).""" + """Initialize NetCDF output files for model (all processes). + Args: + output_dir: pl.Path or str of the directory where to write files + separate_files: For a given Process, write a single file or + separate files for the process' variables. DEFAULTS to True + for performance reasons. + budget_args: see Budget.initialize_netcdf(). defaults to None + output_vars: A list of variables to write. Unrecognized variable + names are silently skipped. Defaults to None which writes + all variables for all Processes. 
+ """ if self._netcdf_initialized: msg = ( "Model class previously initialized netcdf output " f"in {self._netcdf_dir}" ) - warn(msg) - return + raise RuntimeError(msg) + + print("model initializing NetCDF output") if not self._found_input_files: self._find_input_files() + ( + output_dir, + output_vars, + separate_files, + ) = self._reconcile_nc_args_w_control_opts( + output_dir, output_vars, separate_files + ) + + # apply defaults if necessary + if output_dir is None: + msg = "An output directory is required to be specified for netcdf initialization." + raise ValueError(msg) + if separate_files is None: + separate_files = True + self._netcdf_dir = pl.Path(output_dir) for cls in self.process_order: self.processes[cls].initialize_netcdf( @@ -795,11 +822,13 @@ def run( n_time_steps: the number of timesteps to run output_vars: the vars to output to the netcdf_dir """ - if not self._found_input_files: - self._find_input_files() - - if netcdf_dir: - print("model.run(): initializing NetCDF output") + # Can supply options ton initialize netcdf on .run but not with + # .advance. However, the first advance takes care of finding + # the input files. + if netcdf_dir or ( + not self._netcdf_initialized + and self._default_nc_out_dir is not None + ): self.initialize_netcdf(netcdf_dir, output_vars=output_vars) if not n_time_steps: @@ -821,6 +850,12 @@ def advance(self): if not self._found_input_files: self._find_input_files() + if ( + not self._netcdf_initialized + and self._default_nc_out_dir is not None + ): + self.initialize_netcdf() + self.control.advance() for cls in self.process_order: self.processes[cls].advance() @@ -844,44 +879,49 @@ def finalize(self): self.processes[cls].finalize() return - def _parse_netcdf_control_options(self): - # defaults - output_dir = None - output_vars = None - separate_files = True - budget_args = None + def _reconcile_nc_args_w_control_opts( + self, output_dir, output_vars, separate_files + ): + # can treat the other args but they are not yet in the available opts + arg_opt_name_map = { + "output_dir": "netcdf_output_dir", + "output_vars": "netcdf_output_var_names", + "separate_files": "netcdf_output_separate_files", + } - if "netcdf_output_dir" in self.control.options.keys(): - output_dir = self.control.options["netcdf_output_dir"] + args = { + "output_dir": output_dir, + "output_vars": output_vars, + "separate_files": separate_files, + } - if "netcdf_output_var_names" in self.control.options.keys(): - output_vars = self.control.options["netcdf_output_var_names"] + for vv in args.keys(): + arg_val = args[vv] + opt_name = arg_opt_name_map[vv] + opts = self.control.options + if opt_name in opts.keys(): + opt_val = opts[opt_name] + else: + opt_val = None - if "netcdf_output_separate_files" in self.control.options.keys(): - separate_files = self.control.options[ - "netcdf_output_separate_files" - ] + # set the arg vals to return - if "netcdf_budget_args" in self.control.options.keys(): - budget_args = self.control.options["netcdf_budget_args"] + if opt_val is None and arg_val is None: + pass - any_netcdf_options = False - for kk in self.control.options.keys(): - if "netcdf" in kk: - any_netcdf_options = True + elif opt_val is None: + pass - if output_dir is None and any_netcdf_options: - raise RuntimeError( - "All netcdf options should be in control.options or passed " - "to Model.initialize_netcdf() but not mixed. You have not " - "supplied 'netcdf_output_dir' in control.options." 
- ) + elif arg_val is None: + args[vv] = opt_val - self._netcdf_opts = { - "output_dir": output_dir, - "output_vars": output_vars, - "separate_files": separate_files, - "budget_args": budget_args, - } + else: + msg = ( + f"control.option '{opt_name}' being superceeded by " + f"model.initialize_netcdf argument {vv}" + ) + # TODO: should this edit control? and then model writes control + # at the end of run to the output dir? + warn(msg) - return + return args["output_dir"], args["output_vars"], args["separate_files"] diff --git a/pywatershed/base/process.py b/pywatershed/base/process.py index c8b5677a..c90f29db 100644 --- a/pywatershed/base/process.py +++ b/pywatershed/base/process.py @@ -464,7 +464,7 @@ def initialize_netcdf( return if self._verbose: - print(f"initializing netcdf output for: {self.output_dir}") + print(f"initializing netcdf output for: {self.name}") self._netcdf_initialized = True self._netcdf_output_dir = pl.Path(output_dir) diff --git a/pywatershed/hydrology/prms_snow.py b/pywatershed/hydrology/prms_snow.py index f46f970d..747024b9 100644 --- a/pywatershed/hydrology/prms_snow.py +++ b/pywatershed/hydrology/prms_snow.py @@ -319,7 +319,7 @@ def _set_initial_conditions(self): sd = int(self.ndeplval / 11) self.snarea_curve_2d = np.reshape(self.snarea_curve, (sd, 11)) - if self.control.options["init_vars_from_file"] in [0, 2, 3]: + if self.control.options["restart"] in [0, 2, 3]: # The super().__init__ already set_initial_conditions using its # set_initial_conditions # Below Im just following PRMS6, will reconcile later with the diff --git a/pywatershed/hydrology/prms_soilzone.py b/pywatershed/hydrology/prms_soilzone.py index 4a77a017..e1b7b5dc 100644 --- a/pywatershed/hydrology/prms_soilzone.py +++ b/pywatershed/hydrology/prms_soilzone.py @@ -249,7 +249,7 @@ def _set_initial_conditions(self): self._pref_flow_den[wh_not_land] = zero # variables - if self.control.options["init_vars_from_file"] in [0, 2, 5]: + if self.control.options["restart"] in [0, 2, 5]: # these are set in sm_climateflow self.soil_moist[:] = ( self.soil_moist_init_frac * self.soil_moist_max @@ -269,7 +269,7 @@ def _set_initial_conditions(self): # expectations. Move this parameter business to __init__ # ssres_stor - if self.control.options["init_vars_from_file"] in [0, 2, 5]: + if self.control.options["restart"] in [0, 2, 5]: self.ssres_stor = self.ssstor_init_frac * self._sat_threshold wh_inactive_or_lake = np.where( (self.hru_type == HruType.INACTIVE.value) @@ -341,7 +341,7 @@ def _set_initial_conditions(self): self._pref_flow_flag[wh_land_and_prf_den] = True # can this one be combined with the restart read logic above? 
- if self.control.options["init_vars_from_file"] in [0, 2, 5]: + if self.control.options["restart"] in [0, 2, 5]: wh_land_or_swale = np.where( (self.hru_type == HruType.LAND.value) | (self.hru_type == HruType.SWALE.value) diff --git a/pywatershed/utils/prms_to_mf6.py b/pywatershed/utils/prms_to_mf6.py index 75fc6f52..5270d241 100644 --- a/pywatershed/utils/prms_to_mf6.py +++ b/pywatershed/utils/prms_to_mf6.py @@ -129,7 +129,11 @@ def __init__( if key == "param": setattr(self, "params", PrmsParameters.load(obj_file)) else: - setattr(self, "control", Control.load(obj_file)) + setattr( + self, + "control", + Control.load_prms(obj_file, warn_unused_options=False), + ) else: setattr(self, f"{key}_file", None) diff --git a/test_data/drb_2yr/control.yml b/test_data/drb_2yr/control.yml index 0dcbbc0a..082829b0 100644 --- a/test_data/drb_2yr/control.yml +++ b/test_data/drb_2yr/control.yml @@ -17,7 +17,7 @@ calc_method: numba load_n_time_batches: 1 # still used by snow and soilzone, but should be removed -init_vars_from_file: 0 +restart: 0 dprst_flag: True diff --git a/test_data/generate/conftest.py b/test_data/generate/conftest.py index 31468dd7..bd048f81 100644 --- a/test_data/generate/conftest.py +++ b/test_data/generate/conftest.py @@ -4,6 +4,7 @@ from fnmatch import fnmatch from platform import processor from typing import List +from warnings import warn import pytest @@ -70,10 +71,14 @@ def enforce_scheduler(test_dir): fnmatch(str(test_dir), gg) for gg in domain_globs_schedule ) if any(glob_match): - raise RuntimeError( - f"Domain '{test_dir}' must be scheduled (use --force to override)" + msg = ( + f"Skipping domain '{test_dir}' which must be scheduled or use " + "--force to override skip" ) - return None + warn(msg, UserWarning) + return True + + return False def collect_simulations( @@ -91,7 +96,9 @@ def collect_simulations( # optionally enforce scheduler if not force: - enforce_scheduler(test_dir) + skip = enforce_scheduler(test_dir) + if skip: + continue # if control file is found, add simulation ctrl_file = next( diff --git a/test_data/hru_1/control.yml b/test_data/hru_1/control.yml index b960b1a9..c4305fec 100644 --- a/test_data/hru_1/control.yml +++ b/test_data/hru_1/control.yml @@ -17,6 +17,6 @@ calc_method: numba load_n_time_batches: 1 # still used by snow and soilzone, but should be removed -init_vars_from_file: 0 +restart: 0 dprst_flag: True diff --git a/test_data/ucb_2yr/control.yml b/test_data/ucb_2yr/control.yml index 88ebbfb9..ff5fae62 100644 --- a/test_data/ucb_2yr/control.yml +++ b/test_data/ucb_2yr/control.yml @@ -17,6 +17,6 @@ calc_method: numba load_n_time_batches: 1 # still used by snow and soilzone, but should be removed -init_vars_from_file: 0 +restart: 0 dprst_flag: True From 558c99546da110aac3ead4263a10af31f939ff00 Mon Sep 17 00:00:00 2001 From: James McCreight Date: Thu, 28 Sep 2023 19:19:00 -0600 Subject: [PATCH 02/14] catch up notebooks to control options changes and warnings --- examples/01_multi-process_models.ipynb | 50 ++++++++------------------ examples/02_prms_legacy_models.ipynb | 22 ++++++++++-- 2 files changed, 35 insertions(+), 37 deletions(-) diff --git a/examples/01_multi-process_models.ipynb b/examples/01_multi-process_models.ipynb index 51d5dae7..b1cf7bfe 100644 --- a/examples/01_multi-process_models.ipynb +++ b/examples/01_multi-process_models.ipynb @@ -7,8 +7,7 @@ "editable": true, "slideshow": { "slide_type": "" - }, - "tags": [] + } }, "source": [ "# Multi-process models in pywatershed\n", @@ -219,8 +218,7 @@ "editable": true, "slideshow": { 
"slide_type": "" - }, - "tags": [] + } }, "source": [ "For the time being, `PRMSChannel` needs to know about both HRUs and segments, so `dis_both` is used. We plan to remove this requirement in the near future by implementing \"exchanges\" between processes into the model dictionary. Stay tuned.\n", @@ -245,7 +243,7 @@ " \"input_dir\": domain_dir,\n", " \"budget_type\": None,\n", " \"netcdf_output_dir\": nb_output_dir / \"nhm_memory\",\n", - " \"init_vars_from_file\": 0,\n", + " \"restart\": 0,\n", " \"dprst_flag\": True,\n", " },\n", ")\n", @@ -288,8 +286,7 @@ "editable": true, "slideshow": { "slide_type": "" - }, - "tags": [] + } }, "outputs": [], "source": [ @@ -372,8 +369,7 @@ "editable": true, "slideshow": { "slide_type": "" - }, - "tags": [] + } }, "source": [ "We add the option `netcdf_output_dir` to the control since we assume we wont be able to do so at run time. Note that this option and the `input_dir` option are `pathlib.Path` objects. These are not what we want to write to file. We want their string version. We could do `str()` on each one by hand, but it will be more handy to write a small, recursive function to do this on a supplied dictionary since this will be a recurring task with the model dictionary we will create after the control YAML file." @@ -600,8 +596,7 @@ "editable": true, "slideshow": { "slide_type": "" - }, - "tags": [] + } }, "outputs": [], "source": [ @@ -620,8 +615,7 @@ "editable": true, "slideshow": { "slide_type": "" - }, - "tags": [] + } }, "source": [ "Now compare the values of all variables:" @@ -670,8 +664,7 @@ "editable": true, "slideshow": { "slide_type": "" - }, - "tags": [] + } }, "outputs": [], "source": [ @@ -812,8 +805,7 @@ "editable": true, "slideshow": { "slide_type": "" - }, - "tags": [] + } }, "source": [ "Reducing the output significantly reduced the time, in this case (on my machine) from 25s to 15s, or about 60%." @@ -919,8 +911,7 @@ "editable": true, "slideshow": { "slide_type": "" - }, - "tags": [] + } }, "outputs": [], "source": [ @@ -938,8 +929,7 @@ "editable": true, "slideshow": { "slide_type": "" - }, - "tags": [] + } }, "source": [ "Well, that was a lot of work. But, as alluded to above, the `Model` object does the above so you dont have to. You just learned something about how the flow of information between processes is enabled by the design and how one can query individual processes in `pywatershed`. But we could instantiate the submodel and plot this wiring up, just as we plotted the `ModelGraph` of the full model. We'll create the submodel in a new `run_dir` and we'll use outputs from the full model above as inputs to this submodel." @@ -1043,8 +1033,7 @@ "editable": true, "slideshow": { "slide_type": "" - }, - "tags": [] + } }, "outputs": [], "source": [ @@ -1067,8 +1056,7 @@ "editable": true, "slideshow": { "slide_type": "" - }, - "tags": [] + } }, "source": [ "Note that the required inputs to the submodel are quire different and rely on the existence of these files having already been output by the full model. 
\n", @@ -1247,22 +1235,14 @@ } ], "metadata": { - "kernelspec": { - "display_name": "Python [conda env:pws2] *", - "language": "python", - "name": "conda-env-pws2-py" - }, "language_info": { "codemirror_mode": { - "name": "ipython", - "version": 3 + "name": "ipython" }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" + "nbconvert_exporter": "python" } }, "nbformat": 4, diff --git a/examples/02_prms_legacy_models.ipynb b/examples/02_prms_legacy_models.ipynb index fe7767ac..c45adca9 100644 --- a/examples/02_prms_legacy_models.ipynb +++ b/examples/02_prms_legacy_models.ipynb @@ -52,6 +52,7 @@ "from pprint import pprint\n", "from shutil import rmtree\n", "from sys import platform\n", + "import warnings\n", "\n", "import pydoc\n", "\n", @@ -205,7 +206,10 @@ "metadata": {}, "outputs": [], "source": [ - "control = pws.Control.load(domain_dir / \"control.test\")\n", + "with warnings.catch_warnings():\n", + " warnings.simplefilter(\"ignore\")\n", + " control = pws.Control.load(domain_dir / \"control.test\")\n", + "\n", "control" ] }, @@ -219,6 +223,8 @@ } }, "source": [ + "We suppress warnings when loading legacy PRMS parameter control files indicating which options are not being used. \n", + "\n", "Now we'll edit this control object. First we'll reduce the total simulation time to six months for the purposes of this demonstration (but feel free to increase this to the full 2 years available, if you like). Next we'll specify several global options, including the location of the atmospheric forcing/input data, the budget type, and the calculation method." ] }, @@ -404,7 +410,10 @@ }, "outputs": [], "source": [ - "control = pws.Control.load(domain_dir / \"control.test\")\n", + "with warnings.catch_warnings():\n", + " warnings.simplefilter(\"ignore\")\n", + " control = pws.Control.load(domain_dir / \"control.test\")\n", + "\n", "control.edit_end_time(np.datetime64(\"1979-07-01T00:00:00\"))\n", "control.options = control.options | {\n", " \"input_dir\": run_dir,\n", @@ -413,6 +422,7 @@ " \"netcdf_output_dir\": nb_output_dir / \"nhm_submodel\",\n", "}\n", "\n", + "\n", "control.options[\"netcdf_output_var_names\"] = pws.PRMSChannel.get_variables()" ] }, @@ -461,6 +471,14 @@ "%%time\n", "submodel.run(finalize=True)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cfcc3aa8-cc8f-40b9-9a72-124567c2c8bf", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From 59685feadc1ffc4adefb39f8da061b9dcb2db65f Mon Sep 17 00:00:00 2001 From: James McCreight Date: Thu, 28 Sep 2023 22:26:58 -0600 Subject: [PATCH 03/14] Control: 1) copy and deepcopy methods, 2) __setitem__ and __setattr__ for managing options attribute/item that enforces valid options, 3) control.options as an OptsDict which also enforces valid keys, 4) tests for the previous --- autotest/test_control.py | 38 +++++++++++++++++++++ autotest/test_model.py | 2 +- pywatershed/base/control.py | 66 ++++++++++++++++++++++++++++++------- pywatershed/base/model.py | 2 +- 4 files changed, 95 insertions(+), 13 deletions(-) diff --git a/autotest/test_control.py b/autotest/test_control.py index fb57aac9..851a22a3 100644 --- a/autotest/test_control.py +++ b/autotest/test_control.py @@ -1,3 +1,4 @@ +from copy import copy, deepcopy from datetime import datetime import numpy as np @@ -155,3 +156,40 @@ def test_init_load(domain): with pytest.warns(RuntimeWarning): _ = Control.load_prms(domain["control_file"]) return None + 
+ +def test_deepcopy(domain): + ctl = Control.load_prms(domain["control_file"], warn_unused_options=False) + ctl_sh = copy(ctl) + ctl_dp = deepcopy(ctl) + + opt_restart_orig = ctl.options["restart"] + opt_restart_new = "something_else" + ctl.options["restart"] = opt_restart_new + assert ctl_sh.options["restart"] == opt_restart_new + assert ctl_dp.options["restart"] == opt_restart_orig + + return None + + +def test_setitem_setattr(domain): + ctl = Control.load_prms(domain["control_file"], warn_unused_options=False) + + # __setitem__ on OptsDict + ctl.options["restart"] = 12 + with pytest.raises(NameError): + ctl.options["foobar"] = 12 + + # __setattr__ on Control + ctl.options = {"restart": 45} + with pytest.raises(NameError): + ctl.options = {"foobar": 12} + + # __setitem__ on Control + ctl["options"] = {"restart": 45} + with pytest.raises(NameError): + ctl["options"] = {"foobar": 12} + + # The value for options must be a dictionary + with pytest.raises(ValueError): + ctl.options = None diff --git a/autotest/test_model.py b/autotest/test_model.py index 26eab017..4312c89c 100644 --- a/autotest/test_model.py +++ b/autotest/test_model.py @@ -39,7 +39,7 @@ def control(domain): control = Control.load_prms( domain["control_file"], warn_unused_options=False ) - control.options["verbose"] = 10 + control.options["verbosity"] = 10 control.options["budget_type"] = None control.options["calc_method"] = "fortran" control.options["load_n_time_batches"] = 1 diff --git a/pywatershed/base/control.py b/pywatershed/base/control.py index e2aacd61..08c0d1bc 100644 --- a/pywatershed/base/control.py +++ b/pywatershed/base/control.py @@ -1,3 +1,5 @@ +from collections import UserDict +from copy import deepcopy import datetime import pathlib as pl from warnings import warn @@ -32,8 +34,8 @@ "load_n_time_batches", "netcdf_output_dir", "netcdf_output_var_names", - # "netcdf_output_separate_files", - # "netcdf_budget_args", + "netcdf_output_separate_files", + "netcdf_budget_args", "start_time", "time_step_units", "verbosity", @@ -113,9 +115,8 @@ def __init__( self._itime_step = -1 if options is None: - options = {} - self.options = {} - self._set_options(options) + options = OptsDict() + self.options = options self.meta = meta # This will have the time dimension name # This will have the time coordimate name @@ -200,12 +201,46 @@ def load_prms( options=control.control, ) - def _set_options(self, options): - for okey, oval in options.items(): - if okey not in pws_control_options_avail: - msg = f"'{okey}' is not an available control option" - raise ValueError(msg) - self.options[okey] = oval + def _set_options(self, options: dict): + if not isinstance(options, (OptsDict, dict)): + raise ValueError("control.options must be a dictionary") + valid_options = OptsDict() + for key, val in options.items(): + valid_options[key] = val + + return valid_options + + def __setitem__(self, key, value) -> None: + if key == "options": + value = self._set_options(value) + + super().__setitem__(key, value) + return None + + def __setattr__(self, name, value) -> None: + if name == "options": + value = self._set_options(value) + + super().__setattr__(name, value) + return None + + def __copy__(self): + cls = self.__class__ + result = cls.__new__(cls) + result.__dict__.update(self.__dict__) + return result + + def __deepcopy__(self, memo): + del self.meta + cls = self.__class__ + result = cls.__new__(cls) + memo[id(self)] = result + for k, v in self.__dict__.items(): + setattr(result, k, deepcopy(v, memo)) + + self.meta = meta + result.meta 
= meta + return result @property def current_time(self): @@ -388,3 +423,12 @@ def from_yml(yml_file): options=control_dict, ) return control + + +class OptsDict(UserDict): + def __setitem__(self, key, value): + if key not in pws_control_options_avail: + msg = f"'{key}' is not an available control option" + raise NameError(msg) + super().__setitem__(key, value) + return None diff --git a/pywatershed/base/model.py b/pywatershed/base/model.py index 14096930..de0dc8fb 100644 --- a/pywatershed/base/model.py +++ b/pywatershed/base/model.py @@ -404,7 +404,7 @@ def __init__( parameters: Union[Parameters, dict[Parameters]] = None, find_input_files: bool = True, ): - self.control = control + self.control = deepcopy(control) self.parameters = parameters # This is for backwards compatibility From b58484df59fcddc57e8e69757596430b4666c924 Mon Sep 17 00:00:00 2001 From: James McCreight Date: Thu, 28 Sep 2023 22:28:01 -0600 Subject: [PATCH 04/14] lint --- pywatershed/base/control.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pywatershed/base/control.py b/pywatershed/base/control.py index 08c0d1bc..0153f54b 100644 --- a/pywatershed/base/control.py +++ b/pywatershed/base/control.py @@ -1,7 +1,7 @@ -from collections import UserDict -from copy import deepcopy import datetime import pathlib as pl +from collections import UserDict +from copy import deepcopy from warnings import warn import numpy as np From 554899cf161e4d7848db2288caeb39429cd3b93d Mon Sep 17 00:00:00 2001 From: James McCreight Date: Tue, 10 Oct 2023 14:10:07 -0600 Subject: [PATCH 05/14] Control .to_yaml and .to_dict methods --- autotest/test_control.py | 8 +++ autotest/test_model.py | 9 +-- examples/00_processes.ipynb | 15 +---- examples/01_multi-process_models.ipynb | 87 +++++++++++--------------- examples/02_prms_legacy_models.ipynb | 18 ++++++ pywatershed/base/control.py | 79 +++++++++++++++++++++-- pywatershed/base/model.py | 41 ++++++------ pywatershed/utils/path.py | 27 +++++--- pywatershed/utils/utils.py | 4 +- 9 files changed, 185 insertions(+), 103 deletions(-) diff --git a/autotest/test_control.py b/autotest/test_control.py index 851a22a3..49e1631e 100644 --- a/autotest/test_control.py +++ b/autotest/test_control.py @@ -193,3 +193,11 @@ def test_setitem_setattr(domain): # The value for options must be a dictionary with pytest.raises(ValueError): ctl.options = None + + +def test_yaml_roundtrip(domain, tmp_path): + ctl = Control.load_prms(domain["control_file"], warn_unused_options=False) + yml_file = tmp_path / "control.yaml" + ctl.to_yaml(yml_file) + ctl_2 = Control.from_yaml(yml_file) + np.testing.assert_equal(ctl.to_dict(), ctl_2.to_dict()) diff --git a/autotest/test_model.py b/autotest/test_model.py index 4312c89c..3a2ef04b 100644 --- a/autotest/test_model.py +++ b/autotest/test_model.py @@ -31,7 +31,7 @@ } -invoke_style = ("prms", "model_dict", "model_dict_from_yml") +invoke_style = ("prms", "model_dict", "model_dict_from_yaml") @pytest.fixture(scope="function") @@ -106,9 +106,9 @@ def model_args(domain, control, discretization, request): "parameters": None, } - elif invoke_style == "model_dict_from_yml": - yml_file = domain["dir"] / "nhm_model.yml" - model_dict = Model.model_dict_from_yml(yml_file) + elif invoke_style == "model_dict_from_yaml": + yaml_file = domain["dir"] / "nhm_model.yml" + model_dict = Model.model_dict_from_yaml(yaml_file) args = { "process_list_or_model_dict": model_dict, @@ -143,6 +143,7 @@ def test_model(domain, model_args, tmp_path): control = model_args["control"] 
control.options["input_dir"] = input_dir + control.options["netcdf_output_dir"] = tmp_path / "output" model = Model(**model_args) diff --git a/examples/00_processes.ipynb b/examples/00_processes.ipynb index 6ada3abb..58d17fc8 100644 --- a/examples/00_processes.ipynb +++ b/examples/00_processes.ipynb @@ -54,8 +54,7 @@ "editable": true, "slideshow": { "slide_type": "" - }, - "tags": [] + } }, "outputs": [], "source": [ @@ -545,22 +544,14 @@ } ], "metadata": { - "kernelspec": { - "display_name": "Python [conda env:pws2] *", - "language": "python", - "name": "conda-env-pws2-py" - }, "language_info": { "codemirror_mode": { - "name": "ipython", - "version": 3 + "name": "ipython" }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" + "nbconvert_exporter": "python" } }, "nbformat": 4, diff --git a/examples/01_multi-process_models.ipynb b/examples/01_multi-process_models.ipynb index b1cf7bfe..c3442c65 100644 --- a/examples/01_multi-process_models.ipynb +++ b/examples/01_multi-process_models.ipynb @@ -351,15 +351,8 @@ "source": [ "run_dir = pl.Path(nb_output_dir / \"nhm_yaml\")\n", "run_dir.mkdir(exist_ok=True)\n", - "control_dict = control.options | {\n", - " \"start_time\": str(control.start_time),\n", - " \"end_time\": str(control.end_time),\n", - " \"time_step\": str(control.time_step)[0:2],\n", - " \"time_step_units\": str(control.time_step)[3:4],\n", - " \"netcdf_output_dir\": run_dir,\n", - "}\n", - "\n", - "pprint(control_dict, sort_dicts=False)" + "control_yaml_file = run_dir / \"control.yml\"\n", + "control.to_yaml(control_yaml_file)" ] }, { @@ -389,11 +382,7 @@ " elif isinstance(val, pl.Path):\n", " the_dict[key] = str(val)\n", "\n", - " return the_dict\n", - "\n", - "\n", - "control_dict = dict_pl_to_str(control_dict)\n", - "pprint(control_dict, sort_dicts=False)" + " return the_dict" ] }, { @@ -411,7 +400,6 @@ "metadata": {}, "outputs": [], "source": [ - "control_yaml_file = run_dir / \"control.yml\"\n", "model_dict = {\n", " \"control\": control_yaml_file.resolve(),\n", " \"dis_hru\": domain_dir / \"parameters_dis_hru.nc\",\n", @@ -486,11 +474,8 @@ "outputs": [], "source": [ "model_dict_yaml_file = run_dir / \"model_dict.yml\"\n", - "# the control yaml file was given above and is in the model_dict\n", - "dump_dict = {control_yaml_file: control_dict, model_dict_yaml_file: model_dict}\n", - "for key, val in dump_dict.items():\n", - " with open(key, \"w\") as file:\n", - " documents = yaml.dump(val, file)" + "with open(model_dict_yaml_file, \"w\") as file:\n", + " _ = yaml.dump(model_dict, file)" ] }, { @@ -536,7 +521,7 @@ "metadata": {}, "outputs": [], "source": [ - "model_yml = pws.Model.from_yml(model_dict_yaml_file)\n", + "model_yml = pws.Model.from_yaml(model_dict_yaml_file)\n", "model_yml" ] }, @@ -698,8 +683,8 @@ "metadata": {}, "outputs": [], "source": [ - "control_dict_copy = deepcopy(control_dict)\n", - "model_dict_copy = deepcopy(model_dict)" + "run_dir = pl.Path(nb_output_dir / \"yml_less_output\").resolve()\n", + "run_dir.mkdir(exist_ok=True)" ] }, { @@ -709,12 +694,9 @@ "metadata": {}, "outputs": [], "source": [ - "run_dir = pl.Path(nb_output_dir / \"yml_less_output\").resolve()\n", - "run_dir.mkdir(exist_ok=True)\n", - "\n", - "control_dict_copy[\"netcdf_output_dir\"] = str(run_dir.resolve())\n", - "control_yaml_file = run_dir / \"control.yml\"\n", - "control_dict_copy[\"netcdf_output_var_names\"] = [\n", + "control_cp = deepcopy(control)\n", + 
"control_cp.options[\"netcdf_output_dir\"] = str(run_dir.resolve())\n", + "control_cp.options[\"netcdf_output_var_names\"] = [\n", " var\n", " for ll in [\n", " pws.PRMSGroundwater.get_variables(),\n", @@ -722,7 +704,10 @@ " ]\n", " for var in ll\n", "]\n", - "pprint(control_dict_copy, sort_dicts=False)" + "pprint(control_cp.to_dict(), sort_dicts=False)\n", + "\n", + "control_yaml_file = run_dir / \"control.yml\"\n", + "control_cp.to_yaml(control_yaml_file)" ] }, { @@ -740,6 +725,7 @@ "metadata": {}, "outputs": [], "source": [ + "model_dict_copy = deepcopy(model_dict)\n", "model_dict_copy[\"control\"] = str(control_yaml_file)\n", "model_dict_yaml_file = run_dir / \"model_dict.yml\"" ] @@ -759,13 +745,8 @@ "metadata": {}, "outputs": [], "source": [ - "dump_dict = {\n", - " control_yaml_file: control_dict_copy,\n", - " model_dict_yaml_file: model_dict_copy,\n", - "}\n", - "for key, val in dump_dict.items():\n", - " with open(key, \"w\") as file:\n", - " documents = yaml.dump(val, file)" + "with open(model_dict_yaml_file, \"w\") as file:\n", + " _ = yaml.dump(model_dict_copy, file)" ] }, { @@ -783,7 +764,7 @@ "metadata": {}, "outputs": [], "source": [ - "submodel = pws.Model.from_yml(model_dict_yaml_file)\n", + "submodel = pws.Model.from_yaml(model_dict_yaml_file)\n", "submodel" ] }, @@ -915,9 +896,9 @@ }, "outputs": [], "source": [ - "yml_output_dir = pl.Path(control_dict[\"netcdf_output_dir\"])\n", + "yaml_output_dir = pl.Path(control.options[\"netcdf_output_dir\"])\n", "for ii in submodel_file_inputs:\n", - " input_file = yml_output_dir / f\"{ii}.nc\"\n", + " input_file = yaml_output_dir / f\"{ii}.nc\"\n", " assert input_file.exists()\n", " print(input_file)" ] @@ -946,9 +927,12 @@ "run_dir.mkdir(exist_ok=True)\n", "\n", "# key that inputs exist from previous full-model run\n", - "control_dict[\"input_dir\"] = str(yml_output_dir.resolve())\n", - "control_dict[\"netcdf_output_dir\"] = str(run_dir.resolve())\n", - "control_yaml_file = run_dir / \"control.yml\"" + "control_cp = deepcopy(control)\n", + "control_cp.options[\"input_dir\"] = yaml_output_dir.resolve()\n", + "control_cp.options[\"netcdf_output_dir\"] = run_dir.resolve()\n", + "control_yaml_file = run_dir / \"control.yml\"\n", + "control_cp.to_yaml(control_yaml_file)\n", + "pprint(control.to_dict(), sort_dicts=False)" ] }, { @@ -973,7 +957,8 @@ "for kk in list(model_dict.keys()):\n", " if isinstance(model_dict[kk], dict) and kk not in keep_procs:\n", " del model_dict[kk]\n", - "pprint(control_dict, sort_dicts=False)\n", + "\n", + "\n", "pprint(model_dict, sort_dicts=False)" ] }, @@ -992,10 +977,8 @@ "metadata": {}, "outputs": [], "source": [ - "dump_dict = {control_yaml_file: control_dict, model_dict_yaml_file: model_dict}\n", - "for key, val in dump_dict.items():\n", - " with open(key, \"w\") as file:\n", - " documents = yaml.dump(val, file)" + "with open(model_dict_yaml_file, \"w\") as file:\n", + " _ = yaml.dump(model_dict, file)" ] }, { @@ -1013,7 +996,7 @@ "metadata": {}, "outputs": [], "source": [ - "submodel = pws.Model.from_yml(model_dict_yaml_file)\n", + "submodel = pws.Model.from_yaml(model_dict_yaml_file)\n", "submodel" ] }, @@ -1154,7 +1137,7 @@ "outputs": [], "source": [ "var = \"recharge\"\n", - "nhm_ds = xr.open_dataset(yml_output_dir / f\"{var}.nc\")\n", + "nhm_ds = xr.open_dataset(yaml_output_dir / f\"{var}.nc\")\n", "sub_ds = xr.open_dataset(run_dir / f\"{var}.nc\")" ] }, @@ -1187,7 +1170,7 @@ "outputs": [], "source": [ "for var in submodel_variables:\n", - " nhm_da = xr.open_dataset(yml_output_dir / 
f\"{var}.nc\")[var]\n", + " nhm_da = xr.open_dataset(yaml_output_dir / f\"{var}.nc\")[var]\n", " sub_da = xr.open_dataset(run_dir / f\"{var}.nc\")[var]\n", " xr.testing.assert_equal(nhm_da, sub_da)" ] @@ -1200,7 +1183,7 @@ "outputs": [], "source": [ "# var_name = \"dprst_seep_hru\"\n", - "nhm_da = xr.open_dataset(yml_output_dir / f\"{var_name}.nc\")[var_name]\n", + "nhm_da = xr.open_dataset(yaml_output_dir / f\"{var_name}.nc\")[var_name]\n", "sub_da = xr.open_dataset(run_dir / f\"{var_name}.nc\")[var_name]\n", "scat = xr.merge(\n", " [nhm_da.rename(f\"{var_name}_yaml\"), sub_da.rename(f\"{var_name}_subset\")]\n", diff --git a/examples/02_prms_legacy_models.ipynb b/examples/02_prms_legacy_models.ipynb index c45adca9..04be7707 100644 --- a/examples/02_prms_legacy_models.ipynb +++ b/examples/02_prms_legacy_models.ipynb @@ -250,6 +250,24 @@ "}" ] }, + { + "cell_type": "markdown", + "id": "ed82f8d1-8bfc-469e-a968-f86e029c7a5f", + "metadata": {}, + "source": [ + "We note that the `netcdf_output_var_names` in `control.options` is the combination of `nhruOutVar_names` and `nsegmentOutVar_names` from the PRMS-native `control.test` file. In the next section we'll customize this list of variables names, but here we list what we'll output with our current simulation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e28f2df1-df17-451f-87ed-5d8d1e9d8b7e", + "metadata": {}, + "outputs": [], + "source": [ + "control.options[\"netcdf_output_var_names\"]" + ] + }, { "cell_type": "markdown", "id": "0b46e9ca-e84b-40b3-bdc5-179fd6c85555", diff --git a/pywatershed/base/control.py b/pywatershed/base/control.py index 0153f54b..a0b8d618 100644 --- a/pywatershed/base/control.py +++ b/pywatershed/base/control.py @@ -2,14 +2,16 @@ import pathlib as pl from collections import UserDict from copy import deepcopy +from typing import Union from warnings import warn import numpy as np +import yaml from ..base import meta from ..constants import fileish from ..utils import ControlVariables -from ..utils.path import assert_exists, path_rel_to_yml +from ..utils.path import assert_exists, dict_pl_to_str, path_rel_to_yaml from ..utils.time_utils import ( datetime_dowy, datetime_doy, @@ -360,9 +362,74 @@ def edit_n_time_steps(self, new_n_time_steps: int): ) return + def __str__(self): + from pprint import pformat + + return pformat(self.to_dict()) + + def __repr__(self): + # TODO: this is not really an object representation + return self.__str__() + + def to_dict(self, deep_copy=True): + """Export a control object to a dictionary + + Args: + None. + """ + + control_dict = {} + + # I suppose this list could grow with time but these are + # the only non .option items in __dict__ required to reconstitute a + # Control instance + control_dict["start_time"] = str(self.start_time) + control_dict["end_time"] = str(self.end_time) + control_dict["time_step"] = str(self.time_step)[0:2] + control_dict["time_step_units"] = str(self.time_step)[3:4] + + if deep_copy: + control = deepcopy(self) + else: + control = self + + control_dict["options"] = {} + for kk, vv in control.options.items(): + control_dict["options"][kk] = control.options[kk] + + return control_dict + + def to_yaml(self, yaml_file: Union[pl.Path, str]): + """Export to a yaml file + + Note: This flattens .options to the top level of the yaml/dict + so that option keys are all at the same level as "start_time", + "end_time", "time_step", and "time_step_units". Using .from_yaml + will restore options to a nested dictionary. 
+ + Args: + yaml_file: pl.Path or str to designate the output path/file. + """ + control_dict = dict_pl_to_str(self.to_dict()) + opts = control_dict["options"] + for kk, vv in opts.items(): + if kk in control_dict.keys(): + msg = "Control option keys collide with non-option keys" + raise ValueError(msg) + control_dict[kk] = vv + + del control_dict["options"] + + yaml_file = pl.Path(yaml_file) + with open(yaml_file, "w") as file: + _ = yaml.dump(control_dict, file) + + assert yaml_file.exists() + return None + @staticmethod - def from_yml(yml_file): - """Instantate a Control object from a yml file + def from_yaml(yaml_file): + """Instantate a Control object from a yaml file Required key:value pairs: start_time: ISO8601 string for numpy datetime64, @@ -395,7 +462,7 @@ def from_yml(yml_file): """ import yaml - with pl.Path(yml_file).open("r") as file_stream: + with pl.Path(yaml_file).open("r") as file_stream: control_dict = yaml.load(file_stream, Loader=yaml.Loader) start_time = np.datetime64(control_dict["start_time"]) @@ -411,8 +478,8 @@ def from_yml(yml_file): paths_to_convert = ["input_dir"] for path_name in paths_to_convert: if path_name in control_dict.keys(): - control_dict[path_name] = path_rel_to_yml( - control_dict[path_name], yml_file + control_dict[path_name] = path_rel_to_yaml( + control_dict[path_name], yaml_file ) assert_exists(control_dict[path_name]) diff --git a/pywatershed/base/model.py b/pywatershed/base/model.py index de0dc8fb..66771d23 100644 --- a/pywatershed/base/model.py +++ b/pywatershed/base/model.py @@ -10,7 +10,7 @@ from ..base.control import Control from ..constants import fileish from ..parameters import Parameters, PrmsParameters -from ..utils.path import path_rel_to_yml +from ..utils.path import path_rel_to_yaml # This is a convenience process_order_nhm = [ @@ -95,7 +95,7 @@ class Model: Only one control object can be included in the model dictionary. Though the key for the control can be arbitrary, the value is either an instance of class Control or, in the case of a yaml model dictionary, a control - yaml file to be loaded by Control.from_yml() (todo: link to this + yaml file to be loaded by Control.from_yaml() (todo: link to this staticmethod). - **discretizations** - Multiple discretizations may be supplied to the model dictionary, each with arbitrary names. These provide spatial @@ -328,7 +328,7 @@ class Model: with open(key, "w") as file: documents = yaml.dump(val, file) - model = pws.Model.from_yml(model_dict_file) + model = pws.Model.from_yaml(model_dict_file) model.run() control_file.unlink() model_dict_file.unlink() @@ -643,14 +643,14 @@ def _find_input_files(self) -> None: return @staticmethod - def model_dict_from_yml(yml_file: Union[str, pl.Path]) -> dict: + def model_dict_from_yaml(yaml_file: Union[str, pl.Path]) -> dict: """Generate a model dictionary from a yaml file. - Instead of Model.from_yml() it can be useful to get the model + Instead of Model.from_yaml() it can be useful to get the model dictionary before passing it to Model. Args: - yml_file: a yml file + yaml_file: a yaml file Returns: A model dictionary. 
@@ -659,19 +659,19 @@ def model_dict_from_yml(yml_file: Union[str, pl.Path]) -> dict: import pywatershed - with pl.Path(yml_file).open("r") as file_stream: + with pl.Path(yaml_file).open("r") as file_stream: model_dict = yaml.load(file_stream, Loader=yaml.Loader) for key, val in model_dict.items(): if isinstance(val, str): - val_pl = path_rel_to_yml(val, yml_file) - if val.endswith(".yml"): - model_dict[key] = Control.from_yml(val_pl) + val_pl = path_rel_to_yaml(val, yaml_file) + if (val.endswith(".yml")) or (val.endswith(".yaml")): + model_dict[key] = Control.from_yaml(val_pl) elif val.endswith(".nc"): model_dict[key] = Parameters.from_netcdf(val_pl) else: msg = ( - "Unsupported file extension for control (.yml)" + "Unsupported file extension for control (.yml/.yaml)" "and parameter (.nc) file paths in model yaml file" ) raise ValueError(msg) @@ -684,7 +684,7 @@ def model_dict_from_yml(yml_file: Union[str, pl.Path]) -> dict: cls = val["class"] val["class"] = getattr(pywatershed, cls) par = val["parameters"] - par_pl = path_rel_to_yml(par, yml_file) + par_pl = path_rel_to_yaml(par, yaml_file) val["parameters"] = Parameters.from_netcdf( par_pl, encoding=False ) @@ -697,13 +697,13 @@ def model_dict_from_yml(yml_file: Union[str, pl.Path]) -> dict: return model_dict @staticmethod - def from_yml(yml_file: Union[str, pl.Path]): + def from_yaml(yaml_file: Union[str, pl.Path]): """Instantiate a Model from a yaml file A yaml file that specifies a model_dict as the first argument of Model. Args: - yml_file: str or pathlib.Path + yaml_file: str or pathlib.Path Returns: An instance of Model. @@ -711,10 +711,11 @@ def from_yml(yml_file: Union[str, pl.Path]): Yaml file structure (strict order not required, but suggested): Control object: Any name can be used but the value must be a control - yaml file specified with the suffix ".yml". E.g "name: control.yml" + yaml file specified with the suffix ".yaml". E.g + "name: control.yaml" would appear in the passed yaml file. Only one control - specification is allowed in the yml_file. For details on the - requirements of the control.yml file see `Control.from_yml` + specification is allowed in the yaml_file. For details on the + requirements of the control.yaml file see `Control.from_yaml` Discretization objects: Any number of discretization objects can be supplied with arbitrary (though unique) names. The values supplied for each discretization must be a valid netcdf file with suffix @@ -734,11 +735,11 @@ def from_yml(yml_file: Union[str, pl.Path]): Model order list: a list supplying the order in which the processes are to be executed. - Note: To get a model_dict specfied by the yml_file, call - `model_dict_from_yml` instead. + Note: To get a model_dict specfied by the yaml_file, call + `model_dict_from_yaml` instead. 
""" - return Model(Model.model_dict_from_yml(yml_file)) + return Model(Model.model_dict_from_yaml(yaml_file)) def initialize_netcdf( self, diff --git a/pywatershed/utils/path.py b/pywatershed/utils/path.py index 50a28244..e8fa16ee 100644 --- a/pywatershed/utils/path.py +++ b/pywatershed/utils/path.py @@ -4,29 +4,40 @@ # A module for path/file utilities -def path_rel_to_yml( - file_in_yml: Union[pl.Path, str], yml: Union[pl.Path, str] +def path_rel_to_yaml( + file_in_yaml: Union[pl.Path, str], yaml: Union[pl.Path, str] ): """Resolve a path from a yaml file - Given a yaml file (yml) and a file specified within that yaml file, + Given a yaml file (yaml) and a file specified within that yaml file, if the file is an absolute path, return it as a pathlib.Path object, otherwise resolve the file path relative to the location of the yaml file. Args: - file_in_yml: a str or pathlib.Path from within a yaml file - yml: the path of the yaml file. + file_in_yaml: a str or pathlib.Path from within a yaml file + yaml: the path of the yaml file. Return: pathlib.Path object with resolved/absolute path """ - yml_pl = pl.Path(yml) - file_pl = pl.Path(file_in_yml) + yaml_pl = pl.Path(yaml) + file_pl = pl.Path(file_in_yaml) if not file_pl.is_absolute(): - file_pl = (yml_pl.parent / file_pl).resolve() + file_pl = (yaml_pl.parent / file_pl).resolve() return file_pl def assert_exists(path): assert pl.Path(path).exists() return + + +def dict_pl_to_str(the_dict): + """Convert dictionary items of pathlib.Path class to strings, recursively""" + for key, val in the_dict.items(): + if isinstance(val, dict): + the_dict[key] = dict_pl_to_str(val) + elif isinstance(val, pl.Path): + the_dict[key] = str(val) + + return the_dict diff --git a/pywatershed/utils/utils.py b/pywatershed/utils/utils.py index 2c10c35b..014f3e4c 100644 --- a/pywatershed/utils/utils.py +++ b/pywatershed/utils/utils.py @@ -1,9 +1,11 @@ import functools +import pathlib as pl from time import time def timer(func): - # Use as a decorator to print the execution time of the passed function + """Use as a decorator to print the execution time of the passed function""" + @functools.wraps(func) def wrap_func(*args, **kwargs): t1 = time() From 33a82f6322d2ffa8eea86c79c653066ac800536e Mon Sep 17 00:00:00 2001 From: James McCreight Date: Mon, 23 Oct 2023 14:49:56 -0600 Subject: [PATCH 06/14] remove dprst_flag from pws control options --- pywatershed/base/control.py | 11 ++++++----- pywatershed/hydrology/prms_runoff.py | 22 ++++++++++++---------- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/pywatershed/base/control.py b/pywatershed/base/control.py index a0b8d618..fe1a2199 100644 --- a/pywatershed/base/control.py +++ b/pywatershed/base/control.py @@ -25,12 +25,11 @@ # It is important to maintain this list to issue warnings about what # variables are unrecognized/ignored in legacy and non-legacy control # files -# TODO: where should these be documented? -# TODO: identify which are PRMS-legacy? +# The following are duplicated in the Control docstring below and that +# docstring needs updated whenever any of these change. pws_control_options_avail = [ "budget_type", "calc_method", - "dprst_flag", # to remove? 
"restart", "input_dir", "load_n_time_batches", @@ -44,7 +43,6 @@ ] prms_legacy_options_avail = [ - "dprst_flag", "end_time", "init_vars_from_file", "initial_deltat", @@ -127,10 +125,13 @@ def __init__( def load( cls, control_file: fileish, + warn_unused_options: bool = True, ) -> "Control": msg = "Control.load will be deprecated for Control.load_prms" warn(msg, PendingDeprecationWarning) - return Control.load_prms(control_file) + return Control.load_prms( + control_file, warn_unused_options=warn_unused_options + ) @classmethod def load_prms( diff --git a/pywatershed/hydrology/prms_runoff.py b/pywatershed/hydrology/prms_runoff.py index 62565615..acd718ee 100644 --- a/pywatershed/hydrology/prms_runoff.py +++ b/pywatershed/hydrology/prms_runoff.py @@ -61,6 +61,7 @@ class PRMSRunoff(ConservativeProcess): canopy for each HRU intcp_changeover: Canopy throughfall caused by canopy density change from winter to summer + dprst_flag: bool=True by default, use depression storage or not? budget_type: one of [None, "warn", "error"] calc_method: one of ["fortran", "numba", "numpy"]. None defaults to "numba". @@ -85,10 +86,13 @@ def __init__( through_rain: adaptable, hru_intcpevap: adaptable, intcp_changeover: adaptable, + dprst_flag: bool = True, budget_type: Literal[None, "warn", "error"] = None, calc_method: Literal["numba", "numpy"] = None, verbose: bool = None, ) -> None: + self.dprst_flag = dprst_flag + super().__init__( control=control, discretization=discretization, @@ -102,6 +106,10 @@ def __init__( self._set_budget() self._init_calc_method() + + self.basin_init() + self.dprst_init() + return def _set_initial_conditions(self): @@ -119,13 +127,6 @@ def _set_initial_conditions(self): self.dprst_frac_clos = np.zeros(self.nhru, dtype=float) self.dprst_vol_thres_open = np.zeros(self.nhru, dtype=float) - # call the basin_init hack to calculate basin - # variables - self.basin_init() - - # call the depression storage init - self.dprst_init() - return @staticmethod @@ -246,7 +247,7 @@ def basin_init(self): probably go somewhere else at some point as I suspect other components may need similar information. 
""" - dprst_flag = ACTIVE + # dprst_flag = ACTIVE self.hru_perv = np.zeros(self.nhru, float) self.hru_frac_perv = np.zeros(self.nhru, float) self.hru_imperv = np.zeros(self.nhru, float) @@ -259,7 +260,7 @@ def basin_init(self): self.hru_imperv[i] = self.hru_percent_imperv[i] * harea perv_area = perv_area - self.hru_imperv[i] - if dprst_flag == ACTIVE: + if self.dprst_flag == ACTIVE: self.dprst_area_max[i] = self.dprst_frac[i] * harea if self.dprst_area_max[i] > 0.0: self.dprst_area_open_max[i] = ( @@ -511,6 +512,7 @@ def _calculate(self, time_length, vectorized=False): dprst_comp=self.dprst_comp, imperv_et=self.imperv_et, through_rain=self.through_rain, + dprst_flag=self.dprst_flag, ) self.infil_hru[:] = self.infil * self.hru_frac_perv @@ -598,6 +600,7 @@ def _calculate_numpy( dprst_comp, imperv_et, through_rain, + dprst_flag, ): dprst_chk = 0 infil[:] = 0.0 @@ -659,7 +662,6 @@ def _calculate_numpy( through_rain=through_rain[i], ) - dprst_flag = ACTIVE # cdl todo: hardwired frzen = OFF # cdl todo: hardwired if dprst_flag == ACTIVE: From feb7a8c0e97763a97fd07d197654502088cc19af Mon Sep 17 00:00:00 2001 From: James McCreight Date: Mon, 23 Oct 2023 15:35:36 -0600 Subject: [PATCH 07/14] document pws and prms-legacy options for Control.options --- pywatershed/base/control.py | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/pywatershed/base/control.py b/pywatershed/base/control.py index fe1a2199..e1e5515a 100644 --- a/pywatershed/base/control.py +++ b/pywatershed/base/control.py @@ -30,9 +30,9 @@ pws_control_options_avail = [ "budget_type", "calc_method", - "restart", + # "restart", "input_dir", - "load_n_time_batches", + # "load_n_time_batches", "netcdf_output_dir", "netcdf_output_var_names", "netcdf_output_separate_files", @@ -44,7 +44,7 @@ prms_legacy_options_avail = [ "end_time", - "init_vars_from_file", + # "init_vars_from_file", "initial_deltat", "nhruOutBaseFileName", "nhruOutVar_names", @@ -55,7 +55,7 @@ ] prms_to_pws_option_map = { - "init_vars_from_file": "restart", + # "init_vars_from_file": "restart", "initial_deltat": "time_step", "nhruOutBaseFileName": "netcdf_output_dir", "nhruOutVar_names": "netcdf_output_var_names", @@ -80,6 +80,33 @@ class Control(Accessor): time_step: the length fo the time step options: a dictionary of global Process options. + + Available pywatershed options: + * budget_type: one of [None, "warn", "error"] + * calc_method: one of ["numpy", "numba", "fortran"] + * input_dir: str or pathlib.path directory to search for input data + * netcdf_output_dir: str or pathlib.Path directory for output + * netcdf_output_var_names: a list of variable names to output + * netcdf_output_separate_files: bool if output is grouped by Process or if each variable is written to an individual file + * netcdf_budget_args: + * start_time: np.datetime64 + * end_time: np.datetime64 + * time_step_units: str containing single character code for np.timedelta64 + * verbosity: 0-10 + + Available PRMS legacy options: + Either used as-is or mapped to pywatershed options as indicated below. 
+ + * start_time + * end_time + * initial_deltat: translates to "time_step" + * init_vars_from_file: translates to "restart" + * nhruOutBaseFileName: translates to "netcdf_output_dir" + * nhruOutVar_names: translates to a subset of "netcdf_output_var_names" + * nsegmentOutBaseFileName: translates to "netcdf_output_dir" + * nsegmentOutVar_names: translates to a subset of "netcdf_output_var_names" + * print_debug: translates to "verbosity" + """ def __init__( From 75aabd3116c0117862c0dd44c0fd52b357f1b33b Mon Sep 17 00:00:00 2001 From: James McCreight Date: Wed, 25 Oct 2023 10:31:02 -0600 Subject: [PATCH 08/14] remove unused control options; model can optionally save the used control object to file --- autotest/test_model.py | 12 ++++++++---- examples/01_multi-process_models.ipynb | 4 +--- pywatershed/base/model.py | 18 +++++++++++++++++- pywatershed/hydrology/prms_snow.py | 6 +++++- pywatershed/hydrology/prms_soilzone.py | 20 ++++++++++++++++---- pywatershed/utils/netcdf_utils.py | 2 +- test_data/drb_2yr/control.yml | 10 ++++------ test_data/hru_1/control.yml | 9 +++------ test_data/ucb_2yr/control.yml | 9 +++------ 9 files changed, 58 insertions(+), 32 deletions(-) diff --git a/autotest/test_model.py b/autotest/test_model.py index b071b33a..251fa40c 100644 --- a/autotest/test_model.py +++ b/autotest/test_model.py @@ -49,7 +49,6 @@ def control(domain): control.options["calc_method"] = "fortran" else: control.options["calc_method"] = "numba" - control.options["load_n_time_batches"] = 1 del control.options["netcdf_output_var_names"] return control @@ -150,13 +149,18 @@ def test_model(domain, model_args, tmp_path): control = model_args["control"] control.options["input_dir"] = input_dir - control.options["netcdf_output_dir"] = tmp_path / "output" + model_out_dir = tmp_path / "output" + control.options["netcdf_output_dir"] = model_out_dir if control.options["calc_method"] == "fortran": with pytest.warns(UserWarning): - model = Model(**model_args) + model = Model(**model_args, write_control=model_out_dir) else: - model = Model(**model_args) + model = Model(**model_args, write_control=model_out_dir) + + # check that control yaml file was written + control_yaml_file = sorted(model_out_dir.glob("*model_control.yaml")) + assert len(control_yaml_file) == 1 # Test passing of control calc_method option if fortran_avail: diff --git a/examples/01_multi-process_models.ipynb b/examples/01_multi-process_models.ipynb index c3442c65..f250c4f4 100644 --- a/examples/01_multi-process_models.ipynb +++ b/examples/01_multi-process_models.ipynb @@ -243,8 +243,6 @@ " \"input_dir\": domain_dir,\n", " \"budget_type\": None,\n", " \"netcdf_output_dir\": nb_output_dir / \"nhm_memory\",\n", - " \"restart\": 0,\n", - " \"dprst_flag\": True,\n", " },\n", ")\n", "model_order = [\"prms_\" + proc.__name__[4:].lower() for proc in nhm_processes]\n", @@ -704,7 +702,7 @@ " ]\n", " for var in ll\n", "]\n", - "pprint(control_cp.to_dict(), sort_dicts=False)\n", + "print(control_cp) # .to_dict(), sort_dicts=False)\n", "\n", "control_yaml_file = run_dir / \"control.yml\"\n", "control_cp.to_yaml(control_yaml_file)" diff --git a/pywatershed/base/model.py b/pywatershed/base/model.py index 66771d23..a23f255d 100644 --- a/pywatershed/base/model.py +++ b/pywatershed/base/model.py @@ -1,5 +1,6 @@ import pathlib as pl from copy import deepcopy +from datetime import datetime from pprint import pprint from typing import Union from warnings import warn @@ -43,6 +44,11 @@ class Model: find_input_files: Search/find input file on __init__ or delay 
until run or advance of the model. Delaying (False) allows ModelGraph of the specified model without the need for input files. + write_control: bool, str, or pl.Path a directory into which a copy of + the passed control is to be written, default is False. This is for + convenience when lost of in-memory manipulations may be made before + passing to the model. The output file name has the form + %Y-%m-%dT%H:%M:%S.model_control.yaml PRMS-legacy instantiation ----------------------------- @@ -403,11 +409,12 @@ def __init__( control: Control = None, parameters: Union[Parameters, dict[Parameters]] = None, find_input_files: bool = True, + write_control: Union[bool, str, pl.Path] = False, ): self.control = deepcopy(control) self.parameters = parameters - # This is for backwards compatibility + # This is for backwards compatibility: make a method? msg = "Inputs are inconsistent" if isinstance(process_list_or_model_dict, (list, tuple)): # take the old-school-style inputs and convert to new-school inputs @@ -460,6 +467,15 @@ def __init__( else: self._default_nc_out_dir = None + if write_control or isinstance(write_control, (pl.Path, str)): + if isinstance(write_control, bool): + write_control = pl.Path(".") + format_fn = "%Y-%m-%dT%H:%M:%S.model_control.yaml" + yaml_fn = write_control / datetime.now().strftime(format_fn) + if not yaml_fn.parent.exists(): + yaml_fn.parent.mkdir(parents=True) + self.control.to_yaml(yaml_fn) + return def _categorize_model_dict(self): diff --git a/pywatershed/hydrology/prms_snow.py b/pywatershed/hydrology/prms_snow.py index 76faa43d..3e797e45 100644 --- a/pywatershed/hydrology/prms_snow.py +++ b/pywatershed/hydrology/prms_snow.py @@ -319,7 +319,11 @@ def _set_initial_conditions(self): sd = int(self.ndeplval / 11) self.snarea_curve_2d = np.reshape(self.snarea_curve, (sd, 11)) - if self.control.options["restart"] in [0, 2, 3]: + if True: + # For now there is no restart capability. we'll use the following + # line when there is + # if self.control.options["restart"] in [0, 2, 3]: + # The super().__init__ already set_initial_conditions using its # set_initial_conditions # Below Im just following PRMS6, will reconcile later with the diff --git a/pywatershed/hydrology/prms_soilzone.py b/pywatershed/hydrology/prms_soilzone.py index a30aa24f..7193da29 100644 --- a/pywatershed/hydrology/prms_soilzone.py +++ b/pywatershed/hydrology/prms_soilzone.py @@ -249,7 +249,11 @@ def _set_initial_conditions(self): self._pref_flow_den[wh_not_land] = zero # variables - if self.control.options["restart"] in [0, 2, 5]: + if True: + # For now there is no restart capability. we'll use the following + # line when there is + # if self.control.options["restart"] in [0, 2, 5]: + # these are set in sm_climateflow self.soil_moist[:] = ( self.soil_moist_init_frac * self.soil_moist_max @@ -269,7 +273,11 @@ def _set_initial_conditions(self): # expectations. Move this parameter business to __init__ # ssres_stor - if self.control.options["restart"] in [0, 2, 5]: + if True: + # For now there is no restart capability. we'll use the following + # line when there is + # if self.control.options["restart"] in [0, 2, 5]: + self.ssres_stor = self.ssstor_init_frac * self._sat_threshold wh_inactive_or_lake = np.where( (self.hru_type == HruType.INACTIVE.value) @@ -341,7 +349,11 @@ def _set_initial_conditions(self): self._pref_flow_flag[wh_land_and_prf_den] = True # can this one be combined with the restart read logic above? 
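A standalone sketch of the `write_control` handling added to `Model.__init__` in this patch: accept `True`, a string, or a `pathlib.Path`, default the directory to ".", timestamp the file name, create the directory if needed, and write the control to YAML. Here `yaml.dump` stands in for `Control.to_yaml`, and the dot-separated time format matches the Windows-safe form adopted later in the series (PATCH 12/14).

```python
# Sketch of the write_control behavior added to Model.__init__ above.
# yaml.dump stands in for Control.to_yaml; names are illustrative.
import pathlib as pl
from datetime import datetime

import yaml


def write_control_copy(control_dict: dict, write_control=True) -> pl.Path:
    """Write a timestamped copy of a control dictionary to YAML."""
    if isinstance(write_control, bool):
        # True means "current directory", mirroring the patch
        write_control = pl.Path(".")
    write_control = pl.Path(write_control)
    # "." time separators, matching the Windows fix applied in PATCH 12/14
    format_fn = "%Y-%m-%dT%H.%M.%S.model_control.yaml"
    yaml_fn = write_control / datetime.now().strftime(format_fn)
    if not yaml_fn.parent.exists():
        yaml_fn.parent.mkdir(parents=True)
    with open(yaml_fn, "w") as file:
        yaml.dump(control_dict, file)  # stands in for Control.to_yaml
    return yaml_fn


print(write_control_copy({"budget_type": "warn"}, write_control="model_output"))
```

Writing the as-run control next to the model output makes a run reproducible even after many in-memory edits to the options.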
- if self.control.options["restart"] in [0, 2, 5]: + if True: + # For now there is no restart capability. we'll use the following + # line when there is + # if self.control.options["restart"] in [0, 2, 5]: + wh_land_or_swale = np.where( (self.hru_type == HruType.LAND.value) | (self.hru_type == HruType.SWALE.value) @@ -487,7 +499,7 @@ def _calculate(self, simulation_time): cov_type=self.cov_type, current_time=self.control.current_time, dprst_evap_hru=self.dprst_evap_hru, - dprst_flag=self.control.options["dprst_flag"], + dprst_flag=True, # self.control.options["dprst_flag"], dprst_seep_hru=self.dprst_seep_hru, dunnian_flow=self.dunnian_flow, fastcoef_lin=self.fastcoef_lin, diff --git a/pywatershed/utils/netcdf_utils.py b/pywatershed/utils/netcdf_utils.py index 6ca6744e..93c79781 100644 --- a/pywatershed/utils/netcdf_utils.py +++ b/pywatershed/utils/netcdf_utils.py @@ -375,7 +375,7 @@ def __init__( clobber: bool = True, zlib: bool = True, complevel: int = 4, - chunk_sizes: dict = {"time": 30, "hruid": 0}, + chunk_sizes: dict = {"time": 1, "hruid": 0}, ): if isinstance(variables, dict): group_variables = [] diff --git a/test_data/drb_2yr/control.yml b/test_data/drb_2yr/control.yml index 082829b0..1cd891e3 100644 --- a/test_data/drb_2yr/control.yml +++ b/test_data/drb_2yr/control.yml @@ -14,12 +14,10 @@ verbosity: 0 budget_type: warn calc_method: numba -load_n_time_batches: 1 - -# still used by snow and soilzone, but should be removed -restart: 0 - -dprst_flag: True +# Coming options +# load_n_time_batches: 1 +# restart: 0 +# dprst_flag: True # candidates from PRMS style control files to HONOR or keep # modules listing: could be used to create model_dict diff --git a/test_data/hru_1/control.yml b/test_data/hru_1/control.yml index c4305fec..e332f4b1 100644 --- a/test_data/hru_1/control.yml +++ b/test_data/hru_1/control.yml @@ -14,9 +14,6 @@ verbosity: 0 budget_type: warn calc_method: numba -load_n_time_batches: 1 - -# still used by snow and soilzone, but should be removed -restart: 0 - -dprst_flag: True +# load_n_time_batches: 1 +# restart: 0 +# dprst_flag: True diff --git a/test_data/ucb_2yr/control.yml b/test_data/ucb_2yr/control.yml index ff5fae62..2f72aeff 100644 --- a/test_data/ucb_2yr/control.yml +++ b/test_data/ucb_2yr/control.yml @@ -14,9 +14,6 @@ verbosity: 0 budget_type: warn calc_method: numba -load_n_time_batches: 1 - -# still used by snow and soilzone, but should be removed -restart: 0 - -dprst_flag: True +# load_n_time_batches: 1 +# restart: 0 +# dprst_flag: True From 9df0c9503ca23dfe1ecf7e668d95e7d33bb86e5a Mon Sep 17 00:00:00 2001 From: James McCreight Date: Wed, 25 Oct 2023 13:40:47 -0600 Subject: [PATCH 09/14] clean up divide by zero in prms_to_mf6 test --- autotest/test_nhm_self_drive.py | 3 ++- autotest/test_prms_to_mf6.py | 21 +++++++++++++++++---- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/autotest/test_nhm_self_drive.py b/autotest/test_nhm_self_drive.py index b2e62c1d..112db1d1 100644 --- a/autotest/test_nhm_self_drive.py +++ b/autotest/test_nhm_self_drive.py @@ -73,13 +73,14 @@ def test_drive_indiv_process(domain, tmp_path): control.options["budget_type"] = "warn" control.options["calc_method"] = "numba" control.options["input_dir"] = nhm_output_dir + control.options["netcdf_output_dir"] = proc_model_output_dir proc_model = pws.Model( [proc], control=control, parameters=params, ) - proc_model.initialize_netcdf(output_dir=proc_model_output_dir) + proc_model.initialize_netcdf() proc_model.run(finalize=True) del proc_model, params, control diff --git 
a/autotest/test_prms_to_mf6.py b/autotest/test_prms_to_mf6.py index 0edc928b..015d5449 100644 --- a/autotest/test_prms_to_mf6.py +++ b/autotest/test_prms_to_mf6.py @@ -76,8 +76,6 @@ def test_mmr_to_mf6(domain, tmp_path, bc_binary_files, bc_flows_combine): .to("meter ** 3 / s") .magnitude ) - comp = abs(result - ans_tt) - assert ((comp < 1e-5) | ((comp / ans_tt) < 1e-5)).all() else: sim = flopy.mf6.MFSimulation.load( @@ -98,7 +96,22 @@ def test_mmr_to_mf6(domain, tmp_path, bc_binary_files, bc_flows_combine): .to("meter ** 3 / s") .magnitude ) - comp = abs(result - ans_tt) - assert ((comp < 1e-5) | ((comp / ans_tt) < 1e-5)).all() + + # << + # Compare + abs_diff = abs(result - ans_tt) + with np.errstate(divide="ignore", invalid="ignore"): + rel_diff = abs_diff / ans_tt + + abs_tol = 1.0e-5 + rel_tol = 1.0e-5 + + abs_close = abs_diff < abs_tol + rel_close = rel_diff < rel_tol + rel_close = np.where(np.isnan(rel_close), False, rel_close) + + close = abs_close | rel_close + + assert close.all() return From 633761ce6e819089e149bdb7c288a006187e0f3b Mon Sep 17 00:00:00 2001 From: James McCreight Date: Wed, 25 Oct 2023 13:42:24 -0600 Subject: [PATCH 10/14] options for soilzone and snow on parameter adjustment behavior: "warn", "error", "no" --- pywatershed/hydrology/prms_channel.py | 24 +++-- pywatershed/hydrology/prms_soilzone.py | 116 +++++++++++++++++-------- 2 files changed, 98 insertions(+), 42 deletions(-) diff --git a/pywatershed/hydrology/prms_channel.py b/pywatershed/hydrology/prms_channel.py index 76d0b734..d2581866 100644 --- a/pywatershed/hydrology/prms_channel.py +++ b/pywatershed/hydrology/prms_channel.py @@ -73,6 +73,12 @@ class PRMSChannel(ConservativeProcess): budget_type: one of [None, "warn", "error"] calc_method: one of ["fortran", "numba", "numpy"]. None defaults to "numba". + adjust_parameters: one of ["warn", "error", "no"]. Default is "warn", + the code edits the parameters and issues a warning. If "error" is + selected the the code issues warnings about all edited parameters + before raising the error to give you information. If "no" is + selected then no parameters are adjusted and there will be no + warnings or errors. verbose: Print extra information or not? """ @@ -86,6 +92,7 @@ def __init__( gwres_flow_vol: adaptable, budget_type: Literal[None, "warn", "error"] = None, calc_method: Literal["fortran", "numba", "numpy"] = None, + adjust_parameters: Literal["warn", "error", "no"] = "warn", verbose: bool = None, ) -> None: super().__init__( @@ -229,12 +236,19 @@ def _initialize_channel_data(self) -> None: # inputs in place during run # should also be done before computing velocity mask_too_flat = self.seg_slope < 1e-7 - if mask_too_flat.any(): - msg = "seg_slope < 1.0e-7, set to 1.0e-4" + if mask_too_flat.any() and self.adjust_parameters != "no": + msg = ( + "seg_slope < 1.0e-7, set to 1.0e-4 at indices:" + f"{np.where(mask_too_flat)[0]}" + ) warn(msg, UserWarning) - self.seg_slope = np.where( - self.seg_slope < 1e-7, 1.0e-4, self.seg_slope - ) # not in prms6 + if self.adjust_parameters == "error": + raise ValueError( + "seg_slope parameter values were edited and an error was " + "requested. See warnings for additional details." 
+ ) + # not in prms6 + self.seg_slope = np.where(mask_too_flat, 1.0e-4, self.seg_slope) # initialize Kcoef to 24.0 for segments with zero velocities # this is different from PRMS, which relied on divide by zero resulting diff --git a/pywatershed/hydrology/prms_soilzone.py b/pywatershed/hydrology/prms_soilzone.py index 42e245dd..c2f32f90 100644 --- a/pywatershed/hydrology/prms_soilzone.py +++ b/pywatershed/hydrology/prms_soilzone.py @@ -50,6 +50,12 @@ class PRMSSoilzone(ConservativeProcess): budget_type: one of [None, "warn", "error"] calc_method: one of ["fortran", "numba", "numpy"]. None defaults to "numba". + adjust_parameters: one of ["warn", "error", "no"]. Default is "warn", + the code edits the parameters and issues a warning. If "error" is + selected the the code issues warnings about all edited parameters + before raising the error to give you information. If "no" is + selected then no parameters are adjusted and there will be no + warnings or errors. verbose: Print extra information or not? """ @@ -70,6 +76,7 @@ def __init__( snowcov_area: adaptable, budget_type: Literal[None, "warn", "error"] = None, calc_method: Literal["numba", "numpy"] = None, + adjust_parameters: Literal["warn", "error", "no"] = "warn", verbose: bool = None, ) -> "PRMSSoilzone": super().__init__( @@ -296,74 +303,109 @@ def _set_initial_conditions(self): # JLM: These are for "ACTIVE and non-lake" hrus.... # JLM check that. + throw_error = False mask = self.soil_moist_max < 1.0e-5 - if mask.any(): - msg = "soil_moist_max < 1.0e-5, set to 1.0e-5" + if mask.any() and self.adjust_parameters != "no": + if self.adjust_parameters == "error": + throw_error = True + msg = ( + "soil_moist_max < 1.0e-5, set to 1.0e-5 at indices: " + f"{np.where(mask)[0]}" + ) warn(msg, UserWarning) - self.soil_moist_max = np.where(mask, 1.0e-5, self.soil_moist_max) + self.soil_moist_max = np.where(mask, 1.0e-5, self.soil_moist_max) mask = self.soil_rechr_max < 1.0e-5 - if mask.any(): - msg = "soil_rechr_max < 1.0e-5, set to 1.0e-5" + if mask.any() and self.adjust_parameters != "no": + if self.adjust_parameters == "error": + throw_error = True + msg = ( + "soil_rechr_max < 1.0e-5, set to 1.0e-5 at indices: " + f"{np.where(mask)[0]}" + ) warn(msg, UserWarning) - self.soil_rechr_max = np.where(mask, 1.0e-5, self.soil_rechr_max) + self.soil_rechr_max = np.where(mask, 1.0e-5, self.soil_rechr_max) mask = self.soil_rechr_max > self.soil_moist_max - if mask.any(): + if mask.any() and self.adjust_parameters != "no": + if self.adjust_parameters == "error": + throw_error = True msg = ( "soil_rechr_max > soil_moist_max, " - "soil_rechr_max set to soil_moist_max" + "soil_rechr_max set to soil_moist_max at indices: " + f"{np.where(mask)[0]}" ) warn(msg, UserWarning) - self.soil_rechr_max = np.where( - mask, - self.soil_moist_max, - self.soil_rechr_max, - ) + self.soil_rechr_max = np.where( + mask, + self.soil_moist_max, + self.soil_rechr_max, + ) mask = self.soil_rechr > self.soil_rechr_max - if mask.any(): + if mask.any() and self.adjust_parameters != "no": + if self.adjust_parameters == "error": + throw_error = True msg = ( "soil_rechr_init > soil_rechr_max, " - "setting soil_rechr_init to soil_rechr_max" + "setting soil_rechr_init to soil_rechr_max at indices: " + f"{np.where(mask)[0]}" ) warn(msg, UserWarning) - self.soil_rechr = np.where( - mask, - self.soil_rechr_max, - self.soil_rechr, - ) + self.soil_rechr = np.where( + mask, + self.soil_rechr_max, + self.soil_rechr, + ) mask = self.soil_moist > self.soil_moist_max - if mask.any(): + if 
mask.any() and self.adjust_parameters != "no": + if self.adjust_parameters == "error": + throw_error = True msg = ( "soil_moist_init > soil_moist_max, " - "setting soil_moist to soil_moist max" + "setting soil_moist to soil_moist max at indices: " + f"{np.where(mask)[0]}" ) warn(msg, UserWarning) - self.soil_moist = np.where( - mask, - self.soil_moist_max, - self.soil_moist, - ) + self.soil_moist = np.where( + mask, + self.soil_moist_max, + self.soil_moist, + ) mask = self.soil_rechr > self.soil_moist - if mask.any(): - msg = "soil_rechr > soil_moist, setting soil_rechr to soil_moist" + if mask.any() and self.adjust_parameters != "no": + if self.adjust_parameters == "error": + throw_error = True + msg = ( + "soil_rechr > soil_moist, " + "setting soil_rechr to soil_moist at indices: " + f"{np.where(mask)[0]}" + ) warn(msg, UserWarning) - self.soil_rechr = np.where(mask, self.soil_moist, self.soil_rechr) + self.soil_rechr = np.where(mask, self.soil_moist, self.soil_rechr) mask = self.ssres_stor > self._sat_threshold - if mask.any(): + if mask.any() and self.adjust_parameters != "no": + if self.adjust_parameters == "error": + throw_error = True msg = ( "ssres_stor > _sat_threshold, " - "setting ssres_stor to _sat_threshold" + "setting ssres_stor to _sat_threshold at indices: " + f"{np.where(mask)[0]}" + ) + self.ssres_stor = np.where( + mask, + self._sat_threshold, + self.ssres_stor, + ) + + if throw_error: + raise ValueError( + "Some parameter values were edited and an error was requested." + " See warnings for additional details." ) - self.ssres_stor = np.where( - mask, - self._sat_threshold, - self.ssres_stor, - ) # < # need to set on swale_limit self? move to variables? From ef355fe024f4a945bcbcfdd7f47cfe5271a5735b Mon Sep 17 00:00:00 2001 From: James McCreight Date: Wed, 25 Oct 2023 16:11:58 -0600 Subject: [PATCH 11/14] fix attribute adjust_parameters to private --- pywatershed/hydrology/prms_channel.py | 4 ++-- pywatershed/hydrology/prms_soilzone.py | 28 +++++++++++++------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/pywatershed/hydrology/prms_channel.py b/pywatershed/hydrology/prms_channel.py index d2581866..1ff0330c 100644 --- a/pywatershed/hydrology/prms_channel.py +++ b/pywatershed/hydrology/prms_channel.py @@ -236,13 +236,13 @@ def _initialize_channel_data(self) -> None: # inputs in place during run # should also be done before computing velocity mask_too_flat = self.seg_slope < 1e-7 - if mask_too_flat.any() and self.adjust_parameters != "no": + if mask_too_flat.any() and self._adjust_parameters != "no": msg = ( "seg_slope < 1.0e-7, set to 1.0e-4 at indices:" f"{np.where(mask_too_flat)[0]}" ) warn(msg, UserWarning) - if self.adjust_parameters == "error": + if self._adjust_parameters == "error": raise ValueError( "seg_slope parameter values were edited and an error was " "requested. See warnings for additional details." 
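A standalone numpy sketch of the `adjust_parameters` pattern introduced in PATCH 10/14 above, following the `PRMSChannel` variant: find out-of-range values, warn with their indices, raise immediately when "error" is requested, and only edit the array when adjustment is allowed. `PRMSSoilzone` instead accumulates a `throw_error` flag and raises once after all checks so every offending parameter is reported first. Toy data below; this is not the pywatershed class itself.

```python
# Sketch of the "warn" | "error" | "no" parameter-adjustment pattern above,
# applied to a toy seg_slope array rather than the PRMSChannel class.
from warnings import warn

import numpy as np


def adjust_seg_slope(seg_slope, adjust_parameters="warn"):
    seg_slope = np.asarray(seg_slope, dtype=float)
    mask_too_flat = seg_slope < 1.0e-7
    if mask_too_flat.any() and adjust_parameters != "no":
        msg = (
            "seg_slope < 1.0e-7, set to 1.0e-4 at indices: "
            f"{np.where(mask_too_flat)[0]}"
        )
        warn(msg, UserWarning)
        if adjust_parameters == "error":
            raise ValueError(
                "seg_slope parameter values were edited and an error was "
                "requested. See warnings for additional details."
            )
        # only reached for "warn": edit the flagged values in place
        seg_slope = np.where(mask_too_flat, 1.0e-4, seg_slope)
    return seg_slope


print(adjust_seg_slope([1.0e-8, 0.01], adjust_parameters="warn"))
```

The "no" setting leaves the parameters untouched and silent, which is useful when a user wants full control over (and responsibility for) their parameter values.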
diff --git a/pywatershed/hydrology/prms_soilzone.py b/pywatershed/hydrology/prms_soilzone.py index c2f32f90..f0e4182a 100644 --- a/pywatershed/hydrology/prms_soilzone.py +++ b/pywatershed/hydrology/prms_soilzone.py @@ -305,8 +305,8 @@ def _set_initial_conditions(self): throw_error = False mask = self.soil_moist_max < 1.0e-5 - if mask.any() and self.adjust_parameters != "no": - if self.adjust_parameters == "error": + if mask.any() and self._adjust_parameters != "no": + if self._adjust_parameters == "error": throw_error = True msg = ( "soil_moist_max < 1.0e-5, set to 1.0e-5 at indices: " @@ -316,8 +316,8 @@ def _set_initial_conditions(self): self.soil_moist_max = np.where(mask, 1.0e-5, self.soil_moist_max) mask = self.soil_rechr_max < 1.0e-5 - if mask.any() and self.adjust_parameters != "no": - if self.adjust_parameters == "error": + if mask.any() and self._adjust_parameters != "no": + if self._adjust_parameters == "error": throw_error = True msg = ( "soil_rechr_max < 1.0e-5, set to 1.0e-5 at indices: " @@ -327,8 +327,8 @@ def _set_initial_conditions(self): self.soil_rechr_max = np.where(mask, 1.0e-5, self.soil_rechr_max) mask = self.soil_rechr_max > self.soil_moist_max - if mask.any() and self.adjust_parameters != "no": - if self.adjust_parameters == "error": + if mask.any() and self._adjust_parameters != "no": + if self._adjust_parameters == "error": throw_error = True msg = ( "soil_rechr_max > soil_moist_max, " @@ -343,8 +343,8 @@ def _set_initial_conditions(self): ) mask = self.soil_rechr > self.soil_rechr_max - if mask.any() and self.adjust_parameters != "no": - if self.adjust_parameters == "error": + if mask.any() and self._adjust_parameters != "no": + if self._adjust_parameters == "error": throw_error = True msg = ( "soil_rechr_init > soil_rechr_max, " @@ -359,8 +359,8 @@ def _set_initial_conditions(self): ) mask = self.soil_moist > self.soil_moist_max - if mask.any() and self.adjust_parameters != "no": - if self.adjust_parameters == "error": + if mask.any() and self._adjust_parameters != "no": + if self._adjust_parameters == "error": throw_error = True msg = ( "soil_moist_init > soil_moist_max, " @@ -375,8 +375,8 @@ def _set_initial_conditions(self): ) mask = self.soil_rechr > self.soil_moist - if mask.any() and self.adjust_parameters != "no": - if self.adjust_parameters == "error": + if mask.any() and self._adjust_parameters != "no": + if self._adjust_parameters == "error": throw_error = True msg = ( "soil_rechr > soil_moist, " @@ -387,8 +387,8 @@ def _set_initial_conditions(self): self.soil_rechr = np.where(mask, self.soil_moist, self.soil_rechr) mask = self.ssres_stor > self._sat_threshold - if mask.any() and self.adjust_parameters != "no": - if self.adjust_parameters == "error": + if mask.any() and self._adjust_parameters != "no": + if self._adjust_parameters == "error": throw_error = True msg = ( "ssres_stor > _sat_threshold, " From 0560155ca3e13c31a302d22b58f372790ab74a77 Mon Sep 17 00:00:00 2001 From: James McCreight Date: Wed, 25 Oct 2023 17:04:38 -0600 Subject: [PATCH 12/14] output file name formatting problem with windows --- pywatershed/base/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pywatershed/base/model.py b/pywatershed/base/model.py index a23f255d..77a7f018 100644 --- a/pywatershed/base/model.py +++ b/pywatershed/base/model.py @@ -470,7 +470,7 @@ def __init__( if write_control or isinstance(write_control, (pl.Path, str)): if isinstance(write_control, bool): write_control = pl.Path(".") - format_fn = "%Y-%m-%dT%H:%M:%S.model_control.yaml" 
+ format_fn = "%Y-%m-%dT%H.%M.%S.model_control.yaml" yaml_fn = write_control / datetime.now().strftime(format_fn) if not yaml_fn.parent.exists(): yaml_fn.parent.mkdir(parents=True) From 50b9778ed6d8e48d7db1a67382a3deb07178282b Mon Sep 17 00:00:00 2001 From: James McCreight Date: Thu, 26 Oct 2023 16:02:33 -0600 Subject: [PATCH 13/14] catch notebooks up with changes --- examples/01_multi-process_models.ipynb | 87 ++++++++++++------- examples/02_prms_legacy_models.ipynb | 14 +-- examples/03_compare_pws_prms.ipynb | 39 ++++++--- pywatershed/atmosphere/prms_atmosphere.py | 11 ++- pywatershed/atmosphere/prms_solar_geometry.py | 11 ++- .../generate/prms_diagnostic_variables.py | 8 +- 6 files changed, 102 insertions(+), 68 deletions(-) diff --git a/examples/01_multi-process_models.ipynb b/examples/01_multi-process_models.ipynb index f250c4f4..f6d003f8 100644 --- a/examples/01_multi-process_models.ipynb +++ b/examples/01_multi-process_models.ipynb @@ -241,7 +241,7 @@ " time_step=np.timedelta64(24, \"h\"),\n", " options={\n", " \"input_dir\": domain_dir,\n", - " \"budget_type\": None,\n", + " \"budget_type\": \"error\",\n", " \"netcdf_output_dir\": nb_output_dir / \"nhm_memory\",\n", " },\n", ")\n", @@ -349,8 +349,10 @@ "source": [ "run_dir = pl.Path(nb_output_dir / \"nhm_yaml\")\n", "run_dir.mkdir(exist_ok=True)\n", - "control_yaml_file = run_dir / \"control.yml\"\n", - "control.to_yaml(control_yaml_file)" + "control_yaml_file = run_dir / \"control.yaml\"\n", + "control_yaml = deepcopy(control)\n", + "control_yaml.options[\"netcdf_output_dir\"] = nb_output_dir / \"nhm_yaml\"\n", + "control_yaml.to_yaml(control_yaml_file)" ] }, { @@ -366,6 +368,16 @@ "We add the option `netcdf_output_dir` to the control since we assume we wont be able to do so at run time. Note that this option and the `input_dir` option are `pathlib.Path` objects. These are not what we want to write to file. We want their string version. We could do `str()` on each one by hand, but it will be more handy to write a small, recursive function to do this on a supplied dictionary since this will be a recurring task with the model dictionary we will create after the control YAML file." ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "8044016c-7728-4149-9be5-f52b8f43975d", + "metadata": {}, + "outputs": [], + "source": [ + "control" + ] + }, { "cell_type": "code", "execution_count": null, @@ -471,7 +483,7 @@ "metadata": {}, "outputs": [], "source": [ - "model_dict_yaml_file = run_dir / \"model_dict.yml\"\n", + "model_dict_yaml_file = run_dir / \"model_dict.yaml\"\n", "with open(model_dict_yaml_file, \"w\") as file:\n", " _ = yaml.dump(model_dict, file)" ] @@ -491,7 +503,7 @@ "metadata": {}, "outputs": [], "source": [ - "! cat 01_multi-process_models/nhm_yaml/control.yml" + "! cat 01_multi-process_models/nhm_yaml/control.yaml" ] }, { @@ -501,7 +513,7 @@ "metadata": {}, "outputs": [], "source": [ - "! cat 01_multi-process_models/nhm_yaml/model_dict.yml" + "! 
cat 01_multi-process_models/nhm_yaml/model_dict.yaml" ] }, { @@ -519,8 +531,8 @@ "metadata": {}, "outputs": [], "source": [ - "model_yml = pws.Model.from_yaml(model_dict_yaml_file)\n", - "model_yml" + "model_yaml = pws.Model.from_yaml(model_dict_yaml_file)\n", + "model_yaml" ] }, { @@ -533,7 +545,7 @@ "show_params = not (platform == \"darwin\" and processor() == \"arm\")\n", "try:\n", " pws.analysis.ModelGraph(\n", - " model_yml,\n", + " model_yaml,\n", " hide_variables=False,\n", " process_colors=palette,\n", " show_params=show_params,\n", @@ -558,8 +570,8 @@ "outputs": [], "source": [ "%%time\n", - "model_yml.run()\n", - "model_yml.finalize()" + "model_yaml.run()\n", + "model_yaml.finalize()" ] }, { @@ -584,11 +596,23 @@ "outputs": [], "source": [ "mem_out_dir = nb_output_dir / \"nhm_memory\"\n", - "yml_out_dir = nb_output_dir / \"nhm_yaml\"\n", + "yaml_out_dir = nb_output_dir / \"nhm_yaml\"\n", "mem_files = sorted(mem_out_dir.glob(\"*.nc\"))\n", - "yml_files = sorted(yml_out_dir.glob(\"*.nc\"))\n", + "yaml_files = sorted(yaml_out_dir.glob(\"*.nc\"))\n", "# We get all the same output files\n", - "assert set([ff.name for ff in mem_files]) == set([ff.name for ff in yml_files])" + "assert set([ff.name for ff in mem_files]) == set(\n", + " [ff.name for ff in yaml_files]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "310a8d35-5ddb-4ef1-b4a1-65a823d4aa7c", + "metadata": {}, + "outputs": [], + "source": [ + "set([ff.name for ff in yaml_files]) - set([ff.name for ff in mem_files])" ] }, { @@ -604,6 +628,16 @@ "Now compare the values of all variables:" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "523633e0-bb6e-4fa7-8a7a-bd2de3d602b0", + "metadata": {}, + "outputs": [], + "source": [ + "nb_output_dir.resolve()" + ] + }, { "cell_type": "code", "execution_count": null, @@ -611,9 +645,12 @@ "metadata": {}, "outputs": [], "source": [ - "for mf, yf in zip(mem_files, yml_files):\n", + "for mf, yf in zip(mem_files, yaml_files):\n", " var = mf.with_suffix(\"\").name\n", - " # print(var)\n", + "\n", + " if \"budget\" in var.lower():\n", + " continue\n", + "\n", " mda = xr.open_dataset(mf)[var]\n", " yda = xr.open_dataset(yf)[var]\n", " xr.testing.assert_equal(mda, yda)\n", @@ -681,7 +718,7 @@ "metadata": {}, "outputs": [], "source": [ - "run_dir = pl.Path(nb_output_dir / \"yml_less_output\").resolve()\n", + "run_dir = pl.Path(nb_output_dir / \"yaml_less_output\").resolve()\n", "run_dir.mkdir(exist_ok=True)" ] }, @@ -704,7 +741,7 @@ "]\n", "print(control_cp) # .to_dict(), sort_dicts=False)\n", "\n", - "control_yaml_file = run_dir / \"control.yml\"\n", + "control_yaml_file = run_dir / \"control.yaml\"\n", "control_cp.to_yaml(control_yaml_file)" ] }, @@ -725,7 +762,7 @@ "source": [ "model_dict_copy = deepcopy(model_dict)\n", "model_dict_copy[\"control\"] = str(control_yaml_file)\n", - "model_dict_yaml_file = run_dir / \"model_dict.yml\"" + "model_dict_yaml_file = run_dir / \"model_dict.yaml\"" ] }, { @@ -928,7 +965,7 @@ "control_cp = deepcopy(control)\n", "control_cp.options[\"input_dir\"] = yaml_output_dir.resolve()\n", "control_cp.options[\"netcdf_output_dir\"] = run_dir.resolve()\n", - "control_yaml_file = run_dir / \"control.yml\"\n", + "control_yaml_file = run_dir / \"control.yaml\"\n", "control_cp.to_yaml(control_yaml_file)\n", "pprint(control.to_dict(), sort_dicts=False)" ] @@ -949,7 +986,7 @@ "outputs": [], "source": [ "model_dict[\"control\"] = str(control_yaml_file)\n", - "model_dict_yaml_file = run_dir / \"model_dict.yml\"\n", + 
"model_dict_yaml_file = run_dir / \"model_dict.yaml\"\n", "keep_procs = [\"prms_soilzone\", \"prms_groundwater\", \"prms_channel\"]\n", "model_dict[\"model_order\"] = keep_procs\n", "for kk in list(model_dict.keys()):\n", @@ -1205,14 +1242,6 @@ "* Regan, R. S., Markstrom, S. L., Hay, L. E., Viger, R. J., Norton, P. A., Driscoll, J. M., & LaFontaine, J. H. (2018). Description of the national hydrologic model for use with the precipitation-runoff modeling system (prms) (No. 6-B9). US Geological Survey.\n", "* Regan, R.S., Markstrom, S.L., LaFontaine, J.H., 2022, PRMS version 5.2.1: Precipitation-Runoff Modeling System (PRMS): U.S. Geological Survey Software Release, 02/10/2022." ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ff4dbac1-046f-4354-94e6-2610269c2de4", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/examples/02_prms_legacy_models.ipynb b/examples/02_prms_legacy_models.ipynb index 566e915a..997971ad 100644 --- a/examples/02_prms_legacy_models.ipynb +++ b/examples/02_prms_legacy_models.ipynb @@ -69,11 +69,7 @@ { "cell_type": "markdown", "id": "39abd69f-13a3-4f23-9678-a57c0b1f848d", - "metadata": { - "jupyter": { - "source_hidden": true - } - }, + "metadata": {}, "source": [ "The domain directory is where we have all the required inputs to run this model (among others) and `nb_output_dir` is where this notebook will write its output. " ] @@ -493,14 +489,6 @@ "%%time\n", "submodel.run(finalize=True)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cfcc3aa8-cc8f-40b9-9a72-124567c2c8bf", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/examples/03_compare_pws_prms.ipynb b/examples/03_compare_pws_prms.ipynb index b0bb3a08..e3679a7a 100644 --- a/examples/03_compare_pws_prms.ipynb +++ b/examples/03_compare_pws_prms.ipynb @@ -96,7 +96,7 @@ "calc_method: str = \"numba\"\n", "budget_type: str = None\n", "\n", - "run_prms: bool = True ## always forced/overwrite\n", + "run_prms: bool = False ## always forced/overwrite\n", "\n", "run_pws: bool = True # run if the output does not exist on disk\n", "force_pws_run: bool = True # if it exists on disk, re-run it and overwrite?" 
@@ -194,7 +194,22 @@ " input_dir = domain_dir / \"output\"\n", " run_dir = nb_output_dir / f\"{domain_name}_subset_nhm\"\n", "\n", - " control = pws.Control.load(domain_dir / \"control.test\")\n", + " control = pws.Control.load_prms(\n", + " domain_dir / \"control.test\", warn_unused_options=False\n", + " )\n", + " output_var_names = control.options[\"netcdf_output_var_names\"] + [\n", + " \"soltab_horad_potsw\",\n", + " \"soltab_potsw\",\n", + " \"soltab_sunhrs\",\n", + " ]\n", + " control.options = control.options | {\n", + " \"input_dir\": input_dir,\n", + " \"budget_type\": budget_type,\n", + " \"calc_method\": calc_method,\n", + " \"netcdf_output_dir\": run_dir,\n", + " \"netcdf_output_var_names\": output_var_names,\n", + " }\n", + "\n", " params = pws.parameters.PrmsParameters.load(domain_dir / \"myparam.param\")\n", "\n", " if run_dir.exists():\n", @@ -205,13 +220,6 @@ "\n", " print(f\"PWS writing output to {run_dir}\")\n", "\n", - " control.options = control.options | {\n", - " \"input_dir\": input_dir,\n", - " \"budget_type\": budget_type,\n", - " \"calc_method\": calc_method,\n", - " \"netcdf_output_dir\": run_dir,\n", - " }\n", - "\n", " nhm = pws.Model(\n", " nhm_processes,\n", " control=control,\n", @@ -248,11 +256,16 @@ " var_meta = pws.meta.find_variables(var_name)[var_name]\n", " ylabel = f\"{fill(var_meta['desc'], 40)}\\n({var_meta['units']})\"\n", "\n", + " pws_file = run_dir / f\"{var_name}.nc\"\n", " prms_file = domain_dir / f\"output/{var_name}.nc\"\n", " if not prms_file.exists():\n", " return None\n", + " if not pws_file.exists():\n", + " print(f\"PWS file {pws_file} DNE, skipping.\")\n", + " return None\n", + "\n", " prms_var = xr.open_dataarray(prms_file)\n", - " pws_var = xr.open_dataarray(run_dir / f\"{var_name}.nc\")\n", + " pws_var = xr.open_dataarray(pws_file)\n", "\n", " if rmse_min is not None:\n", " if \"time\" in prms_var.dims:\n", @@ -313,7 +326,9 @@ " return None\n", " prms = xr.open_dataarray(prms_file, decode_timedelta=False)\n", " pws_file = run_dir / f\"{var_name}.nc\"\n", - " assert pws_file.exists()\n", + " if not pws_file.exists():\n", + " print(f\"PWS file '{prms_file}' DNE, skipping.\")\n", + " return None\n", " nhm_after = xr.open_dataarray(pws_file, decode_timedelta=False)\n", " if \"time\" in prms.dims:\n", " time_dim = \"time\"\n", @@ -549,7 +564,7 @@ "outputs": [], "source": [ "if pws.PRMSGroundwater in nhm_processes:\n", - " compare_var_timeseries(\"gwres_flow_vol\")" + " compare_var_timeseries(\"gwres_flow\")" ] }, { diff --git a/pywatershed/atmosphere/prms_atmosphere.py b/pywatershed/atmosphere/prms_atmosphere.py index bde02982..1173bc9e 100644 --- a/pywatershed/atmosphere/prms_atmosphere.py +++ b/pywatershed/atmosphere/prms_atmosphere.py @@ -138,12 +138,11 @@ def __init__( self._calculated = False self._netcdf_initialized = False - self.netcdf_output_dir = netcdf_output_dir - if self.netcdf_output_dir: + if self._netcdf_output_dir: self.initialize_netcdf( - output_dir=pl.Path(netcdf_output_dir), - separate_variables=netcdf_separate_files, - output_vars=netcdf_output_vars, + output_dir=pl.Path(self._netcdf_output_dir), + separate_variables=self._netcdf_separate_files, + output_vars=self._netcdf_output_vars, ) return @@ -835,7 +834,7 @@ def initialize_netcdf( output_vars: list = None, **kwargs, ): - if self._netcdf_initialized: + if self._netcdf_initialized and self.control.options["verbosity"] > 5: msg = ( f"{self.name} class previously initialized netcdf output " f"in {self._netcdf_output_dir}" diff --git 
a/pywatershed/atmosphere/prms_solar_geometry.py b/pywatershed/atmosphere/prms_solar_geometry.py index c045bfa7..67672a3c 100644 --- a/pywatershed/atmosphere/prms_solar_geometry.py +++ b/pywatershed/atmosphere/prms_solar_geometry.py @@ -97,12 +97,11 @@ def __init__( self._calculated = False self._netcdf_initialized = False - if self.netcdf_output_dir: - self._calculate_all_time() + if self._netcdf_output_dir: self.initialize_netcdf( - output_dir=pl.Path(netcdf_output_dir), - separate_variables=netcdf_separate_files, - output_vars=netcdf_output_vars, + output_dir=pl.Path(self._netcdf_output_dir), + separate_variables=self._netcdf_separate_files, + output_vars=self._netcdf_output_vars, ) return @@ -481,7 +480,7 @@ def initialize_netcdf( output_vars: list = None, **kwargs, ): - if self._netcdf_initialized: + if self._netcdf_initialized and self.control.options["verbosity"] > 5: msg = ( f"{self.name} class previously initialized netcdf output " f"in {self._netcdf_output_dir}" diff --git a/test_data/generate/prms_diagnostic_variables.py b/test_data/generate/prms_diagnostic_variables.py index 92cccfa7..60ee3416 100644 --- a/test_data/generate/prms_diagnostic_variables.py +++ b/test_data/generate/prms_diagnostic_variables.py @@ -74,7 +74,9 @@ def diagnose_simple_vars_to_nc( # the final value (-1) was wrapped to the zeroth position # get the initial conditions for the first time by initializing the # model This works based on the control file, so could handle restart. - control = pws.Control.load(domain_dir / "control.test") + control = pws.Control.load_prms( + domain_dir / "control.test", warn_unused_options=False + ) control.options = control.options | { "input_dir": domain_dir / "output", } @@ -251,7 +253,9 @@ def diagnose_final_vars_to_nc( data_file = data_dir / f"{vv}.nc" data[vv] = xr.open_dataarray(data_file) - control = pws.Control.load(domain_dir / "control.test") + control = pws.Control.load_prms( + domain_dir / "control.test", warn_unused_options=False + ) s_per_time = control.time_step_seconds params = pws.parameters.PrmsParameters.load( domain_dir / "myparam.param" From a938fa401144a766d6f37d035c144960d41443fd Mon Sep 17 00:00:00 2001 From: James McCreight Date: Fri, 27 Oct 2023 10:07:28 -0600 Subject: [PATCH 14/14] cleanup minor issues w notebooks --- examples/02_prms_legacy_models.ipynb | 63 ++++++++++++------- examples/03_compare_pws_prms.ipynb | 2 +- pywatershed/atmosphere/prms_atmosphere.py | 6 +- pywatershed/atmosphere/prms_solar_geometry.py | 6 +- 4 files changed, 53 insertions(+), 24 deletions(-) diff --git a/examples/02_prms_legacy_models.ipynb b/examples/02_prms_legacy_models.ipynb index 997971ad..313a8a29 100644 --- a/examples/02_prms_legacy_models.ipynb +++ b/examples/02_prms_legacy_models.ipynb @@ -144,7 +144,9 @@ " pws.PRMSSoilzone,\n", " pws.PRMSGroundwater,\n", " pws.PRMSChannel,\n", - "]" + "]\n", + "\n", + "submodel_processes = [pws.PRMSSoilzone, pws.PRMSGroundwater, pws.PRMSChannel]" ] }, { @@ -242,6 +244,8 @@ "source": [ "control.edit_end_time(np.datetime64(\"1979-07-01T00:00:00\"))\n", "run_dir = nb_output_dir / \"nhm\"\n", + "if run_dir.exists():\n", + " rmtree(run_dir)\n", "control.options = control.options | {\n", " \"input_dir\": cbh_nc_dir,\n", " \"budget_type\": \"warn\",\n", @@ -268,6 +272,26 @@ "control.options[\"netcdf_output_var_names\"]" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "d35aa7f6-0d3a-425f-8db9-1b846675cc5c", + "metadata": {}, + "outputs": [], + "source": [ + "submodel_input_names = pws.Model(\n", + " 
submodel_processes,\n", + " control=control,\n", + " parameters=params,\n", + " find_input_files=False,\n", + ")._file_input_names\n", + "print(submodel_input_names - set(control.options[\"netcdf_output_var_names\"]))\n", + "control.options[\"netcdf_output_var_names\"] = set(\n", + " control.options[\"netcdf_output_var_names\"] + list(submodel_input_names)\n", + ")\n", + "control.options[\"netcdf_output_var_names\"]" + ] + }, { "cell_type": "markdown", "id": "0b46e9ca-e84b-40b3-bdc5-179fd6c85555", @@ -393,21 +417,6 @@ "Now suppose you wanted to change parameters or model process representation in the PRMSSoilzone to better predict streamflow. As the model is 1-way coupled, you can simply run a submodel starting with PRMSSoilzone and running through PRMSChannel. We simply change our process list to get this \"submodel\" of the full NHM model above." ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "18f20704-36e4-407b-a7a8-1964563cf79a", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - } - }, - "outputs": [], - "source": [ - "submodel_processes = [pws.PRMSSoilzone, pws.PRMSGroundwater, pws.PRMSChannel]" - ] - }, { "cell_type": "markdown", "id": "88353277-76a3-423e-a913-b9575d9ebf02", @@ -428,16 +437,20 @@ }, "outputs": [], "source": [ - "with warnings.catch_warnings():\n", - " warnings.simplefilter(\"ignore\")\n", - " control = pws.Control.load(domain_dir / \"control.test\")\n", + "control = pws.Control.load_prms(\n", + " domain_dir / \"control.test\", warn_unused_options=False\n", + ")\n", + "\n", + "run_dir_submodel = nb_output_dir / \"nhm_submodel\"\n", + "if run_dir_submodel.exists():\n", + " rmtree(run_dir_submodel)\n", "\n", "control.edit_end_time(np.datetime64(\"1979-07-01T00:00:00\"))\n", "control.options = control.options | {\n", " \"input_dir\": run_dir,\n", " \"budget_type\": \"warn\",\n", " \"calc_method\": \"numba\",\n", - " \"netcdf_output_dir\": nb_output_dir / \"nhm_submodel\",\n", + " \"netcdf_output_dir\": run_dir_submodel,\n", "}\n", "\n", "\n", @@ -465,7 +478,7 @@ "outputs": [], "source": [ "submodel = pws.Model(\n", - " [pws.PRMSSoilzone, pws.PRMSGroundwater, pws.PRMSChannel],\n", + " submodel_processes,\n", " control=control,\n", " parameters=params,\n", ")" @@ -489,6 +502,14 @@ "%%time\n", "submodel.run(finalize=True)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e6b4c56-bd36-4d5e-9a6c-4537e036c2bd", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/examples/03_compare_pws_prms.ipynb b/examples/03_compare_pws_prms.ipynb index e3679a7a..d3378332 100644 --- a/examples/03_compare_pws_prms.ipynb +++ b/examples/03_compare_pws_prms.ipynb @@ -96,7 +96,7 @@ "calc_method: str = \"numba\"\n", "budget_type: str = None\n", "\n", - "run_prms: bool = False ## always forced/overwrite\n", + "run_prms: bool = True ## always forced/overwrite\n", "\n", "run_pws: bool = True # run if the output does not exist on disk\n", "force_pws_run: bool = True # if it exists on disk, re-run it and overwrite?" 
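The notebook 02 cell added earlier in this patch builds a throwaway `pws.Model` with `find_input_files=False` and reads its private `_file_input_names` attribute to learn which variables the later submodel will need as inputs, then unions those names into `netcdf_output_var_names` so the full-model run writes them. Below is a standalone sketch of just the set arithmetic; the input-name set is hand-written here rather than discovered from a Model.

```python
# Sketch of the set arithmetic in the notebook 02 cell above. The
# submodel_input_names set is a hand-written stand-in for what the notebook
# reads from Model(..., find_input_files=False)._file_input_names.
requested_output = ["hru_actet", "recharge", "seg_outflow"]
submodel_input_names = {
    "soltab_potsw", "sroff_vol", "ssres_flow_vol", "potet", "hru_ppt",
}

# which submodel inputs the requested output list does not already cover
missing = submodel_input_names - set(requested_output)
print(missing)

# union, matching set(requested + list(submodel_input_names)) in the cell
netcdf_output_var_names = set(requested_output + list(submodel_input_names))
print(sorted(netcdf_output_var_names))
```

This guarantees the one-way-coupled submodel can be driven entirely from the full model's NetCDF output directory.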
diff --git a/pywatershed/atmosphere/prms_atmosphere.py b/pywatershed/atmosphere/prms_atmosphere.py index 1173bc9e..e6e7be6a 100644 --- a/pywatershed/atmosphere/prms_atmosphere.py +++ b/pywatershed/atmosphere/prms_atmosphere.py @@ -834,7 +834,11 @@ def initialize_netcdf( output_vars: list = None, **kwargs, ): - if self._netcdf_initialized and self.control.options["verbosity"] > 5: + if ( + self._netcdf_initialized + and "verbosity" in self.control.options.keys() + and self.control.options["verbosity"] > 5 + ): msg = ( f"{self.name} class previously initialized netcdf output " f"in {self._netcdf_output_dir}" diff --git a/pywatershed/atmosphere/prms_solar_geometry.py b/pywatershed/atmosphere/prms_solar_geometry.py index 67672a3c..acf3f05e 100644 --- a/pywatershed/atmosphere/prms_solar_geometry.py +++ b/pywatershed/atmosphere/prms_solar_geometry.py @@ -480,7 +480,11 @@ def initialize_netcdf( output_vars: list = None, **kwargs, ): - if self._netcdf_initialized and self.control.options["verbosity"] > 5: + if ( + self._netcdf_initialized + and "verbosity" in self.control.options.keys() + and self.control.options["verbosity"] > 5 + ): msg = ( f"{self.name} class previously initialized netcdf output " f"in {self._netcdf_output_dir}"
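A stand-in sketch of the guarded warning in these last two hunks: the "previously initialized" message is only emitted when the control options actually contain a `verbosity` entry above the threshold, and the added `"verbosity" in ... options.keys()` test avoids a KeyError when the option is absent. What the real methods do after the message is not shown in the hunks, so the stub below simply warns and records the new directory; the class is illustrative, not `PRMSAtmosphere` or `PRMSSolarGeometry`.

```python
# Stand-in for the verbosity-guarded re-initialization warning in the
# prms_atmosphere / prms_solar_geometry hunks above. Behavior after the
# warning is assumed for illustration.
from warnings import warn


class NetcdfWriterStub:
    def __init__(self, options: dict, output_dir: str = "output"):
        self.name = type(self).__name__
        self._options = options
        self._netcdf_initialized = False
        self._netcdf_output_dir = output_dir

    def initialize_netcdf(self, output_dir=None):
        if (
            self._netcdf_initialized
            and "verbosity" in self._options.keys()
            and self._options["verbosity"] > 5
        ):
            msg = (
                f"{self.name} class previously initialized netcdf output "
                f"in {self._netcdf_output_dir}"
            )
            warn(msg)
        self._netcdf_initialized = True
        if output_dir is not None:
            self._netcdf_output_dir = output_dir


writer = NetcdfWriterStub({"verbosity": 10})
writer.initialize_netcdf()
writer.initialize_netcdf("elsewhere")  # second call triggers the warning
```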