diff --git a/dpgen2/constants.py b/dpgen2/constants.py
index 6d5d0197..bcc322a9 100644
--- a/dpgen2/constants.py
+++ b/dpgen2/constants.py
@@ -1,8 +1,11 @@
 train_index_pattern = "%04d"
 train_task_pattern = "task." + train_index_pattern
 train_script_name = "input.json"
+train_cnn_script_name = "input_cnn.json"
+train_qnn_script_name = "input_qnn.json"
 train_log_name = "train.log"
 model_name_pattern = "model.%03d.pb"
+nvnmd_model_name_pattern = "nvnmd_model.%03d"
 pytorch_model_name_pattern = "model.%03d.pth"
 model_name_match_pattern = r"model\.[0-9]{3,}(\.pb|\.pth)"
 lmp_index_pattern = "%06d"
diff --git a/dpgen2/entrypoint/args.py b/dpgen2/entrypoint/args.py
index df11ff7f..7d24924c 100644
--- a/dpgen2/entrypoint/args.py
+++ b/dpgen2/entrypoint/args.py
@@ -127,12 +127,59 @@ def dp_train_args():
     ]
 
 
+def nvnmd_train_args():
+    doc_numb_models = "Number of models trained for evaluating the model deviation"
+    doc_config = "Configuration of training"
+    doc_template_script = "File names of the template training script. It can be a `List[str]`, whose length equals `numb_models`; each template script in the list is used to train one model. It can also be a `str`, in which case all models share the same template training script."
+    doc_init_models_paths = "The paths to the initial models"
+    doc_init_models_uri = "The URI of the initial models"
+    doc_optional_files = "Optional files for training"
+
+    return [
+        Argument(
+            "config",
+            dict,
+            RunDPTrain.training_args(),
+            optional=True,
+            default=RunDPTrain.normalize_config({}),
+            doc=doc_config,
+        ),
+        Argument("numb_models", int, optional=True, default=4, doc=doc_numb_models),
+        Argument(
+            "template_script", [List[str], str], optional=False, doc=doc_template_script
+        ),
+        Argument(
+            "init_models_paths",
+            List[str],
+            optional=True,
+            default=None,
+            doc=doc_init_models_paths,
+            alias=["training_iter0_model_path"],
+        ),
+        Argument(
+            "init_models_uri",
+            str,
+            optional=True,
+            default=None,
+            doc=doc_init_models_uri,
+        ),
+        Argument(
+            "optional_files",
+            list,
+            optional=True,
+            default=None,
+            doc=doc_optional_files,
+        ),
+    ]
+
+
 def variant_train():
     doc = "the type of the training"
     return Variant(
         "type",
         [
             Argument("dp", dict, dp_train_args()),
+            Argument("dp-nvnmd", dict, nvnmd_train_args()),
             Argument("dp-dist", dict, dp_dist_train_args()),
         ],
         doc=doc,
@@ -454,6 +501,7 @@ def variant_explore():
         "type",
         [
             Argument("lmp", dict, lmp_args(), doc=doc_lmp),
+            Argument("lmp-nvnmd", dict, lmp_args(), doc=doc_lmp),
             Argument("calypso", dict, caly_args(), doc=doc_calypso),
             Argument("calypso:default", dict, caly_args(), doc=doc_calypso),
             Argument("calypso:merge", dict, caly_args(), doc=doc_calypso),
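For orientation, a fragment of the workflow JSON that would exercise the new variant might look like the sketch below; the keys follow `nvnmd_train_args()` above, while the values and the template file name are illustrative only, not defaults:

    "train": {
        "type": "dp-nvnmd",
        "numb_models": 4,
        "config": {"init_model_policy": "no"},
        "template_script": "input_nvnmd.json"
    }
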
diff --git a/dpgen2/entrypoint/submit.py b/dpgen2/entrypoint/submit.py
index 203478dc..a39d6cf8 100644
--- a/dpgen2/entrypoint/submit.py
+++ b/dpgen2/entrypoint/submit.py
@@ -111,6 +111,8 @@
     RunDPTrain,
     RunLmp,
     RunLmpHDF5,
+    RunNvNMD,
+    RunNvNMDTrain,
     RunRelax,
     RunRelaxHDF5,
     SelectConfs,
@@ -182,6 +184,17 @@ def make_concurrent_learning_op(
             valid_data=valid_data,
             optional_files=train_optional_files,
         )
+    elif train_style == "dp-nvnmd":
+        prep_run_train_op = PrepRunDPTrain(
+            "prep-run-nvnmd-train",
+            PrepDPTrain,
+            RunNvNMDTrain,
+            prep_config=prep_train_config,
+            run_config=run_train_config,
+            upload_python_packages=upload_python_packages,
+            valid_data=valid_data,
+            optional_files=train_optional_files,
+        )
     else:
         raise RuntimeError(f"unknown train_style {train_style}")
     if explore_style == "lmp":
         prep_run_explore_op = PrepRunLmp(
@@ -193,6 +206,15 @@ def make_concurrent_learning_op(
             run_config=run_explore_config,
             upload_python_packages=upload_python_packages,
         )
+    elif "lmp-nvnmd" in explore_style:
+        prep_run_explore_op = PrepRunLmp(
+            "prep-run-nvnmd",
+            PrepLmp,
+            RunNvNMD,
+            prep_config=prep_explore_config,
+            run_config=run_explore_config,
+            upload_python_packages=upload_python_packages,
+        )
     elif "calypso" in explore_style:
         expl_mode = explore_style.split(":")[-1] if ":" in explore_style else "default"
         if expl_mode == "merge":
@@ -286,7 +308,7 @@ def make_naive_exploration_scheduler(
     # use npt task group
     explore_style = config["explore"]["type"]
 
-    if explore_style == "lmp":
+    if explore_style in ("lmp", "lmp-nvnmd"):
         return make_lmp_naive_exploration_scheduler(config)
     elif "calypso" in explore_style or explore_style == "diffcsp":
         return make_naive_exploration_scheduler_without_conf(config, explore_style)
@@ -506,6 +529,16 @@ def workflow_concurrent_learning(
             else None
         )
         config["train"]["numb_models"] = 1
+
+    elif train_style == "dp-nvnmd":
+        init_models_paths = config["train"].get("init_models_paths", None)
+        numb_models = config["train"]["numb_models"]
+        if init_models_paths is not None and len(init_models_paths) != numb_models:
+            raise RuntimeError(
+                f"{len(init_models_paths)} init models provided, which does "
+                f"not match numb_models={numb_models}"
+            )
+
     else:
         raise RuntimeError(f"unknown params, train_style: {train_style}")
diff --git a/dpgen2/exploration/render/traj_render_lammps.py b/dpgen2/exploration/render/traj_render_lammps.py
index 00b6a3de..2f4e56d9 100644
--- a/dpgen2/exploration/render/traj_render_lammps.py
+++ b/dpgen2/exploration/render/traj_render_lammps.py
@@ -109,6 +109,10 @@ def get_confs(
         conf_filters: Optional["ConfFilters"] = None,
         optional_outputs: Optional[List[Path]] = None,
     ) -> dpdata.MultiSystems:
+        from ase.io import (  # type: ignore
+            read,
+        )
+
         ntraj = len(trajs)
         ele_temp = None
         if optional_outputs:
@@ -123,12 +127,16 @@ def get_confs(
                 traj = StringIO(trajs[ii].get_data())  # type: ignore
             else:
                 traj = trajs[ii]
-            ss = dpdata.System(traj, fmt=traj_fmt, type_map=type_map)
-            ss.nopbc = self.nopbc
-            if ele_temp:
-                self.set_ele_temp(ss, ele_temp[ii])
-            ss = ss.sub_system(id_selected[ii])
-            ms.append(ss)
+            # read the dump with ASE, then convert the selected frames one by one
+            ss = read(
+                str(traj), format="lammps-dump-text", index=":", specorder=type_map
+            )
+            for jj in id_selected[ii]:
+                s = dpdata.System(ss[jj], fmt="ase/structure", type_map=type_map)
+                s.nopbc = self.nopbc
+                if ele_temp:
+                    self.set_ele_temp(s, ele_temp[ii])
+                ms.append(s)
         if conf_filters is not None:
             ms = conf_filters.check(ms)
         return ms
diff --git a/dpgen2/exploration/task/lmp/lmp_input.py b/dpgen2/exploration/task/lmp/lmp_input.py
index c2a22b60..969ac5aa 100644
--- a/dpgen2/exploration/task/lmp/lmp_input.py
+++ b/dpgen2/exploration/task/lmp/lmp_input.py
@@ -50,6 +50,7 @@ def make_lmp_input(
     nopbc: bool = False,
     max_seed: int = 1000000,
     deepmd_version="2.0",
+    nvnmd_version=None,
     trj_seperate_files=True,
     pimd_bead: Optional[str] = None,
 ):
@@ -69,9 +70,9 @@ def make_lmp_input(
     ret += "variable THERMO_FREQ equal %d\n" % trj_freq
     ret += "variable DUMP_FREQ equal %d\n" % trj_freq
     ret += "variable TEMP equal %f\n" % temp
-    if ele_temp_f is not None:
+    if ele_temp_f is not None and nvnmd_version is None:
         ret += "variable ELE_TEMP equal %f\n" % ele_temp_f
-    if ele_temp_a is not None:
+    if ele_temp_a is not None and nvnmd_version is None:
         ret += "variable ELE_TEMP equal %f\n" % ele_temp_a
     if pres is not None:
         ret += "variable PRES equal %f\n" % pres
@@ -106,12 +107,14 @@ def make_lmp_input(
         if pimd_bead is not None
         else lmp_model_devi_name
     )
-    if Version(deepmd_version) < Version("1"):
+    if Version(deepmd_version) < Version("1") and nvnmd_version is None:
         # 0.x
         ret += "pair_style deepmd %s ${THERMO_FREQ} %s\n" % (
             graph_list,
             model_devi_file_name,
         )
+    elif nvnmd_version is not None:
+        ret += "pair_style nvnmd %s\n" % ("model.pb")
     else:
         # 1.x
         keywords = ""
@@ -135,17 +138,28 @@ def make_lmp_input(
     ret += "thermo_style custom step temp pe ke etotal press vol lx ly lz xy xz yz\n"
     ret += "thermo ${THERMO_FREQ}\n"
     if trj_seperate_files:
-        ret += "dump 1 all custom ${DUMP_FREQ} traj/*.lammpstrj id type x y z fx fy fz\n"
+        if nvnmd_version is None:
+            ret += "dump 1 all custom ${DUMP_FREQ} traj/*.lammpstrj id type x y z fx fy fz\n"
+        else:
+            ret += "dump 1 all custom ${DUMP_FREQ} ${rerun}_traj/*.lammpstrj id type x y z fx fy fz\n"
     else:
         lmp_traj_file_name = (
             lmp_pimd_traj_name % pimd_bead if pimd_bead is not None else lmp_traj_name
         )
-        ret += (
-            "dump 1 all custom ${DUMP_FREQ} %s id type x y z fx fy fz\n"
-            % lmp_traj_file_name
-        )
+        if nvnmd_version is None:
+            ret += (
+                "dump 1 all custom ${DUMP_FREQ} %s id type x y z fx fy fz\n"
+                % lmp_traj_file_name
+            )
+        else:
+            ret += (
+                "dump 1 all custom ${DUMP_FREQ} ${rerun}_%s id type x y z fx fy fz\n"
+                % lmp_traj_file_name
+            )
     ret += "restart 10000 dpgen.restart\n"
     ret += "\n"
+    if nvnmd_version is not None:
+        ret += 'if "${rerun} > 0" then "jump SELF rerun"\n'
     if pka_e is None:
         ret += 'if "${restart} == 0" then "velocity all create ${TEMP} %d"' % (
             random.randrange(max_seed - 1) + 1
@@ -193,4 +207,12 @@ def make_lmp_input(
     ret += "\n"
     ret += "timestep %f\n" % dt
     ret += "run ${NSTEPS} upto\n"
+    if nvnmd_version is not None:
+        ret += "jump SELF end\n"
+        ret += "label rerun\n"
+        if trj_seperate_files:
+            ret += "rerun 0_traj/*.lammpstrj dump x y z fx fy fz add yes\n"
+        else:
+            ret += "rerun 0_%s dump x y z fx fy fz add yes\n" % lmp_traj_name
+        ret += "label end\n"
     return ret
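The `${rerun}` variable drives a two-pass scheme: invocation 0 runs MD with the first model and dumps the `0_`-prefixed trajectory, while each later invocation (`-v rerun i`, i > 0) jumps to the `rerun` label and replays the pass-0 trajectory with model i, so every model is evaluated on identical frames. A sketch of the generated input tail when `nvnmd_version` is set and `trj_seperate_files=False` (here `traj.dump` stands in for `lmp_traj_name`):

    run ${NSTEPS} upto
    jump SELF end
    label rerun
    rerun 0_traj.dump dump x y z fx fy fz add yes
    label end
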
diff --git a/dpgen2/exploration/task/lmp_template_task_group.py b/dpgen2/exploration/task/lmp_template_task_group.py
index 1a44cb8e..03eb9a66 100644
--- a/dpgen2/exploration/task/lmp_template_task_group.py
+++ b/dpgen2/exploration/task/lmp_template_task_group.py
@@ -47,12 +47,14 @@ def set_lmp(
         revisions: dict = {},
         traj_freq: int = 10,
         extra_pair_style_args: str = "",
+        nvnmd_version: Optional[str] = None,
         pimd_bead: Optional[str] = None,
     ) -> None:
         self.lmp_template = Path(lmp_template_fname).read_text().split("\n")
         self.revisions = revisions
         self.traj_freq = traj_freq
         self.extra_pair_style_args = extra_pair_style_args
+        self.nvnmd_version = nvnmd_version
         self.pimd_bead = pimd_bead
         self.lmp_set = True
         self.model_list = sorted([model_name_pattern % ii for ii in range(numb_models)])
@@ -62,10 +64,16 @@ def set_lmp(
             self.traj_freq,
             self.extra_pair_style_args,
             self.pimd_bead,
+            nvnmd_version=self.nvnmd_version,
         )
         self.lmp_template = revise_lmp_input_dump(
-            self.lmp_template, self.traj_freq, self.pimd_bead
+            self.lmp_template,
+            self.traj_freq,
+            self.pimd_bead,
+            nvnmd_version=self.nvnmd_version,
         )
+        if nvnmd_version is not None:
+            self.lmp_template = revise_lmp_input_rerun(self.lmp_template)
         if plm_template_fname is not None:
             self.plm_template = Path(plm_template_fname).read_text().split("\n")
             self.plm_set = True
@@ -158,8 +166,8 @@ def revise_lmp_input_model(
     extra_pair_style_args="",
     pimd_bead=None,
     deepmd_version="1",
+    nvnmd_version=None,
 ):
-    idx = find_only_one_key(lmp_lines, ["pair_style", "deepmd"])
     if extra_pair_style_args:
         extra_pair_style_args = " " + extra_pair_style_args
     graph_list = " ".join(task_model_list)
@@ -168,23 +176,39 @@ def revise_lmp_input_model(
     model_devi_file_name = (
         lmp_pimd_model_devi_name % pimd_bead
         if pimd_bead is not None
        else lmp_model_devi_name
     )
-    lmp_lines[idx] = "pair_style deepmd %s out_freq %d out_file %s%s" % (
-        graph_list,
-        trj_freq,
-        model_devi_file_name,
-        extra_pair_style_args,
-    )
+    if nvnmd_version is None:
+        idx = find_only_one_key(lmp_lines, ["pair_style", "deepmd"])
+        lmp_lines[idx] = "pair_style deepmd %s out_freq %d out_file %s%s" % (
+            graph_list,
+            trj_freq,
+            model_devi_file_name,
+            extra_pair_style_args,
+        )
+    else:
+        idx = find_only_one_key(lmp_lines, ["pair_style", "nvnmd"])
+        lmp_lines[idx] = "pair_style nvnmd %s %s" % (
+            "model.pb",
+            extra_pair_style_args,
+        )
+
     return lmp_lines
 
 
-def revise_lmp_input_dump(lmp_lines, trj_freq, pimd_bead=None):
+def revise_lmp_input_dump(lmp_lines, trj_freq, pimd_bead=None, nvnmd_version=None):
     idx = find_only_one_key(lmp_lines, ["dump", "dpgen_dump"])
     lmp_traj_file_name = (
         lmp_pimd_traj_name % pimd_bead if pimd_bead is not None else lmp_traj_name
     )
-    lmp_lines[
-        idx
-    ] = f"dump dpgen_dump all custom {trj_freq} {lmp_traj_file_name} id type x y z"
+    if nvnmd_version is None:
+        lmp_lines[
+            idx
+        ] = f"dump dpgen_dump all custom {trj_freq} {lmp_traj_file_name} id type x y z"
+    else:
+        lmp_lines[idx] = (
+            "dump dpgen_dump all custom %s ${rerun}_%s id type x y z fx fy fz"
+            % (trj_freq, lmp_traj_file_name)
+        )
+        lmp_lines.insert(idx + 1, 'if "${rerun} > 0" then "jump SELF rerun"')
     return lmp_lines
@@ -197,6 +221,14 @@ def revise_lmp_input_plm(lmp_lines, in_plm, out_plm="output.plumed"):
     return lmp_lines
 
 
+def revise_lmp_input_rerun(lmp_lines):
+    lmp_lines.append("jump SELF end")
+    lmp_lines.append("label rerun")
+    lmp_lines.append(f"rerun 0_{lmp_traj_name} dump x y z fx fy fz add yes")
+    lmp_lines.append("label end")
+    return lmp_lines
+
+
 def revise_by_keys(lmp_lines, keys, values):
     for kk, vv in zip(keys, values):  # type: ignore
         for ii in range(len(lmp_lines)):
diff --git a/dpgen2/exploration/task/make_task_group_from_config.py b/dpgen2/exploration/task/make_task_group_from_config.py
index 3b793c58..b8113e21 100644
--- a/dpgen2/exploration/task/make_task_group_from_config.py
+++ b/dpgen2/exploration/task/make_task_group_from_config.py
@@ -297,12 +297,25 @@ def variant_task_group():
         Argument(
             "lmp-md", dict, npt_task_group_args(), alias=["lmp-npt"], doc=doc_lmp_md
         ),
+        Argument(
+            "lmp-nvnmd",
+            dict,
+            npt_task_group_args(),
+            alias=["lmp-nvnmd-npt"],
+            doc=doc_lmp_md,
+        ),
         Argument(
             "lmp-template",
             dict,
             lmp_template_task_group_args(),
             doc=doc_lmp_template,
         ),
+        Argument(
+            "lmp-nvnmd-template",
+            dict,
+            lmp_template_task_group_args(),
+            doc=doc_lmp_template,
+        ),
         Argument(
             "customized-lmp-template",
             dict,
@@ -618,6 +631,7 @@ def make_lmp_task_group_from_config(
     config["conf_idx"] = [] if "conf_idx" not in config else None
     config = lmp_normalize(config)
     config = config_strip_confidx(config)
+
     if config["type"] == "lmp-md":
         tgroup = NPTTaskGroup()
         config.pop("type")
@@ -626,6 +640,15 @@ def make_lmp_task_group_from_config(
             mass_map,
             **config,
         )
+    elif config["type"] == "lmp-nvnmd":
+        tgroup = NPTTaskGroup()
+        config.pop("type")
+        config["nvnmd_version"] = "0.0"
+        tgroup.set_md(
+            numb_models,
+            mass_map,
+            **config,
+        )
     elif config["type"] == "lmp-template":
         tgroup = LmpTemplateTaskGroup()
         config.pop("type")
         lmp_template = config.pop("lmp_template_fname")
         tgroup.set_lmp(
             numb_models,
             lmp_template,
             **config,
         )
+    elif config["type"] == "lmp-nvnmd-template":
+        tgroup = LmpTemplateTaskGroup()
+        config.pop("type")
+        config["nvnmd_version"] = "0.0"
+        lmp_template = config.pop("lmp_template_fname")
+        tgroup.set_lmp(
+            numb_models,
+            lmp_template,
+            **config,
+        )
     elif config["type"] == "customized-lmp-template":
         tgroup = CustomizedLmpTemplateTaskGroup()
         config.pop("type")
diff --git a/dpgen2/exploration/task/npt_task_group.py b/dpgen2/exploration/task/npt_task_group.py
index 27c1e001..c66d985c 100644
--- a/dpgen2/exploration/task/npt_task_group.py
+++ b/dpgen2/exploration/task/npt_task_group.py
@@ -49,6 +49,7 @@ def set_md(
         relative_v_epsilon: Optional[float] = None,
         ele_temp_f: Optional[float] = None,
         ele_temp_a: Optional[float] = None,
+        nvnmd_version: Optional[str] = None,
         pimd_bead: Optional[str] = None,
     ):
         """
@@ -73,6 +74,7 @@ def set_md(
         self.ele_temp_f = ele_temp_f
         self.ele_temp_a = ele_temp_a
         self.md_set = True
+        self.nvnmd_version = nvnmd_version
         self.pimd_bead = pimd_bead
 
     def make_task(
@@ -132,6 +134,7 @@ def _make_lmp_task(
                 self.ele_temp_f,
                 self.ele_temp_a,
                 self.no_pbc,
+                nvnmd_version=self.nvnmd_version,
                 trj_seperate_files=False,
                 pimd_bead=self.pimd_bead,
             ),
diff --git a/dpgen2/op/__init__.py b/dpgen2/op/__init__.py
index f4fec3a2..fa1c020c 100644
--- a/dpgen2/op/__init__.py
+++ b/dpgen2/op/__init__.py
@@ -38,6 +38,12 @@
     RunLmp,
     RunLmpHDF5,
 )
+from .run_nvnmd import (
+    RunNvNMD,
+)
+from .run_nvnmd_train import (
+    RunNvNMDTrain,
+)
 from .run_relax import (
     RunRelax,
     RunRelaxHDF5,
diff --git a/dpgen2/op/prep_dp_train.py b/dpgen2/op/prep_dp_train.py
index 20fe58c2..10bd0674 100644
--- a/dpgen2/op/prep_dp_train.py
+++ b/dpgen2/op/prep_dp_train.py
@@ -119,15 +119,20 @@ def _script_rand_seed(
         input_dict,
     ):
         jtmp = input_dict.copy()
-        if "model_dict" in jtmp["model"]:
-            for d in jtmp["model"]["model_dict"].values():
-                if isinstance(d["descriptor"], str):
-                    self._set_desc_seed(jtmp["model"]["shared_dict"][d["descriptor"]])
-                d["fitting_net"]["seed"] = random.randrange(sys.maxsize) % (2**32)
-        else:
-            self._set_desc_seed(jtmp["model"]["descriptor"])
-            jtmp["model"]["fitting_net"]["seed"] = random.randrange(sys.maxsize) % (
-                2**32
-            )
+        if "model" in jtmp:
+            if "model_dict" in jtmp["model"]:
+                for d in jtmp["model"]["model_dict"].values():
+                    if isinstance(d["descriptor"], str):
+                        self._set_desc_seed(
+                            jtmp["model"]["shared_dict"][d["descriptor"]]
+                        )
+                    d["fitting_net"]["seed"] = random.randrange(sys.maxsize) % (2**32)
+            else:
+                self._set_desc_seed(jtmp["model"]["descriptor"])
+                jtmp["model"]["fitting_net"]["seed"] = random.randrange(sys.maxsize) % (
+                    2**32
+                )
+        elif "nvnmd" in jtmp:
+            jtmp["nvnmd"]["seed"] = random.randrange(sys.maxsize) % (2**32)
         jtmp["training"]["seed"] = random.randrange(sys.maxsize) % (2**32)
         return jtmp
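On the exploration side, a task group of the new type is configured like its `lmp-md` counterpart. An illustrative (not exhaustive) stage entry might read as follows, where every key other than `type` is assumed to come from `npt_task_group_args()`:

    {
        "type": "lmp-nvnmd",
        "conf_idx": [0],
        "n_sample": 3,
        "temps": [300.0],
        "trj_freq": 10
    }
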
diff --git a/dpgen2/op/run_dp_train.py b/dpgen2/op/run_dp_train.py
index 5a9782f4..4c271a7d 100644
--- a/dpgen2/op/run_dp_train.py
+++ b/dpgen2/op/run_dp_train.py
@@ -1,3 +1,4 @@
+import copy
 import glob
 import json
 import logging
@@ -414,9 +415,11 @@ def write_other_to_input_script(
         config,
         do_init_model,
         major_version: str = "1",
+        do_quantized: bool = False,
     ):
-        odict = idict.copy()
+        odict = copy.deepcopy(idict)
         odict["training"]["disp_file"] = "lcurve.out"
+        odict["training"]["save_ckpt"] = "model.ckpt"
         if do_init_model:
             odict["learning_rate"]["start_lr"] = config["init_model_start_lr"]
             if "loss_dict" in odict:
@@ -437,6 +440,13 @@ def write_other_to_input_script(
                 raise RuntimeError(
                     "unsupported DeePMD-kit major version", major_version
                 )
+
+        if do_quantized:
+            if major_version == "1":
+                odict["training"]["stop_batch"] = 0
+            elif major_version == "2":
+                odict["training"]["numb_steps"] = 0
+
         return odict
 
     @staticmethod
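The `do_quantized` switch prepares the QNN stage: with the step count forced to zero, the second training pass performs no further optimization and effectively only quantizes and freezes the CNN result. The intended call pattern, as used by `RunNvNMDTrain` below:

    train_cnn_dict = RunDPTrain.write_other_to_input_script(
        train_dict, config, do_init_model, major_version, False
    )
    train_qnn_dict = RunDPTrain.write_other_to_input_script(
        train_dict, config, do_init_model, major_version, True
    )  # QNN script: numb_steps / stop_batch == 0
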
diff --git a/dpgen2/op/run_nvnmd.py b/dpgen2/op/run_nvnmd.py
new file mode 100644
index 00000000..4d899088
--- /dev/null
+++ b/dpgen2/op/run_nvnmd.py
@@ -0,0 +1,325 @@
+import glob
+import itertools
+import logging
+import os
+import random
+from pathlib import (
+    Path,
+)
+from typing import (
+    List,
+    Optional,
+    Union,
+)
+
+import numpy as np
+from dflow.python import (
+    OP,
+    OPIO,
+    Artifact,
+    BigParameter,
+    OPIOSign,
+    TransientError,
+)
+
+from dpgen2.constants import (
+    lmp_input_name,
+    lmp_log_name,
+    lmp_model_devi_name,
+    lmp_traj_name,
+    model_name_pattern,
+    plm_output_name,
+)
+from dpgen2.op.run_lmp import (
+    RunLmp,
+    find_only_one_key,
+)
+from dpgen2.utils import (
+    set_directory,
+)
+from dpgen2.utils.run_command import (
+    run_command,
+)
+
+
+class RunNvNMD(OP):
+    r"""Execute a LAMMPS task.
+
+    A working directory named `task_name` is created. All input files
+    are copied or symbolically linked to directory `task_name`. The
+    LAMMPS command is executed from directory `task_name`. The
+    trajectory and the model deviation will be stored in files
+    `op["traj"]` and `op["model_devi"]`, respectively.
+
+    """
+
+    @classmethod
+    def get_input_sign(cls):
+        return OPIOSign(
+            {
+                "config": BigParameter(dict),
+                "task_name": BigParameter(str),
+                "task_path": Artifact(Path),
+                "models": Artifact(List[Path]),
+            }
+        )
+
+    @classmethod
+    def get_output_sign(cls):
+        return OPIOSign(
+            {
+                "log": Artifact(Path),
+                "traj": Artifact(Path),
+                "model_devi": Artifact(Path),
+                "plm_output": Artifact(Path, optional=True),
+                "optional_output": Artifact(Path, optional=True),
+            }
+        )
+
+    @OP.exec_sign_check
+    def execute(
+        self,
+        ip: OPIO,
+    ) -> OPIO:
+        r"""Execute the OP.
+
+        Parameters
+        ----------
+        ip : dict
+            Input dict with components:
+
+            - `config`: (`dict`) The config of lmp task. Check `RunLmp.lmp_args` for definitions.
+            - `task_name`: (`str`) The name of the task.
+            - `task_path`: (`Artifact(Path)`) The path that contains all input files prepared by `PrepLmp`.
+            - `models`: (`Artifact(List[Path])`) The frozen models to estimate the model deviation. The first model will be used to drive the molecular dynamics simulation.
+
+        Returns
+        -------
+        Any
+            Output dict with components:
+            - `log`: (`Artifact(Path)`) The log file of LAMMPS.
+            - `traj`: (`Artifact(Path)`) The output trajectory.
+            - `model_devi`: (`Artifact(Path)`) The model deviation. The order of recorded model deviations should be consistent with the order of frames in `traj`.
+
+        Raises
+        ------
+        TransientError
+            On the failure of LAMMPS execution, e.g. lost atoms.
+        """
+        config = ip["config"] if ip["config"] is not None else {}
+        config = RunLmp.normalize_config(config)
+        command = config["command"]
+        shuffle_models: Optional[bool] = config["shuffle_models"]
+        task_name = ip["task_name"]
+        task_path = ip["task_path"]
+        models = ip["models"]
+        input_files = [ii.resolve() for ii in Path(task_path).iterdir()]
+        model_files = [Path(ii).resolve() / "model.pb" for ii in models]
+        work_dir = Path(task_name)
+
+        with set_directory(work_dir):
+            # link input files
+            for ii in input_files:
+                iname = ii.name
+                try:
+                    Path(iname).symlink_to(ii)
+                except OSError:
+                    logging.warning("failed to link %s, maybe already linked" % iname)
+            # link models
+            model_names = []
+            for idx, mm in enumerate(model_files):
+                ext = os.path.splitext(mm)[-1]
+                if ext == ".pb":
+                    mname = model_name_pattern % (idx)
+                    try:
+                        Path(mname).symlink_to(mm)
+                    except OSError:
+                        logging.warning(
+                            "failed to link %s, maybe already linked" % mname
+                        )
+                else:
+                    raise RuntimeError(
+                        "Model file with extension '%s' is not supported" % ext
+                    )
+                model_names.append(mname)
+
+            if shuffle_models:
+                random.shuffle(model_names)
+
+            set_lmp_models(lmp_input_name, model_names)
+
+            # run lmp once per model: pass 0 drives the MD, pass ii > 0
+            # replays the pass-0 trajectory with model ii (see the rerun
+            # label in the generated input)
+            for ii in range(len(model_names)):
+                commands = " ".join(
+                    [
+                        command,
+                        "-i",
+                        "%d_%s" % (ii, lmp_input_name),
+                        "-log",
+                        "%d_%s" % (ii, lmp_log_name),
+                        "-v",
+                        "rerun",
+                        "%d" % ii,
+                    ]
+                )
+                ret, out, err = run_command(commands, shell=True)
+                if ret != 0:
+                    logging.error(
+                        "".join(
+                            (
+                                "lmp failed\n",
+                                "command was: ",
+                                commands,
+                                "out msg: ",
+                                out,
+                                "\n",
+                                "err msg: ",
+                                err,
+                                "\n",
+                            )
+                        )
+                    )
+                    raise TransientError("lmp failed")
+
+            merge_pimd_files()
+
+            traj_files = sorted(glob.glob("*_%s" % lmp_traj_name))
+            if len(traj_files) > 1:
+                calc_model_devi(traj_files, lmp_model_devi_name)
+
+        ret_dict = {
+            "log": work_dir / ("%d_%s" % (0, lmp_log_name)),
+            "traj": work_dir / ("%d_%s" % (0, lmp_traj_name)),
+            "model_devi": self.get_model_devi(work_dir / lmp_model_devi_name),
+        }
+        plm_output = (
+            {"plm_output": work_dir / plm_output_name}
+            if (work_dir / plm_output_name).is_file()
+            else {}
+        )
+        ret_dict.update(plm_output)
+        return OPIO(ret_dict)
+
+    def get_model_devi(self, model_devi_file):
+        return model_devi_file
+
+
+config_args = RunLmp.lmp_args
+
+
+def set_lmp_models(lmp_input_name: str, model_names: List[str]):
+    with open(lmp_input_name, encoding="utf8") as f:
+        lmp_input_lines = f.readlines()
+
+    idx = find_only_one_key(
+        lmp_input_lines, ["pair_style", "nvnmd"], raise_not_found=False
+    )
+    if idx is None:
+        return
+    new_line_split = lmp_input_lines[idx].split()
+    match_idx = find_only_one_key(new_line_split, ["model.pb"], raise_not_found=False)
+    if match_idx is None:
+        raise RuntimeError("no model file found in the nvnmd pair_style line")
+
+    # write one input file per model, each pointing pair_style at that model
+    for ii, model_name in enumerate(model_names):
+        new_line_split[match_idx] = model_name
+        lmp_input_lines[idx] = " ".join(new_line_split) + "\n"
+        with open("%d_%s" % (ii, lmp_input_name), "w", encoding="utf8") as f:
+            f.write("".join(lmp_input_lines))
+
+
+def merge_pimd_files():
+    traj_files = glob.glob("traj.*.dump")
+    if len(traj_files) > 0:
+        with open(lmp_traj_name, "w") as f:
+            for traj_file in sorted(traj_files):
+                with open(traj_file, "r") as f2:
+                    f.write(f2.read())
+    model_devi_files = glob.glob("model_devi.*.out")
+    if len(model_devi_files) > 0:
+        with open(lmp_model_devi_name, "w") as f:
+            for model_devi_file in sorted(model_devi_files):
+                with open(model_devi_file, "r") as f2:
+                    f.write(f2.read())
+
+
+def calc_model_devi(
+    traj_files,
+    fname="model_devi.out",
+):
+    from ase.io import read  # type: ignore
+
+    trajectories = []
+    for f in traj_files:
+        traj = read(f, format="lammps-dump-text", index=":", order=True)
+        trajectories.append(traj)
+
+    num_frames = len(trajectories[0])
+    for traj in trajectories:
+        assert (
+            len(traj) == num_frames
+        ), f"Trajectory length mismatch: expected {num_frames}, got {len(traj)} frames"
+
+    devi = []
+    for frame_idx in range(num_frames):
+        frames = [traj[frame_idx] for traj in trajectories]
+
+        all_forces = [atoms.get_forces() for atoms in frames]
+        all_errors = []
+
+        for atom_idx in range(len(frames[0])):
+            forces = [forces_arr[atom_idx] for forces_arr in all_forces]
+
+            # pairwise force difference between every two models on this atom
+            for a, b in itertools.combinations(forces, 2):
+                error = np.linalg.norm(a - b)
+                all_errors.append(error)
+
+        max_error = np.max(all_errors) if all_errors else 0.0
+        min_error = np.min(all_errors) if all_errors else 0.0
+        avg_error = np.mean(all_errors) if all_errors else 0.0
+
+        # reading the "timestep" info key requires ase version >= 3.26.0; if
+        # needed, update ase via "pip install git+https://gitlab.com/ase/ase.git"
+        devi.append(
+            [
+                trajectories[0][frame_idx].info["timestep"],
+                0,
+                0,
+                0,
+                max_error,
+                min_error,
+                avg_error,
+                0,
+            ]
+        )
+
+    devi = np.array(devi)
+    write_model_devi_out(devi, fname=fname)
+
+
+def write_model_devi_out(devi: np.ndarray, fname: Union[str, Path], header: str = ""):
+    assert devi.shape[1] == 8
+    header = "%s\n%10s" % (header, "step")
+    for item in "vf":
+        header += "%19s%19s%19s" % (
+            f"max_devi_{item}",
+            f"min_devi_{item}",
+            f"avg_devi_{item}",
+        )
+    with open(fname, "ab") as fp:
+        np.savetxt(
+            fp,
+            devi,
+            fmt=["%12d"] + ["%19.6e" for _ in range(devi.shape[1] - 1)],
+            delimiter="",
+            header=header,
+        )
+    return devi
diff --git a/dpgen2/op/run_nvnmd_train.py b/dpgen2/op/run_nvnmd_train.py
new file mode 100644
index 00000000..fa00ff74
--- /dev/null
+++ b/dpgen2/op/run_nvnmd_train.py
@@ -0,0 +1,397 @@
+import copy
+import glob
+import json
+import logging
+import os
+import shutil
+from pathlib import (
+    Path,
+)
+from typing import (
+    Dict,
+    List,
+    Optional,
+    Tuple,
+    Union,
+)
+
+import dpdata
+from dargs import (
+    Argument,
+    ArgumentEncoder,
+    Variant,
+    dargs,
+)
+from dflow.python import (
+    OP,
+    OPIO,
+    Artifact,
+    BigParameter,
+    FatalError,
+    NestedDict,
+    OPIOSign,
+    Parameter,
+    TransientError,
+)
+
+from dpgen2.constants import (
+    train_cnn_script_name,
+    train_qnn_script_name,
+    train_script_name,
+    train_task_pattern,
+)
+from dpgen2.op.run_dp_train import (
+    RunDPTrain,
+    _expand_all_multi_sys_to_sys,
+)
+from dpgen2.utils.chdir import (
+    set_directory,
+)
+from dpgen2.utils.run_command import (
+    run_command,
+)
+
+
+def _make_train_command(
+    dp_command,
+    train_script_name,
+    do_init_model,
+    init_model,
+    train_args="",
+):
+    # The two NvNMD training stages are selected by the caller through
+    # train_args: "dp train-nvnmd -s s1" trains the continuous (CNN) model,
+    # "-s s2" the quantized (QNN) model.
+    # find checkpoint
+    if os.path.isfile("nvnmd_cnn/checkpoint") and not os.path.isfile(
+        "nvnmd_cnn/frozen_model.pb"
+    ):
+        checkpoint = "nvnmd_cnn/model.ckpt"
+    else:
+        checkpoint = None
+
+    # case of restart
+    if checkpoint is not None:
+        command = dp_command + [
+            "train-nvnmd",
+            "--restart",
+            checkpoint,
+            train_script_name,
+        ]
+        return command
+
+    # case of init model
+    assert checkpoint is None
+    case_init_model = do_init_model
+    if case_init_model:
+        if isinstance(init_model, list):  # initialize from the model.ckpt files
+            for i in init_model:
+                if os.path.exists(i):
+                    shutil.copy(i, ".")
+            init_model = "model.ckpt"
+            init_flag = "--init-model"
+        else:  # initialize from a frozen model
+            init_flag = "--init-frz-model"
+
+        command = dp_command + [
+            "train-nvnmd",
+            init_flag,
+            str(init_model),
+            train_script_name,
+        ]
+    else:
+        command = dp_command + ["train-nvnmd", train_script_name]
+
+    command += train_args.split()
+    return command
+
+
+class RunNvNMDTrain(OP):
+    r"""Execute an NvNMD training task. Train and freeze an NvNMD model.
+
+    A working directory named `task_name` is created. All input files
+    are copied or symbolically linked to directory `task_name`. The
+    DeePMD-kit training and freezing commands are executed from
+    directory `task_name`.
+
+    """
+
+    default_optional_parameter = {
+        "mixed_type": False,
+    }
+
+    @classmethod
+    def get_input_sign(cls):
+        return OPIOSign(
+            {
+                "config": dict,
+                "task_name": BigParameter(str),
+                "optional_parameter": Parameter(
+                    dict,
+                    default=RunNvNMDTrain.default_optional_parameter,
+                ),
+                "task_path": Artifact(Path),
+                "init_model": Artifact(Path, optional=True),
+                "init_data": Artifact(NestedDict[Path]),
+                "iter_data": Artifact(List[Path]),
+                "valid_data": Artifact(NestedDict[Path], optional=True),
+                "optional_files": Artifact(List[Path], optional=True),
+            }
+        )
+
+    @classmethod
+    def get_output_sign(cls):
+        return OPIOSign(
+            {
+                "script": Artifact(Path),
+                "model": Artifact(Path),
+                "lcurve": Artifact(Path),
+                "log": Artifact(Path),
+            }
+        )
+
+    @OP.exec_sign_check
+    def execute(
+        self,
+        ip: OPIO,
+    ) -> OPIO:
+        r"""Execute the OP.
+
+        Parameters
+        ----------
+        ip : dict
+            Input dict with components:
+
+            - `config`: (`dict`) The config of training task. Check `RunDPTrain.training_args` for definitions.
+            - `task_name`: (`str`) The name of training task.
+            - `task_path`: (`Artifact(Path)`) The path that contains all input files prepared by `PrepDPTrain`.
+            - `init_model`: (`Artifact(Path)`) The checkpoint and frozen model to initialize the training.
+            - `init_data`: (`Artifact(NestedDict[Path])`) Initial training data.
+            - `iter_data`: (`Artifact(List[Path])`) Training data generated in the DPGEN iterations.
+
+        Returns
+        -------
+        Any
+            Output dict with components:
+            - `script`: (`Artifact(Path)`) The training script.
+            - `model`: (`Artifact(Path)`) The directory with the trained continuous (CNN) and quantized (QNN) frozen models and the checkpoint files.
+            - `lcurve`: (`Artifact(Path)`) The learning curve file.
+            - `log`: (`Artifact(Path)`) The log file of training.
+
+        Raises
+        ------
+        FatalError
+            On the failure of training or freezing. Human intervention needed.
+        """
+        mixed_type = ip["optional_parameter"]["mixed_type"]
+        config = ip["config"] if ip["config"] is not None else {}
+        dp_command = config.get("command", "dp").split()
+        train_args = config.get("train_args", "")
+        config = RunDPTrain.normalize_config(config)
+        task_name = ip["task_name"]
+        task_path = ip["task_path"]
+        init_model = ip["init_model"]
+        init_frz_model = ip["init_model"] / "frozen_model.pb" if init_model else None
+        init_model_ckpt_data = (
+            ip["init_model"] / "model.ckpt.data-00000-of-00001" if init_model else None
+        )
+        init_model_ckpt_meta = (
+            ip["init_model"] / "model.ckpt.meta" if init_model else None
+        )
+        init_model_ckpt_index = (
+            ip["init_model"] / "model.ckpt.index" if init_model else None
+        )
+        init_data = ip["init_data"]
+        iter_data = ip["iter_data"]
+        valid_data = ip["valid_data"]
+        iter_data_old_exp = _expand_all_multi_sys_to_sys(iter_data[:-1])
+        iter_data_new_exp = _expand_all_multi_sys_to_sys(iter_data[-1:])
+        iter_data_exp = iter_data_old_exp + iter_data_new_exp
+        work_dir = Path(task_name)
+
+        # update the input script
+        input_script = Path(task_path) / train_script_name
+        with open(input_script) as fp:
+            train_dict = json.load(fp)
+        if "systems" in train_dict["training"]:
+            major_version = "1"
+        else:
+            major_version = "2"
+
+        # auto prob style
+        init_model_ckpt = [
+            init_model_ckpt_meta,
+            init_model_ckpt_data,
+            init_model_ckpt_index,
+        ]
+        do_init_model = RunDPTrain.decide_init_model(
+            config,
+            init_model_ckpt if init_model_ckpt_data is not None else init_frz_model,
+            init_data,
+            iter_data,
+            mixed_type=mixed_type,
+        )
+        auto_prob_str = "prob_sys_size"
+        if do_init_model:
+            old_ratio = config["init_model_old_ratio"]
+            len_init = len(init_data)
+            numb_old = len_init + len(iter_data_old_exp)
+            numb_new = numb_old + len(iter_data_new_exp)
+            auto_prob_str = f"prob_sys_size; 0:{numb_old}:{old_ratio}; {numb_old}:{numb_new}:{1.-old_ratio:g}"
+
+        # update the input dict
+        train_dict = RunDPTrain.write_data_to_input_script(
+            train_dict,
+            config,
+            init_data,
+            iter_data_exp,
+            auto_prob_str,
+            major_version,
+            valid_data,
+        )
+        train_cnn_dict = RunDPTrain.write_other_to_input_script(
+            train_dict, config, do_init_model, major_version, False
+        )
+        train_qnn_dict = RunDPTrain.write_other_to_input_script(
+            train_dict,
+            config,
+            do_init_model,
+            major_version,
+            True,
+        )
+
+        with set_directory(work_dir):
+            # open log
+            fplog = open("train.log", "w")
+
+            def clean_before_quit():
+                fplog.close()
+
+            # dump the train scripts
+            with open(train_script_name, "w") as fp:
+                json.dump(train_cnn_dict, fp, indent=4)
+
+            with open(train_cnn_script_name, "w") as fp:
+                json.dump(train_cnn_dict, fp, indent=4)
+
+            with open(train_qnn_script_name, "w") as fp:
+                json.dump(train_qnn_dict, fp, indent=4)
+
+            if ip["optional_files"] is not None:
+                for f in ip["optional_files"]:
+                    Path(f.name).symlink_to(f)
+
+            # train cnn model
+            command = _make_train_command(
+                dp_command,
+                train_cnn_script_name,
+                do_init_model,
+                init_model_ckpt if init_model_ckpt_data is not None else init_model,
+                train_args="-s s1",
+            )
+
+            if not RunDPTrain.skip_training(
+                work_dir, train_dict, init_model, iter_data, None
+            ):
+                ret, out, err = run_command(command)
+                if ret != 0:
+                    clean_before_quit()
+                    logging.error(
+                        "".join(
+                            (
+                                "dp train-nvnmd -s s1 failed\n",
+                                "out msg: ",
+                                out,
+                                "\n",
+                                "err msg: ",
+                                err,
+                                "\n",
+                            )
+                        )
+                    )
+                    raise FatalError("dp train-nvnmd -s s1 failed")
+                fplog.write(
+                    "#=================== train_cnn std out ===================\n"
+                )
+                fplog.write(out)
+                fplog.write(
+                    "#=================== train_cnn std err ===================\n"
+                )
+                fplog.write(err)
+
+                cnn_model_file = "nvnmd_cnn/frozen_model.pb"
+                model_ckpt_data_file = "nvnmd_cnn/model.ckpt.data-00000-of-00001"
+                model_ckpt_index_file = "nvnmd_cnn/model.ckpt.index"
+                model_ckpt_meta_file = "nvnmd_cnn/model.ckpt.meta"
+                lcurve_file = "nvnmd_cnn/lcurve.out"
+
+                if os.path.exists("input_v2_compat.json"):
+                    shutil.copy2("input_v2_compat.json", train_script_name)
+
+            else:
+                cnn_model_file = init_model
+                model_ckpt_data_file = ""
+                model_ckpt_index_file = ""
+                model_ckpt_meta_file = ""
+                lcurve_file = "nvnmd_qnn/lcurve.out"
+
+            # train qnn model
+            command = _make_train_command(
+                dp_command,
+                train_qnn_script_name,
+                do_init_model,
+                init_model_ckpt if init_model_ckpt_data is not None else init_model,
+                train_args="-s s2",
+            )
+
+            ret, out, err = run_command(command)
+            if ret != 0:
+                clean_before_quit()
+                logging.error(
+                    "".join(
+                        (
+                            "dp train-nvnmd -s s2 failed\n",
+                            "out msg: ",
+                            out,
+                            "\n",
+                            "err msg: ",
+                            err,
+                            "\n",
+                        )
+                    )
+                )
+                raise FatalError("dp train-nvnmd -s s2 failed")
+            fplog.write("#=================== train_qnn std out ===================\n")
+            fplog.write(out)
+            fplog.write("#=================== train_qnn std err ===================\n")
+            fplog.write(err)
+
+            qnn_model_file = "nvnmd_qnn/model.pb"
+
+            clean_before_quit()
+
+            # copy all model files to the output directory
+            os.makedirs("nvnmd_models", exist_ok=True)
+            if os.path.exists(cnn_model_file):
+                shutil.copy(cnn_model_file, "nvnmd_models")
+            if os.path.exists(qnn_model_file):
+                shutil.copy(qnn_model_file, "nvnmd_models")
+            if os.path.exists(model_ckpt_meta_file):
+                shutil.copy(model_ckpt_meta_file, "nvnmd_models")
+            if os.path.exists(model_ckpt_data_file):
+                shutil.copy(model_ckpt_data_file, "nvnmd_models")
+            if os.path.exists(model_ckpt_index_file):
+                shutil.copy(model_ckpt_index_file, "nvnmd_models")
+
+            model_files = "nvnmd_models"
+
+        return OPIO(
+            {
+                "script": work_dir / train_script_name,
+                "model": work_dir / model_files,
+                "lcurve": work_dir / lcurve_file,
+                "log": work_dir / "train.log",
+            }
+        )
+
+
+config_args = RunDPTrain.training_args
diff --git a/dpgen2/superop/prep_run_dp_train.py b/dpgen2/superop/prep_run_dp_train.py
index 0fd988e4..752a152a 100644
--- a/dpgen2/superop/prep_run_dp_train.py
+++ b/dpgen2/superop/prep_run_dp_train.py
@@ -11,6 +11,7 @@
     Optional,
     Set,
     Type,
+    Union,
 )
 
 from dflow import (
@@ -47,6 +48,7 @@
 )
 from dpgen2.op import (
     RunDPTrain,
+    RunNvNMDTrain,
 )
 from dpgen2.utils.step_config import (
     init_executor,
@@ -59,7 +61,7 @@ def __init__(
         self,
         name: str,
         prep_train_op: Type[OP],
-        run_train_op: Type[RunDPTrain],
+        run_train_op: Type[Union[RunDPTrain, RunNvNMDTrain]],
         prep_config: Optional[dict] = None,
         run_config: Optional[dict] = None,
         upload_python_packages: Optional[List[os.PathLike]] = None,
@@ -150,7 +152,7 @@ def _prep_run_dp_train(
     train_steps,
     step_keys,
     prep_train_op: Type[OP],
-    run_train_op: Type[RunDPTrain],
+    run_train_op: Type[OP],
     prep_config: dict = normalize_step_dict({}),
     run_config: dict = normalize_step_dict({}),
     upload_python_packages: Optional[List[os.PathLike]] = None,
diff --git a/dpgen2/utils/download_dpgen2_artifacts.py b/dpgen2/utils/download_dpgen2_artifacts.py
index b5f69153..8ab4cb61 100644
--- a/dpgen2/utils/download_dpgen2_artifacts.py
+++ b/dpgen2/utils/download_dpgen2_artifacts.py
@@ -359,9 +359,9 @@ def _dl_step_item(
     [step_key, io, name] = item.split(global_step_def_split)
     pref = _item_path(prefix, item)
     if io in ["input"]:
-        target = step.inputs.artifacts[name]
+        target = step.inputs.artifacts.get(name)
     elif io in ["output"]:
-        target = step.outputs.artifacts[name]
+        target = step.outputs.artifacts.get(name)
     else:
         raise RuntimeError("unknown io style {io}")
     try:
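The tests that follow model an NvNMD "model" artifact as a directory rather than a single `.pb` file. A sketch of the layout assumed by `make_mocked_init_nvnmd_models` below (names from `nvnmd_model_name_pattern` and the file list in that helper):

    nvnmd_model.000/
        frozen_model.pb     # CNN frozen model
        model.ckpt.meta     # checkpoint files
        model.ckpt.data
        model.ckpt.index
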
diff --git a/tests/mocked_ops.py b/tests/mocked_ops.py
index 9cd13c00..87bacdcd 100644
--- a/tests/mocked_ops.py
+++ b/tests/mocked_ops.py
@@ -41,6 +41,7 @@
     lmp_task_pattern,
     lmp_traj_name,
     model_name_pattern,
+    nvnmd_model_name_pattern,
     train_log_name,
     train_script_name,
     train_task_pattern,
@@ -95,6 +96,12 @@
 from dpgen2.op.run_lmp import (
     RunLmp,
 )
+from dpgen2.op.run_nvnmd import (
+    RunNvNMD,
+)
+from dpgen2.op.run_nvnmd_train import (
+    RunNvNMDTrain,
+)
 from dpgen2.op.select_confs import (
     SelectConfs,
 )
@@ -115,6 +122,23 @@ def make_mocked_init_models(numb_models):
     return tmp_models
 
 
+def make_mocked_init_nvnmd_models(numb_models):
+    tmp_models = []
+    for ii in range(numb_models):
+        nvnmd_models_dir = Path(nvnmd_model_name_pattern % ii)
+        nvnmd_models_dir.mkdir(exist_ok=True, parents=True)
+        for jj in (
+            "frozen_model.pb",
+            "model.ckpt.meta",
+            "model.ckpt.data",
+            "model.ckpt.index",
+        ):
+            ff = nvnmd_models_dir / jj
+            ff.write_text(f"This is init {jj} {ii}")
+        tmp_models.append(nvnmd_models_dir)
+    return tmp_models
+
+
 def make_mocked_init_data():
     tmp_init_data = [Path("init_data/foo"), Path("init_data/bar")]
     for ii in tmp_init_data:
@@ -351,6 +375,306 @@ def execute(
     )
 
 
+class MockedPrepNvNMDTrain(PrepDPTrain):
+    @OP.exec_sign_check
+    def execute(
+        self,
+        ip: OPIO,
+    ) -> OPIO:
+        template = ip["template_script"]
+        numb_models = ip["numb_models"]
+        ofiles = []
+        osubdirs = []
+
+        assert template == mocked_template_script
+        assert numb_models == mocked_numb_models
+
+        for ii in range(numb_models):
+            jtmp = template
+            jtmp["seed"] = ii
+            subdir = Path(train_task_pattern % ii)
+            subdir.mkdir(exist_ok=True, parents=True)
+            fname = subdir / "input.json"
+            with open(fname, "w") as fp:
+                json.dump(jtmp, fp, indent=4)
+            osubdirs.append(str(subdir))
+            ofiles.append(fname)
+
+        op = OPIO(
+            {
+                "task_names": osubdirs,
+                "task_paths": [Path(ii) for ii in osubdirs],
+            }
+        )
+        return op
+
+
+class MockedRunNvNMDTrain(RunNvNMDTrain):
+    @OP.exec_sign_check
+    def execute(
+        self,
+        ip: OPIO,
+    ) -> OPIO:
+        work_dir = Path(ip["task_name"])
+        script = ip["task_path"] / "input.json"
+        init_model = ip["init_model"]
+        init_data = ip["init_data"]
+        iter_data = ip["iter_data"]
+
+        assert script.is_file()
+        assert ip["task_path"].is_dir()
+        assert ip["init_model"].is_dir()
+        assert len(init_data) == 2
+        assert re.match("task.[0-9][0-9][0-9][0-9]", ip["task_name"])
+        task_id = int(ip["task_name"].split(".")[1])
+        assert ip["task_name"] in str(ip["task_path"])
+        init_frz_model = ip["init_model"] / "frozen_model.pb"
+        init_model_ckpt_data = ip["init_model"] / "model.ckpt.data"
+        init_model_ckpt_meta = ip["init_model"] / "model.ckpt.meta"
+        init_model_ckpt_index = ip["init_model"] / "model.ckpt.index"
+
+        assert ".pb" in str(init_frz_model)
+        assert "ckpt.meta" in str(init_model_ckpt_meta)
+        assert "ckpt.data" in str(init_model_ckpt_data)
+        assert "ckpt.index" in str(init_model_ckpt_index)
+        list_init_data = sorted([str(ii) for ii in init_data])
+        assert "init_data/bar" in list_init_data[0]
+        assert "init_data/foo" in list_init_data[1]
+        assert Path(list_init_data[0]).is_dir()
+        assert Path(list_init_data[1]).is_dir()
+
+        script = Path(script).resolve()
+        init_model = init_model.resolve()
+        init_frz_model = init_frz_model.resolve()
+        init_model_ckpt_data = init_model_ckpt_data.resolve()
+        init_model_ckpt_meta = init_model_ckpt_meta.resolve()
+        init_model_ckpt_index = init_model_ckpt_index.resolve()
+        init_data = [ii.resolve() for ii in init_data]
+        iter_data = [ii.resolve() for ii in iter_data]
+        init_data_str = [str(ii) for ii in init_data]
+        iter_data_str = [str(ii) for ii in iter_data]
+
+        with open(script) as fp:
+            jtmp = json.load(fp)
+        data = []
+        for ii in sorted(init_data_str):
+            data.append(ii)
+        for ii in sorted(iter_data_str):
+            data.append(ii)
+        jtmp["data"] = data
+        with open(script, "w") as fp:
+            json.dump(jtmp, fp, indent=4)
+
+        cwd = os.getcwd()
+        work_dir.mkdir(exist_ok=True, parents=True)
+        os.chdir(work_dir)
+
+        for script_str in ["input.json", "input_cnn.json", "input_qnn.json"]:
+            oscript = Path(script_str)
+            if not oscript.exists():
+                from shutil import (
+                    copyfile,
+                )
+
+                copyfile(script, oscript)
+
+        oscript = Path("input.json")
+        cnn_dir = Path("nvnmd_cnn")
+        qnn_dir = Path("nvnmd_qnn")
+        cnn_model = cnn_dir / Path("frozen_model.pb")
+        qnn_model = qnn_dir / Path("model.pb")
+        model_ckpt_meta_file = cnn_dir / Path("model.ckpt.meta")
+        model_ckpt_data_file = cnn_dir / Path("model.ckpt.data-00000-of-00001")
+        model_ckpt_index_file = cnn_dir / Path("model.ckpt.index")
+        lcurve = cnn_dir / Path("lcurve.out")
+        log = Path("log")
+
+        assert init_frz_model.exists()
+        assert init_model_ckpt_meta.exists()
+        assert init_model_ckpt_data.exists()
+        assert init_model_ckpt_index.exists()
+        with log.open("w") as f:
+            f.write(f"init_model {str(init_model)} OK\n")
+        for ii in jtmp["data"]:
+            assert Path(ii).exists()
+            assert (ii in init_data_str) or (ii in iter_data_str)
+            with log.open("a") as f:
+                f.write(f"data {str(ii)} OK\n")
+        assert script.exists()
+        with log.open("a") as f:
+            f.write(f"script {str(script)} OK\n")
+
+        cnn_dir.mkdir(exist_ok=True, parents=True)
+        with cnn_model.open("w") as f:
+            f.write("read from init model: \n")
+            f.write(init_frz_model.read_text() + "\n")
+        with model_ckpt_meta_file.open("w") as f:
+            f.write("read from init model: \n")
+            f.write(init_model_ckpt_meta.read_text() + "\n")
+        with model_ckpt_data_file.open("w") as f:
+            f.write("read from init model: \n")
+            f.write(init_model_ckpt_data.read_text() + "\n")
+        with model_ckpt_index_file.open("w") as f:
+            f.write("read from init model: \n")
+            f.write(init_model_ckpt_index.read_text() + "\n")
+
+        qnn_dir.mkdir(exist_ok=True, parents=True)
+        with qnn_model.open("w") as f:
+            f.write("read from init model: \n")
+            f.write(init_frz_model.read_text() + "\n")
+        with lcurve.open("w") as f:
+            f.write("read from train_script: \n")
+            f.write(script.read_text() + "\n")
+
+        model_files = "nvnmd_models"
+        os.makedirs(model_files, exist_ok=True)
+        shutil.copy(cnn_model, "nvnmd_models")
+        shutil.copy(qnn_model, "nvnmd_models")
+        shutil.copy(model_ckpt_meta_file, "nvnmd_models")
+        shutil.copy(model_ckpt_data_file, "nvnmd_models")
+        shutil.copy(model_ckpt_index_file, "nvnmd_models")
+
+        os.chdir(cwd)
+
+        return OPIO(
+            {
+                "script": work_dir / oscript,
+                "model": work_dir / model_files,
+                "lcurve": work_dir / lcurve,
+                "log": work_dir / log,
+            }
+        )
+
+
+class MockedRunNvNMDTrainCheckOptParam(RunDPTrain):
+    @OP.exec_sign_check
+    def execute(
+        self,
+        ip: OPIO,
+    ) -> OPIO:
+        if not ip["optional_parameter"]["mixed_type"]:
+            raise FatalError(
+                f"the value of mixed_type is {ip['optional_parameter']['mixed_type']} "
+            )
+        if not ip["optional_parameter"]["finetune_mode"]:
+            raise FatalError(
+                f"the value of finetune_mode is {ip['optional_parameter']['finetune_mode']} "
+            )
+        return MockedRunDPTrain.execute(self, ip)
+
+
+class MockedRunNvNMDTrainNoneInitModel(RunNvNMDTrain):
+    @OP.exec_sign_check
+    def execute(
+        self,
+        ip: OPIO,
+    ) -> OPIO:
+        work_dir = Path(ip["task_name"])
+        script = ip["task_path"] / "input.json"
+        if ip["init_model"] is not None:
+            raise FatalError("init model is not None")
+        init_data = ip["init_data"]
+        iter_data = ip["iter_data"]
+
+        assert script.is_file()
+        assert ip["task_path"].is_dir()
+        assert len(init_data) == 2
+        assert re.match("task.[0-9][0-9][0-9][0-9]", ip["task_name"])
+        task_id = int(ip["task_name"].split(".")[1])
+        assert ip["task_name"] in str(ip["task_path"])
+        list_init_data = sorted([str(ii) for ii in init_data])
+        assert "init_data/bar" in list_init_data[0]
+        assert "init_data/foo" in list_init_data[1]
+        assert Path(list_init_data[0]).is_dir()
+        assert Path(list_init_data[1]).is_dir()
+
+        script = Path(script).resolve()
+        init_data = [ii.resolve() for ii in init_data]
+        iter_data = [ii.resolve() for ii in iter_data]
+        init_data_str = [str(ii) for ii in init_data]
+        iter_data_str = [str(ii) for ii in iter_data]
+
+        with open(script) as fp:
+            jtmp = json.load(fp)
+        data = []
+        for ii in sorted(init_data_str):
+            data.append(ii)
+        for ii in sorted(iter_data_str):
+            data.append(ii)
+        jtmp["data"] = data
+        with open(script, "w") as fp:
+            json.dump(jtmp, fp, indent=4)
+
+        cwd = os.getcwd()
+        work_dir.mkdir(exist_ok=True, parents=True)
+        os.chdir(work_dir)
+
+        for script_str in ["input.json", "input_cnn.json", "input_qnn.json"]:
+            oscript = Path(script_str)
+            if not oscript.exists():
+                from shutil import (
+                    copyfile,
+                )
+
+                copyfile(script, oscript)
+
+        oscript = Path("input.json")
+        cnn_dir = Path("nvnmd_cnn")
+        qnn_dir = Path("nvnmd_qnn")
+        cnn_model = cnn_dir / Path("frozen_model.pb")
+        qnn_model = qnn_dir / Path("model.pb")
+        model_ckpt_meta_file = cnn_dir / Path("model.ckpt.meta")
+        model_ckpt_data_file = cnn_dir / Path("model.ckpt.data-00000-of-00001")
+        model_ckpt_index_file = cnn_dir / Path("model.ckpt.index")
+        lcurve = cnn_dir / Path("lcurve.out")
+        log = Path("log")
+
+        for ii in jtmp["data"]:
+            assert Path(ii).exists()
+            assert (ii in init_data_str) or (ii in iter_data_str)
+            with log.open("a") as f:
+                f.write(f"data {str(ii)} OK\n")
+        assert script.exists()
+        with log.open("a") as f:
+            f.write(f"script {str(script)} OK\n")
+
+        cnn_dir.mkdir(exist_ok=True, parents=True)
+        with cnn_model.open("w") as f:
+            f.write("read from init model: \n")
+        with model_ckpt_meta_file.open("w") as f:
+            f.write("read from init model ckpt: \n")
+        with model_ckpt_data_file.open("w") as f:
+            f.write("read from init model ckpt: \n")
+        with model_ckpt_index_file.open("w") as f:
+            f.write("read from init model ckpt: \n")
+
+        qnn_dir.mkdir(exist_ok=True, parents=True)
+        with qnn_model.open("w") as f:
+            f.write("read from init model: \n")
+        with lcurve.open("w") as f:
+            f.write("read from train_script: \n")
+            f.write(script.read_text() + "\n")
+
+        model_files = "nvnmd_models"
+        os.makedirs(model_files, exist_ok=True)
+        shutil.copy(cnn_model, "nvnmd_models")
+        shutil.copy(qnn_model, "nvnmd_models")
+        shutil.copy(model_ckpt_meta_file, "nvnmd_models")
+        shutil.copy(model_ckpt_data_file, "nvnmd_models")
+        shutil.copy(model_ckpt_index_file, "nvnmd_models")
+
+        os.chdir(cwd)
+
+        return OPIO(
+            {
+                "script": work_dir / oscript,
+                "model": work_dir / model_files,
+                "lcurve": work_dir / lcurve,
+                "log": work_dir / log,
+            }
+        )
+
+
 class MockedRunLmp(RunLmp):
     @OP.exec_sign_check
     def execute(
@@ -428,6 +752,83 @@ def execute(
     )
 
 
+class MockedRunNvNMD(RunNvNMD):
+    @OP.exec_sign_check
+    def execute(
+        self,
+        ip: OPIO,
+    ) -> OPIO:
+        task_name = ip["task_name"]
+        task_path = ip["task_path"]
+        models = ip["models"]
+
+        assert ip["task_path"].is_dir()
+        assert re.match("task.[0-9][0-9][0-9][0-9][0-9][0-9]", ip["task_name"])
+        task_id = int(ip["task_name"].split(".")[1])
+        assert task_path.is_dir()
+        assert ip["task_name"] in str(ip["task_path"])
+        assert (
+            len(models) == mocked_numb_models
+        ), f"{len(models)} == {mocked_numb_models}"
+        for ii in range(mocked_numb_models):
+            assert ip["models"][ii].is_file()
+            assert "model" in str(ip["models"][ii])
+            assert ".pb" in str(ip["models"][ii])
+        assert (task_path / lmp_conf_name).is_file()
+        assert (task_path / lmp_input_name).is_file()
+
+        task_path = task_path.resolve()
+        models = [ii.resolve() for ii in models]
+        models_str = [str(ii) for ii in models]
+
+        work_dir = Path(task_name)
+
+        cwd = os.getcwd()
+        work_dir.mkdir(exist_ok=True, parents=True)
+        os.chdir(work_dir)
+
+        import glob
+
+        ifiles = glob.glob(str(task_path / "*"))
+        for ii in ifiles:
+            if not Path(Path(ii).name).exists():
+                Path(Path(ii).name).symlink_to(ii)
+        for ii in models:
+            if not Path(Path(ii).name).exists():
+                Path(Path(ii).name).symlink_to(ii)
+
+        log = Path(lmp_log_name)
+        traj = Path(lmp_traj_name)
+        model_devi = Path(lmp_model_devi_name)
+
+        fc = []
+        for ii in [lmp_conf_name, lmp_input_name] + [ii.name for ii in models]:
+            fc.append(Path(ii).read_text())
+        log.write_text("\n".join(fc))
+        model_devi.write_text(f"model_devi of {task_name}")
+        traj_out = []
+        traj_out.append(f"traj of {task_name}")
+        traj_out.append(Path(lmp_conf_name).read_text())
+        traj_out.append(Path(lmp_input_name).read_text())
+        traj.write_text("\n".join(traj_out))
+
+        os.chdir(cwd)
+
+        return OPIO(
+            {
+                "log": work_dir / log,
+                "traj": work_dir / traj,
+                "model_devi": work_dir / model_devi,
+            }
+        )
+
+
 class MockedPrepVasp(PrepVasp):
     @OP.exec_sign_check
     def execute(
diff --git a/tests/op/test_prep_dp_train.py b/tests/op/test_prep_dp_train.py
index a380e221..427a8332 100644
--- a/tests/op/test_prep_dp_train.py
+++ b/tests/op/test_prep_dp_train.py
@@ -56,6 +56,26 @@
     },
 }
 
+template_script_nvnmd_v0 = {
+    "nvnmd": {"version": 0, "seed": 1},
+    "training": {
+        "systems": [],
+        "stop_batch": 2000,
+        "batch_size": "auto",
+        "seed": 1,
+    },
+}
+
+template_script_nvnmd_v1 = {
+    "nvnmd": {"version": 1, "seed": 1},
+    "training": {
+        "systems": [],
+        "stop_batch": 2000,
+        "batch_size": "auto",
+        "seed": 1,
+    },
+}
+
 
 class faked_rg:
     faked_random = -1
@@ -161,6 +181,48 @@ def test_template_list_hyb_sea(self):
             self.assertEqual(jdata["model"]["fitting_net"]["seed"], 4 * ii + 1)
             self.assertEqual(jdata["training"]["seed"], 4 * ii + 2)
 
+    def test_template_nvnmd_v1(self):
+        ip = OPIO(
+            {
+                "template_script": template_script_nvnmd_v1,
+                "numb_models": self.numb_models,
+            }
+        )
+
+        faked_rg.faked_random = -1
+        with mock.patch("random.randrange", faked_rg.randrange):
+            op = self.ptrain.execute(ip)
+
+        self._check_output_dir_and_file_exist(op, self.numb_models)
+
+        for ii in range(self.numb_models):
+            with open(Path(train_task_pattern % ii) / train_script_name) as fp:
+                jdata = json.load(fp)
+            self.assertEqual(jdata["nvnmd"]["version"], 1)
+            self.assertEqual(jdata["nvnmd"]["seed"], 2 * ii + 0)
+            self.assertEqual(jdata["training"]["seed"], 2 * ii + 1)
+
+    def test_template_nvnmd_v0(self):
+        ip = OPIO(
+            {
+                "template_script": template_script_nvnmd_v0,
+                "numb_models": self.numb_models,
+            }
+        )
+
+        faked_rg.faked_random = -1
+        with mock.patch("random.randrange", faked_rg.randrange):
+            op = self.ptrain.execute(ip)
+
+        self._check_output_dir_and_file_exist(op, self.numb_models)
+
+        for ii in range(self.numb_models):
+            with open(Path(train_task_pattern % ii) / train_script_name) as fp:
+                jdata = json.load(fp)
+            self.assertEqual(jdata["nvnmd"]["version"], 0)
+            self.assertEqual(jdata["nvnmd"]["seed"], 2 * ii + 0)
+            self.assertEqual(jdata["training"]["seed"], 2 * ii + 1)
+
     def test_template_raise_wrong_list_length(self):
@@ -168,6 +230,8 @@ def test_template_raise_wrong_list_length(self):
         ip = OPIO(
             {
                 "template_script": [
                     template_script_hybrid,
                     template_script_hybrid,
                     template_script_se_e2_a,
+                    template_script_nvnmd_v1,
+                    template_script_nvnmd_v0,
                 ],
                 "numb_models": self.numb_models,
             }
diff --git a/tests/op/test_run_dp_train.py b/tests/op/test_run_dp_train.py
index 7649b520..a64c0878 100644
--- a/tests/op/test_run_dp_train.py
+++ b/tests/op/test_run_dp_train.py
@@ -131,6 +131,7 @@ def setUp(self):
                     "auto_prob": "prob_sys_size",
                 },
                 "disp_file": "lcurve.out",
+                "save_ckpt": "model.ckpt",
             },
             "learning_rate": {
                 "start_lr": 1.0,
@@ -157,6 +158,7 @@ def setUp(self):
                     "auto_prob": "prob_sys_size; 0:4:0.9; 4:7:0.1",
                 },
                 "disp_file": "lcurve.out",
+                "save_ckpt": "model.ckpt",
                 "numb_steps": 400000,
             },
             "learning_rate": {
@@ -196,6 +198,7 @@ def setUp(self):
                 "batch_size": "auto",
                 "auto_prob_style": "prob_sys_size",
                 "disp_file": "lcurve.out",
+                "save_ckpt": "model.ckpt",
             },
             "learning_rate": {
                 "start_lr": 1.0,
@@ -220,6 +223,7 @@ def setUp(self):
                 "batch_size": "auto",
                 "auto_prob_style": "prob_sys_size; 0:4:0.9; 4:7:0.1",
                 "disp_file": "lcurve.out",
+                "save_ckpt": "model.ckpt",
                 "stop_batch": 400000,
             },
             "learning_rate": {
@@ -810,6 +814,7 @@ def setUp(self):
                     "auto_prob": "prob_sys_size",
                 },
                 "disp_file": "lcurve.out",
+                "save_ckpt": "model.ckpt",
             },
             "learning_rate": {
                 "start_lr": 1.0,
diff --git a/tests/op/test_run_nvnmd.py b/tests/op/test_run_nvnmd.py
new file mode 100644
index 00000000..e09bb03a
--- /dev/null
+++ b/tests/op/test_run_nvnmd.py
@@ -0,0 +1,194 @@
+import json
+import os
+import shutil
+import unittest
+from pathlib import (
+    Path,
+)
+
+import dpdata
+import numpy as np
+from dflow.python import (
+    OP,
+    OPIO,
+    Artifact,
+    OPIOSign,
+    TransientError,
+)
+from mock import (
+    call,
+    mock,
+    patch,
+)
+
+# isort: off
+from .context import (
+    dpgen2,
+)
+from dpgen2.constants import (
+    lmp_conf_name,
+    lmp_input_name,
+    lmp_log_name,
+    lmp_model_devi_name,
+    lmp_traj_name,
+    model_name_pattern,
+)
+from dpgen2.op.run_lmp import get_ele_temp, set_models
+from dpgen2.op.run_nvnmd import (
+    RunNvNMD,
+    merge_pimd_files,
+)
+from dpgen2.utils import (
+    BinaryFileInput,
+)
+
+# isort: on
+
+
+class TestRunNvNMD(unittest.TestCase):
+    def setUp(self):
+        self.task_path = Path("task/path")
+        self.task_path.mkdir(parents=True, exist_ok=True)
+        self.model_path = Path("models/path")
+        self.model_path.mkdir(parents=True, exist_ok=True)
+        (self.task_path / lmp_conf_name).write_text("foo")
+        (self.task_path / lmp_input_name).write_text("bar")
+        self.task_name = "task_000"
+        self.models = [self.model_path / Path(f"model_{ii}") for ii in range(4)]
+        for idx, ii in enumerate(self.models):
+            ii.mkdir(parents=True, exist_ok=True)
+            model_file = ii / Path("model.pb")
+            model_file.write_text(f"model{idx}")
+
+    def tearDown(self):
+        if Path("task").is_dir():
+            shutil.rmtree("task")
+        if Path("models").is_dir():
+            shutil.rmtree("models")
+        if Path(self.task_name).is_dir():
+            shutil.rmtree(self.task_name)
+
+    @patch("dpgen2.op.run_nvnmd.run_command")
+    def test_success(self, mocked_run):
+        mocked_run.side_effect = [(0, "foo\n", "")] * 4
+        op = RunNvNMD()
+        out = op.execute(
+            OPIO(
+                {
+                    "config": {"command": "mylmp"},
+                    "task_name": self.task_name,
+                    "task_path": self.task_path,
+                    "models": self.models,
+                }
+            )
+        )
+        work_dir = Path(self.task_name)
+        # check output
+        self.assertEqual(out["log"], work_dir / ("0_%s" % lmp_log_name))
+        self.assertEqual(out["traj"], work_dir / ("0_%s" % lmp_traj_name))
+        self.assertEqual(out["model_devi"], work_dir / lmp_model_devi_name)
+        # check call
+        models = ["models/path/model_%d.pb" % i for i in range(len(self.models))]
+        calls = [
+            call(
+                " ".join(
+                    [
+                        "mylmp",
+                        "-i",
+                        "%d_%s" % (ii, lmp_input_name),
+                        "-log",
+                        "%d_%s" % (ii, lmp_log_name),
+                        "-v",
+                        "rerun",
+                        "%d" % ii,
+                    ]
+                ),
+                shell=True,
+            )
+            for ii in range(len(models))
+        ]
+        mocked_run.assert_has_calls(calls)
+        # check input files are correctly linked
+        self.assertEqual((work_dir / lmp_conf_name).read_text(), "foo")
+        self.assertEqual((work_dir / lmp_input_name).read_text(), "bar")
+        for ii in range(4):
+            self.assertEqual(
+                (work_dir / (model_name_pattern % ii)).read_text(), f"model{ii}"
+            )
+
+    @patch("dpgen2.op.run_nvnmd.run_command")
+    def test_error(self, mocked_run):
+        mocked_run.side_effect = [(1, "foo\n", "")]
+        op = RunNvNMD()
+        with self.assertRaises(TransientError) as ee:
+            out = op.execute(
+                OPIO(
+                    {
+                        "config": {"command": "mylmp"},
+                        "task_name": self.task_name,
+                        "task_path": self.task_path,
+                        "models": self.models,
+                    }
+                )
+            )
+        # check call
+        models = ["models/path/model_%d.pb" % i for i in range(len(self.models))]
+        calls = [
+            call(
+                " ".join(
+                    [
+                        "mylmp",
+                        "-i",
+                        "%d_%s" % (ii, lmp_input_name),
+                        "-log",
+                        "%d_%s" % (ii, lmp_log_name),
+                        "-v",
+                        "rerun",
+                        "%d" % ii,
+                    ]
+                ),
+                shell=True,
+            )
+            for ii in range(1)
+        ]
+        mocked_run.assert_has_calls(calls)
+
+
+def swap_element(arg):
+    bk = arg.copy()
+    arg[1] = bk[0]
+    arg[0] = bk[1]
+
+
+class TestSetModels(unittest.TestCase):
+    def setUp(self):
+        self.input_name = Path("lmp.input")
+        self.model_names = ["model.000.pb", "model.001.pb"]
+
+    def tearDown(self):
+        os.remove(self.input_name)
+
+    def test(self):
+        lmp_config = "pair_style nvnmd model.000.pb\n"
+        expected_output = "pair_style nvnmd model.000.pb\n"
+        input_name = self.input_name
+        input_name.write_text(lmp_config)
+        set_models(input_name, self.model_names)
+        self.assertEqual(input_name.read_text(), expected_output)
+
+    def test_failed(self):
+        lmp_config = "pair_style deepmd model.000.pb\n"
+        input_name = self.input_name
+        input_name.write_text(lmp_config)
+        with self.assertRaises(RuntimeError) as re:
+            set_models(input_name, self.model_names)
+
+    def test_failed_no_matching(self):
+        lmp_config = "pair_style deepmd\n"
+        input_name = self.input_name
+        input_name.write_text(lmp_config)
+        with self.assertRaises(RuntimeError) as re:
+            set_models(input_name, self.model_names)
diff --git a/tests/op/test_run_nvnmd_train.py b/tests/op/test_run_nvnmd_train.py
new file mode 100644
index 00000000..729bea0f
--- /dev/null
+++ b/tests/op/test_run_nvnmd_train.py
@@ -0,0 +1,834 @@
+import itertools
+import json
+import os
+import shutil
+import unittest
+from pathlib import (
Path,
+)
+
+import numpy as np
+from dflow.python import (
+    OP,
+    OPIO,
+    Artifact,
+    FatalError,
+    OPIOSign,
+    TransientError,
+)
+from fake_data_set import (
+    fake_multi_sys,
+    fake_system,
+)
+from mock import (
+    call,
+    patch,
+)
+
+# isort: off
+from .context import (
+    dpgen2,
+)
+from dpgen2.constants import (
+    train_script_name,
+    train_cnn_script_name,
+    train_qnn_script_name,
+    train_task_pattern,
+)
+from dpgen2.op.run_dp_train import (
+    RunDPTrain,
+    _get_data_size_of_all_mult_sys,
+)
+from dpgen2.op.run_nvnmd_train import (
+    RunNvNMDTrain,
+    _make_train_command,
+)
+
+# isort: on
+
+
+class TestRunNvNMDTrain(unittest.TestCase):
+    def setUp(self):
+        self.atom_name = "foo"
+        self.nframes_0 = [2, 5, 3]
+        self.natoms_0 = [4, 3, 4]
+        self.nframes_1 = [3, 4, 2]
+        self.natoms_1 = [5, 3, 2]
+        ms_0 = fake_multi_sys(self.nframes_0, self.natoms_0, self.atom_name)
+        ms_1 = fake_multi_sys(self.nframes_1, self.natoms_1, self.atom_name)
+        ms_0.to_deepmd_npy("data-0")
+        ms_1.to_deepmd_npy("data-1")
+        self.iter_data = [Path("data-0"), Path("data-1")]
+        self.iter_data_exp = [
+            "data-0/foo3",
+            "data-0/foo4",
+            "data-1/foo2",
+            "data-1/foo3",
+            "data-1/foo5",
+        ]
+        ms_0.to_deepmd_npy_mixed("mixed-data-0")
+        ms_1.to_deepmd_npy_mixed("mixed-data-1")
+        self.mixed_iter_data = [Path("mixed-data-0"), Path("mixed-data-1")]
+
+        self.init_nframes_0 = 3
+        self.init_natoms_0 = 5
+        self.init_nframes_1 = 4
+        self.init_natoms_1 = 2
+        ss_0 = fake_system(self.init_nframes_0, self.init_natoms_0, self.atom_name)
+        ss_1 = fake_system(self.init_nframes_1, self.init_natoms_1, self.atom_name)
+        ss_0.to_deepmd_npy("init/data-0")
+        ss_1.to_deepmd_npy("init/data-1")
+        self.init_data = [Path("init/data-0"), Path("init/data-1")]
+        self.init_data = sorted(list(self.init_data))
+
+        # the init model of an NvNMD run is a directory of model files
+        self.init_model = Path("nvnmd_models")
+
+        self.config = {
+            "init_model_policy": "no",
+            "init_model_old_ratio": 0.9,
+            "init_model_numb_steps": 400000,
+            "init_model_start_lr": 1e-4,
+            "init_model_start_pref_e": 0.1,
+            "init_model_start_pref_f": 100,
+            "init_model_start_pref_v": 0.0,
+        }
+        self.config = RunDPTrain.normalize_config(self.config)
+
+        self.old_data_size = (
+            self.init_nframes_0 + self.init_nframes_1 + sum(self.nframes_0)
+        )
+        self.task_name = "task-000"
+        self.task_path = "input-000"
+
+        self.idict_v2 = {
+            "training": {
+                "training_data": {
+                    "systems": [],
+                },
+                "validation_data": {
+                    "systems": [],
+                },
+            },
+            "learning_rate": {
+                "start_lr": 1.0,
+            },
+            "loss": {
+                "start_pref_e": 1.0,
+                "start_pref_f": 1.0,
+                "start_pref_v": 1.0,
+            },
+        }
+        self.expected_odict_v2 = {
+            "training": {
+                "training_data": {
+                    "systems": [
+                        "init/data-0",
+                        "init/data-1",
+                        "data-0/foo3",
+                        "data-0/foo4",
+                        "data-1/foo2",
+                        "data-1/foo3",
+                        "data-1/foo5",
+                    ],
+                    "batch_size": "auto",
+                    "auto_prob": "prob_sys_size",
+                },
+                "disp_file": "lcurve.out",
+                "save_ckpt": "model.ckpt",
+            },
+            "learning_rate": {
+                "start_lr": 1.0,
+            },
+            "loss": {
+                "start_pref_e": 1.0,
+                "start_pref_f": 1.0,
+                "start_pref_v": 1.0,
+            },
+        }
+        self.expected_init_model_odict_v2 = {
+            "training": {
+                "training_data": {
+                    "systems": [
+                        "init/data-0",
+                        "init/data-1",
+                        "data-0/foo3",
+                        "data-0/foo4",
+                        "data-1/foo2",
+                        "data-1/foo3",
+                        "data-1/foo5",
+                    ],
+                    "batch_size": "auto",
+                    "auto_prob": "prob_sys_size; 0:4:0.9; 4:7:0.1",
+                },
+                "disp_file": "lcurve.out",
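+                # "save_ckpt" below is assumed to be injected by the op when it
+                # writes the final training script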
"save_ckpt": "model.ckpt", + "numb_steps": 400000, + }, + "learning_rate": { + "start_lr": 1e-4, + }, + "loss": { + "start_pref_e": 0.1, + "start_pref_f": 100, + "start_pref_v": 0.0, + }, + } + self.expected_qnn_model_odict_v2 = { + "training": { + "training_data": { + "systems": [ + "init/data-0", + "init/data-1", + "data-0/foo3", + "data-0/foo4", + "data-1/foo2", + "data-1/foo3", + "data-1/foo5", + ], + "batch_size": "auto", + "auto_prob": "prob_sys_size; 0:4:0.9; 4:7:0.1", + }, + "disp_file": "lcurve.out", + "save_ckpt": "model.ckpt", + "numb_steps": 0, + }, + "learning_rate": { + "start_lr": 1e-4, + }, + "loss": { + "start_pref_e": 0.1, + "start_pref_f": 100, + "start_pref_v": 0.0, + }, + } + + self.idict_v1 = { + "training": { + "systems": [], + }, + "learning_rate": { + "start_lr": 1.0, + }, + "loss": { + "start_pref_e": 1.0, + "start_pref_f": 1.0, + "start_pref_v": 1.0, + }, + } + self.expected_odict_v1 = { + "training": { + "systems": [ + "init/data-0", + "init/data-1", + "data-0/foo3", + "data-0/foo4", + "data-1/foo2", + "data-1/foo3", + "data-1/foo5", + ], + "batch_size": "auto", + "auto_prob_style": "prob_sys_size", + "disp_file": "lcurve.out", + "save_ckpt": "model.ckpt", + }, + "learning_rate": { + "start_lr": 1.0, + }, + "loss": { + "start_pref_e": 1.0, + "start_pref_f": 1.0, + "start_pref_v": 1.0, + }, + } + self.expected_init_model_odict_v1 = { + "training": { + "systems": [ + "init/data-0", + "init/data-1", + "data-0/foo3", + "data-0/foo4", + "data-1/foo2", + "data-1/foo3", + "data-1/foo5", + ], + "batch_size": "auto", + "auto_prob_style": "prob_sys_size; 0:4:0.9; 4:7:0.1", + "disp_file": "lcurve.out", + "save_ckpt": "model.ckpt", + "stop_batch": 400000, + }, + "learning_rate": { + "start_lr": 1e-4, + }, + "loss": { + "start_pref_e": 0.1, + "start_pref_f": 100, + "start_pref_v": 0.0, + }, + } + self.expected_qnn_model_odict_v1 = { + "training": { + "systems": [ + "init/data-0", + "init/data-1", + "data-0/foo3", + "data-0/foo4", + "data-1/foo2", + "data-1/foo3", + "data-1/foo5", + ], + "batch_size": "auto", + "auto_prob_style": "prob_sys_size; 0:4:0.9; 4:7:0.1", + "disp_file": "lcurve.out", + "save_ckpt": "model.ckpt", + "stop_batch": 0, + }, + "learning_rate": { + "start_lr": 1e-4, + }, + "loss": { + "start_pref_e": 0.1, + "start_pref_f": 100, + "start_pref_v": 0.0, + }, + } + + def tearDown(self): + for ii in [ + "init", + "data-0", + "data-1", + "mixed-data-0", + "mixed-data-1", + self.task_path, + self.task_name, + ]: + if Path(ii).exists(): + shutil.rmtree(str(ii)) + + def test_normalize_config(self): + config = self.config + self.assertEqual(config["init_model_policy"], "no") + self.assertAlmostEqual(config["init_model_old_ratio"], 0.9) + self.assertEqual(config["init_model_numb_steps"], 400000) + self.assertAlmostEqual(config["init_model_start_lr"], 1e-4) + self.assertAlmostEqual(config["init_model_start_pref_e"], 0.1) + self.assertAlmostEqual(config["init_model_start_pref_f"], 100) + self.assertAlmostEqual(config["init_model_start_pref_v"], 0.0) + + def test_update_input_dict_v1_init_model(self): + odict = RunDPTrain.write_data_to_input_script( + self.idict_v1, + self.config, + self.init_data, + self.iter_data_exp, + auto_prob_str="prob_sys_size; 0:4:0.9; 4:7:0.1", + major_version="1", + ) + config = self.config.copy() + config["init_model_policy"] = "yes" + odict = RunDPTrain.write_other_to_input_script( + odict, config, True, major_version="1", do_quantized=False + ) + self.assertDictEqual(odict, self.expected_init_model_odict_v1) + odict = 
RunDPTrain.write_other_to_input_script( + odict, config, True, major_version="1", do_quantized=True + ) + self.assertDictEqual(odict, self.expected_qnn_model_odict_v1) + + def test_update_input_dict_v1(self): + odict = RunDPTrain.write_data_to_input_script( + self.idict_v1, + self.config, + self.init_data, + self.iter_data_exp, + auto_prob_str="prob_sys_size", + major_version="1", + ) + config = self.config.copy() + config["init_model_policy"] = "no" + odict = RunDPTrain.write_other_to_input_script( + odict, config, False, major_version="1", do_quantized=False + ) + self.assertDictEqual(odict, self.expected_odict_v1) + + def test_update_input_dict_v2_init_model(self): + idict = self.idict_v2 + odict = RunDPTrain.write_data_to_input_script( + idict, + self.config, + self.init_data, + self.iter_data_exp, + auto_prob_str="prob_sys_size; 0:4:0.9; 4:7:0.1", + major_version="2", + ) + config = self.config.copy() + config["init_model_policy"] = "yes" + odict = RunDPTrain.write_other_to_input_script( + odict, config, True, major_version="2", do_quantized=False + ) + self.assertDictEqual(odict, self.expected_init_model_odict_v2) + odict = RunDPTrain.write_other_to_input_script( + odict, config, True, major_version="2", do_quantized=True + ) + self.assertDictEqual(odict, self.expected_qnn_model_odict_v2) + + def test_update_input_dict_v2(self): + idict = self.idict_v2 + odict = RunDPTrain.write_data_to_input_script( + idict, + self.config, + self.init_data, + self.iter_data_exp, + auto_prob_str="prob_sys_size", + major_version="2", + ) + config = self.config.copy() + config["init_model_policy"] = "no" + odict = RunDPTrain.write_other_to_input_script( + odict, config, False, major_version="2", do_quantized=False + ) + self.assertDictEqual(odict, self.expected_odict_v2) + + @patch("dpgen2.op.run_nvnmd_train.run_command") + def test_exec_v1(self, mocked_run): + mocked_run.side_effect = [(0, "foo\n", ""), (0, "bar\n", "")] + + config = self.config.copy() + config["init_model_policy"] = "no" + + task_path = self.task_path + Path(task_path).mkdir(exist_ok=True) + with open(Path(task_path) / train_script_name, "w") as fp: + json.dump(self.idict_v1, fp, indent=4) + task_name = self.task_name + work_dir = Path(task_name) + + ptrain = RunNvNMDTrain() + out = ptrain.execute( + OPIO( + { + "config": config, + "task_name": task_name, + "task_path": Path(task_path), + "init_model": Path(self.init_model), + "init_data": [Path(ii) for ii in self.init_data], + "iter_data": [Path(ii) for ii in self.iter_data], + } + ) + ) + self.assertEqual(out["script"], work_dir / train_script_name) + self.assertEqual( + out["model"] / "frozen_model.pb", + work_dir / "nvnmd_models/frozen_model.pb", + ) + self.assertEqual( + out["model"] / "model.pb", + work_dir / "nvnmd_models/model.pb", + ) + self.assertEqual( + out["model"] / "model.ckpt.data-00000-of-00001", + work_dir / "nvnmd_models/model.ckpt.data-00000-of-00001", + ) + self.assertEqual( + out["model"] / "model.ckpt.meta", + work_dir / "nvnmd_models/model.ckpt.meta", + ) + self.assertEqual( + out["model"] / "model.ckpt.index", + work_dir / "nvnmd_models/model.ckpt.index", + ) + self.assertEqual( + out["lcurve"], + work_dir / "nvnmd_cnn/lcurve.out", + ) + self.assertEqual( + out["log"], + work_dir / "train.log", + ) + + calls = [ + call(["dp", "train-nvnmd", train_cnn_script_name, "-s", "s1"]), + call(["dp", "train-nvnmd", train_qnn_script_name, "-s", "s2"]), + ] + mocked_run.assert_has_calls(calls) + + self.assertTrue(work_dir.is_dir()) + self.assertTrue(out["log"].is_file()) 
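+        # train.log is expected to concatenate the CNN and QNN stdout/stderr sections in order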
+ self.assertEqual( + out["log"].read_text(), + "#=================== train_cnn std out ===================\n" + "foo\n" + "#=================== train_cnn std err ===================\n" + "#=================== train_qnn std out ===================\n" + "bar\n" + "#=================== train_qnn std err ===================\n", + ) + with open(out["script"]) as fp: + jdata = json.load(fp) + self.assertDictEqual(jdata, self.expected_odict_v1) + + @patch("dpgen2.op.run_nvnmd_train.run_command") + def test_exec_v2(self, mocked_run): + mocked_run.side_effect = [(0, "foo\n", ""), (0, "bar\n", "")] + + config = self.config.copy() + config["init_model_policy"] = "no" + + task_path = self.task_path + Path(task_path).mkdir(exist_ok=True) + with open(Path(task_path) / train_script_name, "w") as fp: + json.dump(self.idict_v2, fp, indent=4) + task_name = self.task_name + work_dir = Path(task_name) + + ptrain = RunNvNMDTrain() + out = ptrain.execute( + OPIO( + { + "config": config, + "task_name": task_name, + "task_path": Path(task_path), + "init_model": Path(self.init_model), + "init_data": [Path(ii) for ii in self.init_data], + "iter_data": [Path(ii) for ii in self.iter_data], + } + ) + ) + self.assertEqual(out["script"], work_dir / train_script_name) + self.assertEqual( + out["model"] / "frozen_model.pb", + work_dir / "nvnmd_models/frozen_model.pb", + ) + self.assertEqual( + out["model"] / "model.pb", + work_dir / "nvnmd_models/model.pb", + ) + self.assertEqual( + out["model"] / "model.ckpt.data-00000-of-00001", + work_dir / "nvnmd_models/model.ckpt.data-00000-of-00001", + ) + self.assertEqual( + out["model"] / "model.ckpt.meta", + work_dir / "nvnmd_models/model.ckpt.meta", + ) + self.assertEqual( + out["model"] / "model.ckpt.index", + work_dir / "nvnmd_models/model.ckpt.index", + ) + self.assertEqual(out["lcurve"], work_dir / "nvnmd_cnn/lcurve.out") + self.assertEqual(out["log"], work_dir / "train.log") + + calls = [ + call(["dp", "train-nvnmd", train_cnn_script_name, "-s", "s1"]), + call(["dp", "train-nvnmd", train_qnn_script_name, "-s", "s2"]), + ] + mocked_run.assert_has_calls(calls) + + self.assertTrue(work_dir.is_dir()) + self.assertTrue(out["log"].is_file()) + self.assertEqual( + out["log"].read_text(), + "#=================== train_cnn std out ===================\n" + "foo\n" + "#=================== train_cnn std err ===================\n" + "#=================== train_qnn std out ===================\n" + "bar\n" + "#=================== train_qnn std err ===================\n", + ) + with open(out["script"]) as fp: + jdata = json.load(fp) + self.assertDictEqual(jdata, self.expected_odict_v2) + + @patch("dpgen2.op.run_nvnmd_train.run_command") + def test_exec_v2_init_model(self, mocked_run): + mocked_run.side_effect = [(0, "foo\n", ""), (0, "bar\n", "")] + + config = self.config.copy() + config["init_model_policy"] = "yes" + + task_path = self.task_path + Path(task_path).mkdir(exist_ok=True) + with open(Path(task_path) / train_script_name, "w") as fp: + json.dump(self.idict_v2, fp, indent=4) + task_name = self.task_name + work_dir = Path(task_name) + + ptrain = RunNvNMDTrain() + out = ptrain.execute( + OPIO( + { + "config": config, + "task_name": task_name, + "task_path": Path(task_path), + "init_model": Path(self.init_model), + "init_data": [Path(ii) for ii in self.init_data], + "iter_data": [Path(ii) for ii in self.iter_data], + } + ) + ) + self.assertEqual(out["script"], work_dir / train_script_name) + self.assertEqual( + out["model"] / "frozen_model.pb", + work_dir / 
"nvnmd_models/frozen_model.pb", + ) + self.assertEqual( + out["model"] / "model.pb", + work_dir / "nvnmd_models/model.pb", + ) + self.assertEqual( + out["model"] / "model.ckpt.data-00000-of-00001", + work_dir / "nvnmd_models/model.ckpt.data-00000-of-00001", + ) + self.assertEqual( + out["model"] / "model.ckpt.meta", + work_dir / "nvnmd_models/model.ckpt.meta", + ) + self.assertEqual( + out["model"] / "model.ckpt.index", + work_dir / "nvnmd_models/model.ckpt.index", + ) + self.assertEqual(out["lcurve"], work_dir / "nvnmd_cnn/lcurve.out") + self.assertEqual(out["log"], work_dir / "train.log") + + calls = [ + call( + [ + "dp", + "train-nvnmd", + "--init-model", + "model.ckpt", + train_cnn_script_name, + "-s", + "s1", + ] + ) + ] + mocked_run.assert_has_calls(calls) + + self.assertTrue(work_dir.is_dir()) + self.assertTrue(out["log"].is_file()) + self.assertEqual( + out["log"].read_text(), + "#=================== train_cnn std out ===================\n" + "foo\n" + "#=================== train_cnn std err ===================\n" + "#=================== train_qnn std out ===================\n" + "bar\n" + "#=================== train_qnn std err ===================\n", + ) + with open(out["script"]) as fp: + jdata = json.load(fp) + self.assertDictEqual(jdata, self.expected_init_model_odict_v2) + + @patch("dpgen2.op.run_nvnmd_train.run_command") + def test_exec_v2_train_error(self, mocked_run): + mocked_run.side_effect = [(1, "", "foo\n"), (0, "bar\n", "")] + + config = self.config.copy() + config["init_model_policy"] = "no" + + task_path = self.task_path + Path(task_path).mkdir(exist_ok=True) + with open(Path(task_path) / train_script_name, "w") as fp: + json.dump(self.idict_v2, fp, indent=4) + task_name = self.task_name + work_dir = Path(task_name) + + ptrain = RunNvNMDTrain() + with self.assertRaises(FatalError) as ee: + out = ptrain.execute( + OPIO( + { + "config": config, + "task_name": task_name, + "task_path": Path(task_path), + "init_model": Path(self.init_model), + "init_data": [Path(ii) for ii in self.init_data], + "iter_data": [Path(ii) for ii in self.iter_data], + } + ) + ) + + calls = [ + call(["dp", "train-nvnmd", train_cnn_script_name, "-s", "s1"]), + ] + mocked_run.assert_has_calls(calls) + + self.assertTrue(work_dir.is_dir()) + with open(work_dir / train_script_name) as fp: + jdata = json.load(fp) + self.assertDictEqual(jdata, self.expected_odict_v2) + + +class TestRunNvNMDTrainNullIterData(unittest.TestCase): + def setUp(self): + self.atom_name = "foo" + self.init_nframs_0 = 3 + self.init_natoms_0 = 5 + self.init_nframs_1 = 4 + self.init_natoms_1 = 2 + ss_0 = fake_system(self.init_nframs_0, self.init_natoms_0, self.atom_name) + ss_1 = fake_system(self.init_nframs_1, self.init_natoms_1, self.atom_name) + ss_0.to_deepmd_npy("init/data-0") + ss_1.to_deepmd_npy("init/data-1") + self.init_data = [Path("init/data-0"), Path("init/data-1")] + self.init_data = sorted(list(self.init_data)) + + self.init_model = Path("bar.pb") + self.init_model_ckpt_meta = Path("model.ckpt.meta") + self.init_model_ckpt_data = Path("model.ckpt.data") + self.init_model_ckpt_index = Path("model.ckpt.index") + self.init_model = Path("nvnmd_models") + + self.config = { + "init_model_policy": "no", + "init_model_old_ratio": 0.9, + "init_model_numb_steps": 400000, + "init_model_start_lr": 1e-4, + "init_model_start_pref_e": 0.1, + "init_model_start_pref_f": 100, + "init_model_start_pref_v": 0.0, + } + self.config = RunDPTrain.normalize_config(self.config) + + self.task_name = "task-000" + self.task_path = "input-000" 
+ + self.idict_v2 = { + "training": { + "training_data": { + "systems": [], + }, + "validation_data": { + "systems": [], + }, + }, + "learning_rate": { + "start_lr": 1.0, + }, + "loss": { + "start_pref_e": 1.0, + "start_pref_f": 1.0, + "start_pref_v": 1.0, + }, + } + self.expected_odict_v2 = { + "training": { + "training_data": { + "systems": ["init/data-0", "init/data-1"], + "batch_size": "auto", + "auto_prob": "prob_sys_size", + }, + "disp_file": "lcurve.out", + "save_ckpt": "model.ckpt", + }, + "learning_rate": { + "start_lr": 1.0, + }, + "loss": { + "start_pref_e": 1.0, + "start_pref_f": 1.0, + "start_pref_v": 1.0, + }, + } + + def tearDown(self): + for ii in ["init", self.task_path, self.task_name, "foo"]: + if Path(ii).exists(): + shutil.rmtree(str(ii)) + + def test_update_input_dict_v2_empty_list(self): + idict = self.idict_v2 + odict = RunDPTrain.write_data_to_input_script( + idict, + self.config, + self.init_data, + [], + auto_prob_str="prob_sys_size", + major_version="2", + ) + config = self.config.copy() + config["init_model_policy"] = "no" + odict = RunDPTrain.write_other_to_input_script( + odict, config, False, major_version="2", do_quantized=False + ) + self.assertDictEqual(odict, self.expected_odict_v2) + + @patch("dpgen2.op.run_nvnmd_train.run_command") + def test_exec_v2_empty_dir(self, mocked_run): + mocked_run.side_effect = [(0, "foo\n", ""), (0, "bar\n", "")] + + config = self.config.copy() + config["init_model_policy"] = "no" + + task_path = self.task_path + Path(task_path).mkdir(exist_ok=True) + with open(Path(task_path) / train_script_name, "w") as fp: + json.dump(self.idict_v2, fp, indent=4) + task_name = self.task_name + work_dir = Path(task_name) + empty_data = Path("foo") + empty_data.mkdir(exist_ok=True) + + ptrain = RunNvNMDTrain() + out = ptrain.execute( + OPIO( + { + "config": config, + "task_name": task_name, + "task_path": Path(task_path), + "init_model": Path(self.init_model), + "init_data": [Path(ii) for ii in self.init_data], + "iter_data": [empty_data], + } + ) + ) + self.assertEqual(out["script"], work_dir / train_script_name) + self.assertEqual( + out["model"] / "frozen_model.pb", + work_dir / "nvnmd_models/frozen_model.pb", + ) + self.assertEqual( + out["model"] / "model.pb", + work_dir / "nvnmd_models/model.pb", + ) + self.assertEqual( + out["model"] / "model.ckpt.data-00000-of-00001", + work_dir / "nvnmd_models/model.ckpt.data-00000-of-00001", + ) + self.assertEqual( + out["model"] / "model.ckpt.meta", + work_dir / "nvnmd_models/model.ckpt.meta", + ) + self.assertEqual( + out["model"] / "model.ckpt.index", + work_dir / "nvnmd_models/model.ckpt.index", + ) + self.assertEqual(out["lcurve"], work_dir / "nvnmd_cnn/lcurve.out") + self.assertEqual(out["log"], work_dir / "train.log") + + calls = [ + call(["dp", "train-nvnmd", train_cnn_script_name, "-s", "s1"]), + call(["dp", "train-nvnmd", train_qnn_script_name, "-s", "s2"]), + ] + mocked_run.assert_has_calls(calls) + + self.assertTrue(work_dir.is_dir()) + self.assertTrue(out["log"].is_file()) + self.assertEqual( + out["log"].read_text(), + "#=================== train_cnn std out ===================\n" + "foo\n" + "#=================== train_cnn std err ===================\n" + "#=================== train_qnn std out ===================\n" + "bar\n" + "#=================== train_qnn std err ===================\n", + ) + with open(out["script"]) as fp: + jdata = json.load(fp) + self.assertDictEqual(jdata, self.expected_odict_v2) diff --git a/tests/test_prep_run_dp_train.py 
b/tests/test_prep_run_dp_train.py index 536ca4b9..b137070b 100644 --- a/tests/test_prep_run_dp_train.py +++ b/tests/test_prep_run_dp_train.py @@ -53,8 +53,11 @@ MockedPrepDPTrain, MockedRunDPTrain, MockedRunDPTrainNoneInitModel, + MockedRunNvNMDTrain, + MockedRunNvNMDTrainNoneInitModel, make_mocked_init_data, make_mocked_init_models, + make_mocked_init_nvnmd_models, mocked_numb_models, mocked_template_script, ) @@ -132,6 +135,19 @@ def _check_model( tcase.assertEqual(flines[ii + 1], mlines[ii]) +def _check_nvnmd_model_files(tcase, cwd, init_model): + """Helper to check all nvnmd model files.""" + model_checks = [ + ("nvnmd_models/frozen_model.pb", init_model / "frozen_model.pb"), + ("nvnmd_models/model.pb", init_model / "frozen_model.pb"), + ("nvnmd_models/model.ckpt.meta", init_model / "model.ckpt.meta"), + ("nvnmd_models/model.ckpt.data-00000-of-00001", init_model / "model.ckpt.data"), + ("nvnmd_models/model.ckpt.index", init_model / "model.ckpt.index"), + ] + for output_file, expected_file in model_checks: + _check_model(tcase, output_file, cwd, expected_file) + + def _check_lcurve( tcase, fname, @@ -173,6 +189,32 @@ def check_run_train_dp_output( os.chdir(cwd) +def check_run_train_nvnmd_output( + tcase, + work_dir, + script, + init_model, + init_data, + iter_data, + only_check_name=False, +): + cwd = os.getcwd() + os.chdir(work_dir) + _check_log( + tcase, + "log", + cwd, + script, + init_model, + init_data, + iter_data, + only_check_name=only_check_name, + ) + _check_nvnmd_model_files(tcase, cwd, init_model) + _check_lcurve(tcase, "nvnmd_cnn/lcurve.out", cwd, script) + os.chdir(cwd) + + class TestMockedPrepDPTrain(unittest.TestCase): def setUp(self): self.numb_models = mocked_numb_models @@ -270,6 +312,95 @@ def test(self): ) +class TestMockedRunNvNMDTrain(unittest.TestCase): + def setUp(self): + self.numb_models = mocked_numb_models + + self.init_models = make_mocked_init_nvnmd_models(self.numb_models) + + tmp_init_data = make_mocked_init_data() + self.init_data = tmp_init_data + + tmp_iter_data = [Path("iter_data/foo"), Path("iter_data/bar")] + for ii in tmp_iter_data: + ii.mkdir(exist_ok=True, parents=True) + (ii / "a").write_text("data a") + (ii / "b").write_text("data b") + self.iter_data = tmp_iter_data + + self.template_script = mocked_template_script.copy() + + self.task_names = ["task.0000", "task.0001", "task.0002"] + self.task_paths = [Path(ii) for ii in self.task_names] + self.train_scripts = [ + Path("task.0000/input.json"), + Path("task.0001/input.json"), + Path("task.0002/input.json"), + ] + + for ii in range(3): + Path(self.task_names[ii]).mkdir(exist_ok=True, parents=True) + Path(self.train_scripts[ii]).write_text("{}") + + def tearDown(self): + for ii in ["init_data", "iter_data"] + self.task_names: + if Path(ii).exists(): + shutil.rmtree(str(ii)) + for ii in self.init_models: + if Path(ii).exists(): + shutil.rmtree(ii) + + def test(self): + for ii in range(3): + run = MockedRunNvNMDTrain() + ip = OPIO( + { + "config": {}, + "task_name": self.task_names[ii], + "task_path": self.task_paths[ii], + "init_model": self.init_models[ii], + "init_data": self.init_data, + "iter_data": self.iter_data, + } + ) + op = run.execute(ip) + self.assertEqual(op["script"], Path(train_task_pattern % ii) / "input.json") + self.assertTrue(op["script"].is_file()) + self.assertEqual( + op["model"] / "frozen_model.pb", + Path(train_task_pattern % ii) / "nvnmd_models/frozen_model.pb", + ) + self.assertEqual( + op["model"] / "model.pb", + Path(train_task_pattern % ii) / "nvnmd_models/model.pb", 
+ ) + self.assertEqual( + op["model"] / "model.ckpt.meta", + Path(train_task_pattern % ii) / "nvnmd_models/model.ckpt.meta", + ) + self.assertEqual( + op["model"] / "model.ckpt.data-00000-of-00001", + Path(train_task_pattern % ii) + / "nvnmd_models/model.ckpt.data-00000-of-00001", + ) + self.assertEqual( + op["model"] / "model.ckpt.index", + Path(train_task_pattern % ii) / "nvnmd_models/model.ckpt.index", + ) + self.assertEqual(op["log"], Path(train_task_pattern % ii) / "log") + self.assertEqual( + op["lcurve"], Path(train_task_pattern % ii) / "nvnmd_cnn/lcurve.out" + ) + check_run_train_nvnmd_output( + self, + self.task_names[ii], + self.train_scripts[ii], + self.init_models[ii], + self.init_data, + self.iter_data, + ) + + @unittest.skipIf(skip_ut_with_dflow, skip_ut_with_dflow_reason) class TestTrainDp(unittest.TestCase): def setUp(self): @@ -449,3 +580,127 @@ def test_finetune(self): self.path_iter_data, only_check_name=True, ) + + +@unittest.skipIf(skip_ut_with_dflow, skip_ut_with_dflow_reason) +class TestTrainNvNMD(unittest.TestCase): + def setUp(self): + self.numb_models = mocked_numb_models + + tmp_models = make_mocked_init_nvnmd_models(self.numb_models) + self.init_models = upload_artifact(tmp_models) + self.str_init_models = tmp_models + + tmp_init_data = make_mocked_init_data() + self.init_data = upload_artifact(tmp_init_data) + self.path_init_data = tmp_init_data + + tmp_iter_data = [Path("iter_data/foo"), Path("iter_data/bar")] + for ii in tmp_iter_data: + ii.mkdir(exist_ok=True, parents=True) + (ii / "a").write_text("data a") + (ii / "b").write_text("data b") + self.iter_data = upload_artifact(tmp_iter_data) + self.path_iter_data = tmp_iter_data + + self.template_script = mocked_template_script.copy() + + self.task_names = ["task.0000", "task.0001", "task.0002"] + self.task_paths = [Path(ii) for ii in self.task_names] + self.train_scripts = [ + Path("task.0000/input.json"), + Path("task.0001/input.json"), + Path("task.0002/input.json"), + ] + + def tearDown(self): + for ii in ["init_data", "iter_data"] + self.task_names: + if Path(ii).exists(): + shutil.rmtree(str(ii)) + for ii in self.str_init_models: + if Path(ii).exists(): + shutil.rmtree(ii) + + def test_train(self): + steps = PrepRunDPTrain( + "train-steps", + MockedPrepDPTrain, + MockedRunNvNMDTrain, + upload_python_packages=upload_python_packages, + prep_config=default_config, + run_config=default_config, + ) + train_step = Step( + "train-step", + template=steps, + parameters={ + "numb_models": self.numb_models, + "template_script": self.template_script, + "train_config": {}, + }, + artifacts={ + "init_models": self.init_models, + "init_data": self.init_data, + "iter_data": self.iter_data, + }, + ) + wf = Workflow(name="nvnmd-train", host=default_host) + wf.add(train_step) + wf.submit() + + while wf.query_status() in ["Pending", "Running"]: + time.sleep(4) + + self.assertEqual(wf.query_status(), "Succeeded") + step = wf.query_step(name="train-step")[0] + self.assertEqual(step.phase, "Succeeded") + + download_artifact(step.outputs.artifacts["scripts"]) + download_artifact(step.outputs.artifacts["models"]) + download_artifact(step.outputs.artifacts["logs"]) + download_artifact(step.outputs.artifacts["lcurves"]) + + for ii in range(3): + check_run_train_nvnmd_output( + self, + self.task_names[ii], + self.train_scripts[ii], + self.str_init_models[ii], + self.path_init_data, + self.path_iter_data, + only_check_name=True, + ) + + def test_train_no_init_model(self): + steps = PrepRunDPTrain( + "train-steps", + 
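+            # the prep op is shared with plain DP training; only the run op is
+            # swapped for NvNMD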
MockedPrepDPTrain, + MockedRunNvNMDTrainNoneInitModel, + upload_python_packages=upload_python_packages, + prep_config=default_config, + run_config=default_config, + ) + train_step = Step( + "train-step", + template=steps, + parameters={ + "numb_models": self.numb_models, + "template_script": self.template_script, + "train_config": {}, + }, + artifacts={ + "init_models": None, + "init_data": self.init_data, + "iter_data": self.iter_data, + }, + ) + wf = Workflow(name="nvnmd-train", host=default_host) + wf.add(train_step) + wf.submit() + + while wf.query_status() in ["Pending", "Running"]: + time.sleep(4) + + self.assertEqual(wf.query_status(), "Succeeded") + step = wf.query_step(name="train-step")[0] + self.assertEqual(step.phase, "Succeeded") diff --git a/tests/test_prep_run_lmp.py b/tests/test_prep_run_lmp.py index 3b350240..db3ae06c 100644 --- a/tests/test_prep_run_lmp.py +++ b/tests/test_prep_run_lmp.py @@ -53,6 +53,7 @@ ) from mocked_ops import ( MockedRunLmp, + MockedRunNvNMD, mocked_numb_models, ) @@ -218,6 +219,77 @@ def test(self): self.check_run_lmp_output(self.task_list_str[ii], self.model_list) +class TestMockedRunNvNMD(unittest.TestCase): + def setUp(self): + self.ntask = 2 + self.nmodels = 3 + self.task_list = [] + self.model_list = [] + for ii in range(self.ntask): + work_path = Path(lmp_task_pattern % ii) + work_path.mkdir(exist_ok=True, parents=True) + (work_path / lmp_conf_name).write_text(f"conf {ii}") + (work_path / lmp_input_name).write_text(f"input {ii}") + self.task_list.append(work_path) + for ii in range(self.nmodels): + model = Path(f"model{ii}.pb") + model.write_text(f"model {ii}") + self.model_list.append(model) + + def check_run_lmp_output( + self, + task_name: str, + models: List[Path], + ): + cwd = os.getcwd() + os.chdir(task_name) + fc = [] + for ii in [lmp_conf_name, lmp_input_name] + [ii.name for ii in models]: + fc.append(Path(ii).read_text()) + self.assertEqual(fc, Path(lmp_log_name).read_text().strip().split("\n")) + self.assertEqual( + f"traj of {task_name}", Path(lmp_traj_name).read_text().split("\n")[0] + ) + self.assertEqual( + f"model_devi of {task_name}", Path(lmp_model_devi_name).read_text() + ) + os.chdir(cwd) + + def tearDown(self): + for ii in range(self.ntask): + work_path = Path(lmp_task_pattern % ii) + if work_path.is_dir(): + shutil.rmtree(work_path) + for ii in range(self.nmodels): + model = Path(f"model{ii}.pb") + if model.is_file(): + os.remove(model) + + def test(self): + self.task_list_str = [str(ii) for ii in self.task_list] + self.model_list_str = [str(ii) for ii in self.model_list] + for ii in range(self.ntask): + ip = OPIO( + { + "task_name": self.task_list_str[ii], + "task_path": self.task_list[ii], + "models": self.model_list, + "config": {}, + } + ) + op = MockedRunNvNMD() + out = op.execute(ip) + self.assertEqual(out["log"], Path(f"task.{ii:06d}") / lmp_log_name) + self.assertEqual(out["traj"], Path(f"task.{ii:06d}") / lmp_traj_name) + self.assertEqual( + out["model_devi"], Path(f"task.{ii:06d}") / lmp_model_devi_name + ) + self.assertTrue(out["log"].is_file()) + self.assertTrue(out["traj"].is_file()) + self.assertTrue(out["model_devi"].is_file()) + self.check_run_lmp_output(self.task_list_str[ii], self.model_list) + + # @unittest.skip("temp") @unittest.skipIf(skip_ut_with_dflow, skip_ut_with_dflow_reason) class TestPrepRunLmp(unittest.TestCase): diff --git a/tests/utils/test_dl_dpgen2_arti.py b/tests/utils/test_dl_dpgen2_arti.py index c1166678..a9f251ac 100644 --- a/tests/utils/test_dl_dpgen2_arti.py +++ 
b/tests/utils/test_dl_dpgen2_arti.py @@ -28,6 +28,9 @@ class MockedArti: + def get(self, key): + return self.__getitem__(key) + def __getitem__( self, key, diff --git a/tests/utils/test_dl_dpgen2_arti_by_def.py b/tests/utils/test_dl_dpgen2_arti_by_def.py index 91d35e93..e6a30a32 100644 --- a/tests/utils/test_dl_dpgen2_arti_by_def.py +++ b/tests/utils/test_dl_dpgen2_arti_by_def.py @@ -30,6 +30,9 @@ class MockedArti: + def get(self, key): + return self.__getitem__(key) + def __getitem__( self, key,