train_se3_flows.py
"""
Code adapted from
https://github.com/microsoft/protein-frame-flow/blob/main/experiments/train_se3_flows.py
"""
import os
import GPUtil
import torch
import hydra
import wandb
from omegaconf import DictConfig, OmegaConf
from pytorch_lightning import Trainer
from pytorch_lightning.loggers.wandb import WandbLogger
from pytorch_lightning.callbacks import ModelCheckpoint

from src.data.pdb_na_datamodule_base import PDBNABaseDataModule
from src.models.flow_module import FlowModule
import src.utils as eu

log = eu.get_pylogger(__name__)

torch.set_float32_matmul_precision('high')


class Experiment:

    def __init__(self, *, cfg: DictConfig):
        self._cfg = cfg
        self._data_cfg = cfg.data_cfg
        self._exp_cfg = cfg.experiment
        self._model = FlowModule(self._cfg)
        self._datamodule = PDBNABaseDataModule(data_cfg=self._data_cfg)

    def train(self):
        callbacks = []
        if self._exp_cfg.debug:
            log.info("Debug mode.")
            logger = None
            self._exp_cfg.num_devices = 1
            self._data_cfg.loader.num_workers = 0
        else:
            logger = WandbLogger(**self._exp_cfg.wandb)

            # Checkpoint directory
            ckpt_dir = self._exp_cfg.checkpointer.dirpath
            os.makedirs(ckpt_dir, exist_ok=True)
            log.info(f"Checkpoints saved to {ckpt_dir}")

            # Model checkpoints
            callbacks.append(ModelCheckpoint(**self._exp_cfg.checkpointer))

            # Save the config alongside the checkpoints and log it to wandb.
            cfg_path = os.path.join(ckpt_dir, 'config.yaml')
            with open(cfg_path, 'w') as f:
                OmegaConf.save(config=self._cfg, f=f.name)
            cfg_dict = OmegaConf.to_container(self._cfg, resolve=True)
            flat_cfg = dict(eu.flatten_dict(cfg_dict))
            if isinstance(logger.experiment.config, wandb.sdk.wandb_config.Config):
                logger.experiment.config.update(flat_cfg)

        # Pick the GPUs with the most free memory.
        devices = GPUtil.getAvailable(
            order='memory', limit=8)[:self._exp_cfg.num_devices]
        log.info(f"Using devices: {devices}")
        trainer = Trainer(
            **self._exp_cfg.trainer,
            callbacks=callbacks,
            logger=logger,
            use_distributed_sampler=False,
            enable_progress_bar=True,
            enable_model_summary=True,
            devices=devices,
        )
        trainer.fit(
            model=self._model,
            datamodule=self._datamodule,
            ckpt_path=self._exp_cfg.warm_start
        )


@hydra.main(version_base=None, config_path="./configs", config_name="config")
def main(cfg: DictConfig):
    if cfg.experiment.warm_start is not None and cfg.experiment.warm_start_cfg_override:
        # Load the config saved next to the warm start checkpoint.
        warm_start_cfg_path = os.path.join(
            os.path.dirname(cfg.experiment.warm_start), 'config.yaml')
        warm_start_cfg = OmegaConf.load(warm_start_cfg_path)

        # The warm start config may be missing fields added to the base config
        # since the checkpoint was written, so merge it over the base model config.
        OmegaConf.set_struct(cfg.model, False)
        OmegaConf.set_struct(warm_start_cfg.model, False)
        cfg.model = OmegaConf.merge(cfg.model, warm_start_cfg.model)
        OmegaConf.set_struct(cfg.model, True)
        log.info(f'Loaded warm start config from {warm_start_cfg_path}')

    exp = Experiment(cfg=cfg)
    exp.train()


if __name__ == "__main__":
    main()
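

# Example invocations (a sketch, not part of the original file): the script is
# launched through Hydra, which resolves ./configs/config.yaml, so the
# experiment.* fields referenced above can be overridden on the command line.
# The checkpoint path below is a placeholder.
#
#   python train_se3_flows.py experiment.debug=True
#   python train_se3_flows.py experiment.num_devices=2 \
#       experiment.warm_start=/path/to/ckpt_dir/last.ckpt \
#       experiment.warm_start_cfg_override=True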