-
Notifications
You must be signed in to change notification settings - Fork 0
/
main_proxy_ZINC.py
91 lines (80 loc) · 3.66 KB
/
main_proxy_ZINC.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import logging
import os
import custom_graphgym # noqa, register custom modules
import torch
from torch_geometric import seed_everything
from torch_geometric.graphgym.cmd_args import parse_args
from torch_geometric.graphgym.config import (cfg, dump_cfg, load_cfg,
set_agg_dir, set_run_dir)
from torch_geometric.graphgym.logger import create_logger, set_printing
from torch_geometric.graphgym.model_builder import create_model
from torch_geometric.graphgym.optim import create_optimizer, create_scheduler
from torch_geometric.graphgym.register import train_dict
from torch_geometric.graphgym.proxy_rm import proxy_rm
from torch_geometric.graphgym.utils.agg_runs import agg_runs
from torch_geometric.graphgym.utils.comp_budget import params_count
from torch_geometric.graphgym.utils.device import auto_select_device
from torch_geometric.graphgym.golden_model_train import attach_golden_vec_ZINC, attach_random_vec
from torch_geometric.graphgym.loader import get_loader
from torch_geometric.datasets import ZINC
"""
本函数是采用了多层hook但是经过MLP过滤以后的pipeline, 重点是采用的是ZINC数据集
"""
def create_loader():
    """Create the train/val/test data loaders for the ZINC dataset.

    Builds the three (subset) ZINC splits from a single dataset root and
    wraps each in a GraphGym loader. Only the training loader shuffles;
    val and test reuse ``cfg.val.sampler`` and iterate in order. All three
    loaders share ``cfg.train.batch_size``.

    Returns:
        list: ``[train_loader, val_loader, test_loader]``.
    """
    # Single source of truth for the dataset location (the original code
    # repeated the path three times, once with a trailing slash).
    root = './datasets/ZINC'
    # (split name, sampler, shuffle) for each loader, in return order.
    split_specs = [
        ('train', cfg.train.sampler, True),
        ('val', cfg.val.sampler, False),
        ('test', cfg.val.sampler, False),
    ]
    loaders = []
    for split, sampler, shuffle in split_specs:
        dataset = ZINC(root=root, split=split, subset=True)
        loaders.append(
            get_loader(dataset, sampler, cfg.train.batch_size,
                       shuffle=shuffle))
    return loaders
if __name__ == '__main__':
    # Script entry point: train a proxy model on ZINC for `args.repeat`
    # runs (advancing the seed each run), then aggregate results across
    # runs into a single summary.
    # Load cmd line args
    args = parse_args()
    # Load config file
    load_cfg(cfg, args) # cfg is specified on the command line as a .yaml file
    # Set Pytorch environment
    torch.set_num_threads(cfg.num_threads)
    dump_cfg(cfg)
    # Repeat for different random seeds
    auto_select_device()
    # Loaders are built once and shared by every repeat of the loop below.
    loaders = create_loader() # list of loaders, they are divided from original dataset according to 'train' 'test' and 'val'
    # Attach the golden (reference) vectors for ZINC before training starts.
    attach_golden_vec_ZINC(loaders)
    for i in range(args.repeat):
        set_run_dir(cfg.out_dir, args.cfg_file)
        set_printing()
        # Set configurations for each run
        cfg.seed = cfg.seed + 1  # advance the seed so each repeat differs
        cfg.share.num_splits = 3  # train / val / test
        # seed_everything(cfg.seed) # Sets the seed for generating random numbers in PyTorch, numpy and Python. NOTE(review): intentionally disabled, so runs are not fully reproducible — confirm this is deliberate.
        # Set machine learning pipeline
        loggers = create_logger()
        model = create_model()
        optimizer = create_optimizer(model.parameters(), cfg.optim)
        scheduler = create_scheduler(optimizer, cfg.optim)
        # Print model info
        logging.info(model)
        logging.info(cfg)
        cfg.params = params_count(model)
        logging.info('Num parameters: %s', cfg.params)
        # 'standard' mode runs the custom proxy training loop; any other
        # mode dispatches to a trainer registered in train_dict.
        if cfg.train.mode == 'standard':
            proxy_rm(loggers, loaders, model, optimizer, scheduler)
        else:
            train_dict[cfg.train.mode](loggers, loaders, model, optimizer,
                                       scheduler)
    # Aggregate results from different seeds
    agg_runs(set_agg_dir(cfg.out_dir, args.cfg_file), cfg.metric_best)
    # When being launched in batch mode, mark a yaml as done
    if args.mark_done:
        os.rename(args.cfg_file, f'{args.cfg_file}_done')