"""
Implementation of Covariance Matrix Adaptation Evolution Strategy (CMA-ES) in Pytorch
This module is designed to train a model for standard Gym environments.
Copyright: Pavel B. Chernov, [email protected]
Date: Dec 2020 - June 2021
License: MIT
"""
import copy
import multiprocessing as mp
from datetime import timedelta
from typing import Sequence

import gym
import numpy as np
import torch
import torch.nn as nn

# init also provides log, get_env, space_shape, convert_action, N_WORKERS and WORK_DIR
from init import *
from optim.cmaes import CMAES
from optim.models import Models
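

# CMA-ES is gradient-free: it only needs the model weights as one flat vector.
# A minimal sketch of the round-trip this module relies on (_flat_weights_demo
# is illustrative only, not part of the repo):
def _flat_weights_demo():
    demo = nn.Linear(4, 2)
    flat = torch.nn.utils.parameters_to_vector(demo.parameters())
    candidate = flat + 0.01 * torch.randn_like(flat)  # a perturbed sample
    torch.nn.utils.vector_to_parameters(candidate, demo.parameters())
    return candidate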


def scoring_function(env: gym.Env,
                     model: nn.Module,
                     population: Sequence[torch.Tensor],
                     n_iter: int,
                     episodes_per_iteration: int = 10,
                     cpu_limit: int = 2
                     ) -> Sequence[float]:
    """Evaluate a set of different possible weights for a model"""
    # Initialize a list of models with weights from the specified population
    models = []
    for x in population:
        entity_model = copy.deepcopy(model)
        torch.nn.utils.vector_to_parameters(x, entity_model.parameters())
        models.append(entity_model)
    # Construct batch of episodes. Every candidate is evaluated on the same
    # seeds within an iteration (common random numbers), so score differences
    # reflect the weights rather than episode luck.
    args = []
    for entity_model in models:
        for i in range(episodes_per_iteration):
            seed = n_iter * episodes_per_iteration + i
            args.append((env, entity_model, seed, False))
    # Run in parallel or sequentially
    if cpu_limit > 1:
        with mp.Pool(cpu_limit) as pool:
            rewards = pool.starmap(run_episode, args)
    else:
        rewards = [run_episode(*a) for a in args]
    # Compute the mean reward of each entity over its episodes
    scores = np.array(rewards).reshape(len(models), episodes_per_iteration).mean(axis=1, dtype=np.float64).tolist()
    # Occasionally render an episode (of the last candidate) to show progress
    if n_iter % 2 == 0:
        run_episode(env, models[-1], render=True)
    return scores


def run_episode(env: gym.Env, model: nn.Module, seed=None, render=False) -> float:
    """Run a new episode using the provided seed (if any)"""
    # Uses the classic Gym API (pre-0.26): env.seed() plus a 4-tuple step()
    env.seed(seed)
    state = env.reset()
    rewards = []
    while True:
        if render:
            env.render()
        action = model(state)
        action = convert_action(action, env.action_space)
        state, reward, done, info = env.step(action)
        rewards.append(reward)
        if done:
            break
    env.close()
    return float(np.sum(rewards))
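

# convert_action comes from init and is not shown here; a plausible sketch
# (an assumption about its behavior, not the repo's actual code) would map
# the raw model output to a valid action for the env's action space:
def _convert_action_sketch(output: torch.Tensor, space: gym.Space):
    if isinstance(space, gym.spaces.Discrete):
        return int(output.argmax().item())  # pick the highest-scored action
    if isinstance(space, gym.spaces.Box):
        return np.clip(output.detach().numpy(), space.low, space.high)
    raise NotImplementedError(f'Unsupported space: {space}')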


def run(env_name: str, model_name: str, hidden_size=16, max_iter=200):
    """Train the given model on the given environment with CMA-ES"""
    # Prepare environment
    env = get_env(env_name)
    log.info(f'Env: {env}')

    # Initialize model
    input_shape = space_shape(env.observation_space)
    output_shape = space_shape(env.action_space)
    model_class = Models[model_name]
    model = model_class(input_shape=input_shape, output_shape=output_shape, hidden_size=hidden_size, norm=False)
    log.info(f'Model: {model}')

    # Initialize optimizer
    autosave_prefix = f'cmaes_{model_name.lower()}_{env_name.split("-", 1)[0].lower()}'
    optimizer = CMAES(
        initial_point=torch.nn.utils.parameters_to_vector(model.parameters()),
        scoring_function=lambda population, optimizer:
            scoring_function(env, model, population, n_iter=optimizer.n_iter, cpu_limit=N_WORKERS),
        autosave_dir=WORK_DIR,
        autosave_prefix=autosave_prefix,
        autosave_interval=timedelta(minutes=1),
        log=log
    )
    log.info(f'Optimizer: {optimizer}')

    # Run the optimization
    optimizer.fit(max_iter=max_iter)
    optimizer.autosave(force=True)
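

# For intuition, a simplified, self-contained sketch of the loop CMAES.fit
# performs: sample candidates from N(mean, sigma^2 * C), score them, and move
# the mean and covariance toward the best samples. This is the generic
# (mu/mu_w, lambda)-ES idea with a rank-mu update only (no evolution paths or
# step-size control), not the optim.cmaes implementation:
def _cmaes_sketch(f, x0, sigma=0.5, pop=16, iters=100):
    n = len(x0)
    mu = pop // 2
    w = np.log(mu + 0.5) - np.log(np.arange(1, mu + 1))
    w /= w.sum()                                # recombination weights, best first
    mean, C = np.asarray(x0, dtype=np.float64), np.eye(n)
    for _ in range(iters):
        A = np.linalg.cholesky(C)               # so that A @ A.T == C
        X = mean + sigma * np.random.randn(pop, n) @ A.T
        order = np.argsort([-f(x) for x in X])  # highest score first
        elite = X[order[:mu]]
        y = (elite - mean) / sigma
        mean = w @ elite                        # shift mean toward the elite
        C = 0.8 * C + 0.2 * (y.T * w) @ y       # rank-mu covariance update
    return mean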


if __name__ == '__main__':
    # run(env_name='CartPole-v0', model_name='fc1')
    # Breakout with fc1 fails to run: the large image observation makes a
    # fully connected model prohibitively large.
    # run(env_name='Breakout-v0', model_name='fc1')
    # run(env_name='MountainCar-v0', model_name='fc1', max_iter=500)
    # run(env_name='MountainCarContinuous-v0', model_name='fc1')
    # run(env_name='Acrobot-v1', model_name='fc1', max_iter=100)
    # run(env_name='LunarLander-v2', model_name='gru1', max_iter=100)
    run(env_name='Breakout-v0', model_name='convgru1', hidden_size=32)