forked from johnjim0816/joyrl-offline
-
Notifications
You must be signed in to change notification settings - Fork 0
/
trainer.py
35 lines (35 loc) · 1.72 KB
/
trainer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
class Trainer:
    """Run single training or evaluation episodes of an agent in an environment."""

    def __init__(self) -> None:
        pass

    def train_one_episode(self, env, agent, cfg):
        """Run one training episode, storing transitions and updating the agent.

        Args:
            env: Environment. ``reset()`` is used as the initial state and
                ``step(action)`` must return the 5-tuple
                ``(next_state, reward, terminated, truncated, info)``.
            agent: Object providing ``sample_action(state)``,
                ``memory.push(state, action, reward, next_state, terminated)``
                and ``update()``.
            cfg: Configuration object; only ``cfg.max_steps`` is read here.

        Returns:
            A tuple ``(agent, res)`` where ``res`` is a dict with the keys
            ``'ep_reward'`` (sum of rewards) and ``'ep_step'`` (steps taken).
        """
        ep_reward = 0  # accumulated reward of this episode
        ep_step = 0  # number of steps taken in this episode
        # NOTE(review): reset() is assumed to return the state itself, not an
        # (obs, info) pair -- confirm against the environment wrapper in use.
        state = env.reset()
        for _ in range(cfg.max_steps):
            ep_step += 1
            action = agent.sample_action(state)
            next_state, reward, terminated, truncated, _info = env.step(action)
            # Only `terminated` is stored as the done flag: a truncated episode
            # did not reach a terminal state.
            agent.memory.push(state, action, reward, next_state, terminated)
            agent.update()
            state = next_state
            ep_reward += reward
            # Stop on truncation too; otherwise the loop keeps stepping an
            # episode the environment has already ended.
            if terminated or truncated:
                break
        res = {'ep_reward': ep_reward, 'ep_step': ep_step}
        return agent, res

    def test_one_episode(self, env, agent, cfg):
        """Run one evaluation episode without storing transitions or updating.

        Args:
            env: Environment. ``reset()`` is used as the initial state and
                ``step(action)`` must return the 5-tuple
                ``(next_state, reward, terminated, truncated, info)``.
            agent: Object providing ``predict_action(state)``.
            cfg: Configuration object; only ``cfg.max_steps`` is read here.

        Returns:
            A tuple ``(agent, res)`` where ``res`` is a dict with the keys
            ``'ep_reward'`` (sum of rewards) and ``'ep_step'`` (steps taken).
        """
        ep_reward = 0  # accumulated reward of this episode
        ep_step = 0  # number of steps taken in this episode
        # NOTE(review): reset() is assumed to return the state itself, not an
        # (obs, info) pair -- confirm against the environment wrapper in use.
        state = env.reset()
        for _ in range(cfg.max_steps):
            ep_step += 1
            action = agent.predict_action(state)
            next_state, reward, terminated, truncated, _info = env.step(action)
            state = next_state
            ep_reward += reward
            # Stop on truncation too; otherwise the loop keeps stepping an
            # episode the environment has already ended.
            if terminated or truncated:
                break
        res = {'ep_reward': ep_reward, 'ep_step': ep_step}
        return agent, res