Fixed test case for Breakout learning with DDDQN.

sven1977 committed Oct 15, 2019
1 parent f190fe6 commit 82e6688

Showing 4 changed files with 4 additions and 117 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -27,7 +27,7 @@ algorithm is 40 lines at its core - not counting comments, import statements and
self.memory.add_records(dict(s=self.s, a=self.a, r=r, s_=s_, t=t)) # s_ = s'; t = is s' terminal?
```

[![asdsd](https://img.shields.io/badge/python-3.7-orange.svg)](https://www.python.org/downloads/release/python-374/)


```
# Handle ε-greedy exploration (decay).
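The README excerpt above cuts off at the ε-greedy exploration step. For orientation, here is a minimal sketch of ε-greedy action selection with linear decay; the helper names and decay constants are illustrative assumptions, not the repository's actual code:

```
import numpy as np

def decayed_epsilon(t, start=1.0, end=0.1, decay_steps=100_000):
    # Anneal epsilon linearly from `start` to `end` over `decay_steps` time steps
    # (constants assumed for illustration).
    frac = min(t / decay_steps, 1.0)
    return start + frac * (end - start)

def epsilon_greedy(q_values, epsilon):
    # Explore with probability epsilon, otherwise act greedily on the Q-estimates.
    if np.random.random() < epsilon:
        return np.random.randint(len(q_values))
    return int(np.argmax(q_values))
```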

This file was deleted.

2 changes: 1 addition & 1 deletion surreal/tests/algos/configs/dddqn_breakout_learning.json
@@ -3,7 +3,7 @@

"n_step": 4,

- "memory_capacity": 75000,
+ "memory_capacity": 101376,
"memory_batch_size": 256,
"memory_batch_size": 256,

"epsilon": {
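The enlarged memory works together with the config's n_step=4 setting. As a reference point, the standard n-step double-DQN target such settings imply looks roughly like this; names and the exact bootstrap are assumptions, not surreal's update code:

```
def n_step_target(rewards, gamma, q_target_next, a_star, terminal):
    # rewards: [r_t, ..., r_{t+n-1}] pulled from the replay memory (n = n_step).
    # Bootstrap with the target network's value of the online network's argmax
    # action a_star in s_{t+n} (the "double" part of double DQN); skip the
    # bootstrap if the n-step transition ended in a terminal state.
    ret = 0.0 if terminal else q_target_next[a_star]
    for r in reversed(rewards):
        ret = r + gamma * ret
    return ret
```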
41 changes: 2 additions & 39 deletions surreal/tests/algos/learning_up_to_1d/test_dddqn_long_tasks.py
@@ -87,48 +87,11 @@ def action_map(a):

        env.terminate()

-    def test_learning_on_breakout_WORKING___YAY(self):
-        # Create an Env object.
-        env = OpenAIGymEnv(
-            "Breakout-v4", actors=16, fire_after_reset=True, episodic_life=True, max_num_noops_after_reset=8
-        )
-
-        preprocessor = Preprocessor(
-            ImageCrop(x=5, y=29, width=150, height=167),
-            GrayScale(keepdims=True),
-            ImageResize(width=84, height=84, interpolation="bilinear"),
-            lambda inputs_: ((inputs_ / 128) - 1.0).astype(np.float32),  # simple preprocessor: [0,255] to [-1.0,1.0]
-            Sequence(sequence_length=4, adddim=False)
-        )
-        # Create a DDDQNConfig.
-        config = DDDQNConfig.make(
-            "../configs/WORKING___YAY_dddqn_breakout_learning.json",
-            preprocessor=preprocessor,
-            state_space=env.actors[0].state_space,
-            action_space=env.actors[0].action_space
-        )
-        # Create an Algo object.
-        algo = DDDQN(config=config, name="my-dddqn")
-
-        # Point actor(s) to the algo.
-        for actor in env.actors:
-            actor.set_algo(algo)
-
-        # Run and wait for env to complete.
-        env.run(actor_time_steps=10000000, sync=True, render=debug.RenderEnvInLearningTests)
-
-        # Check last n episode returns.
-        n = 10
-        mean_last_n = np.mean(env.historic_episodes_returns[-n:])
-        print("Avg return over last {} episodes: {}".format(n, mean_last_n))
-        self.assertTrue(mean_last_n > 150.0)
-
-        env.terminate()
-
    def test_learning_on_breakout(self):
        # Create an Env object.
        env = OpenAIGymEnv(
-            "Breakout-v4", actors=1, fire_after_reset=True, episodic_life=True, max_num_noops_after_reset=8
+            "Breakout-v4", actors=16, fire_after_reset=True, episodic_life=True, max_num_noops_after_reset=8,
+            frame_skip=(2, 5)
        )

        preprocessor = Preprocessor(
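The surviving test's substantive change is the scale-up from 1 to 16 actors plus an explicit frame_skip=(2, 5). By gym's Atari convention, a tuple frame skip repeats each action for a random number of emulator frames drawn uniformly from [low, high). A minimal illustration of that behavior, as a hypothetical wrapper function rather than surreal's implementation:

```
import numpy as np

def step_with_random_frame_skip(env, action, low=2, high=5, rng=np.random):
    # Repeat `action` for k emulator frames, with k drawn uniformly from
    # {low, ..., high - 1}, summing rewards and stopping early on episode end.
    k = rng.randint(low, high)
    total_reward, obs, done, info = 0.0, None, False, {}
    for _ in range(k):
        obs, reward, done, info = env.step(action)
        total_reward += reward
        if done:
            break
    return obs, total_reward, done, info
```

Randomizing the skip length injects stochasticity into an otherwise deterministic emulator, a common Atari trick to keep agents from memorizing fixed action sequences.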
