Format .jl files
github-actions[bot] authored Aug 28, 2021
1 parent cce2dbc commit 44e8e2a
Showing 112 changed files with 1,593 additions and 1,359 deletions.
10 changes: 8 additions & 2 deletions docs/experiments/experiments/CFR/JuliaRL_DeepCFR_OpenSpiel.jl
@@ -61,5 +61,11 @@ function RL.Experiment(
batch_size_Π = 2048,
initializer = glorot_normal(CUDA.CURAND.default_rng()),
)
Experiment(p, env, StopAfterStep(500, is_show_progress=!haskey(ENV, "CI")), EmptyHook(), "# run DeepCFR on leduc_poker")
end
Experiment(
p,
env,
StopAfterStep(500, is_show_progress = !haskey(ENV, "CI")),
EmptyHook(),
"# run DeepcCFR on leduc_poker",
)
end
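
For orientation: the `Experiment` constructor reformatted above is normally created through the `E` string macro and then executed with `run`, exactly as the TabularCFR file below does. A minimal usage sketch, assuming the DeepCFR experiment is registered under the name suggested by its file name (the macro string and game argument are assumptions, not part of this diff):

    ex = E`JuliaRL_DeepCFR_OpenSpiel(leduc_poker)`  # assumed experiment name
    run(ex)
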
10 changes: 8 additions & 2 deletions docs/experiments/experiments/CFR/JuliaRL_TabularCFR_OpenSpiel.jl
@@ -23,8 +23,14 @@ function RL.Experiment(
π = TabularCFRPolicy(; rng = rng)

description = "# Play `$game` in OpenSpiel with TabularCFRPolicy"
Experiment(π, env, StopAfterStep(300, is_show_progress=!haskey(ENV, "CI")), EmptyHook(), description)
Experiment(
π,
env,
StopAfterStep(300, is_show_progress = !haskey(ENV, "CI")),
EmptyHook(),
description,
)
end

ex = E`JuliaRL_TabularCFR_OpenSpiel(kuhn_poker)`
run(ex)
run(ex)
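
A pattern that recurs throughout this commit: run lengths and progress bars are keyed off the `CI` environment variable so the experiments stay short and quiet on CI runners. A minimal sketch of the idiom, reusing the step counts from the DQN file further down (the variable names are illustrative):

    # Use a short run and disable the progress bar when executing under CI.
    n_steps = haskey(ENV, "CI") ? 1_000 : 50_000_000
    stop_condition = StopAfterStep(n_steps, is_show_progress = !haskey(ENV, "CI"))
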
74 changes: 35 additions & 39 deletions docs/experiments/experiments/DQN/Dopamine_DQN_Atari.jl
@@ -79,39 +79,35 @@ function atari_env_factory(
repeat_action_probability = 0.25,
n_replica = nothing,
)
init(seed) =
RewardOverriddenEnv(
StateCachedEnv(
StateTransformedEnv(
AtariEnv(;
name = string(name),
grayscale_obs = true,
noop_max = 30,
frame_skip = 4,
terminal_on_life_loss = false,
repeat_action_probability = repeat_action_probability,
max_num_frames_per_episode = n_frames * max_episode_steps,
color_averaging = false,
full_action_space = false,
seed = seed,
);
state_mapping=Chain(
ResizeImage(state_size...),
StackFrames(state_size..., n_frames)
),
state_space_mapping= _ -> Space(fill(0..256, state_size..., n_frames))
)
init(seed) = RewardOverriddenEnv(
StateCachedEnv(
StateTransformedEnv(
AtariEnv(;
name = string(name),
grayscale_obs = true,
noop_max = 30,
frame_skip = 4,
terminal_on_life_loss = false,
repeat_action_probability = repeat_action_probability,
max_num_frames_per_episode = n_frames * max_episode_steps,
color_averaging = false,
full_action_space = false,
seed = seed,
);
state_mapping = Chain(
ResizeImage(state_size...),
StackFrames(state_size..., n_frames),
),
state_space_mapping = _ -> Space(fill(0..256, state_size..., n_frames)),
),
r -> clamp(r, -1, 1)
)
),
r -> clamp(r, -1, 1),
)

if isnothing(n_replica)
init(seed)
else
envs = [
init(isnothing(seed) ? nothing : hash(seed + i))
for i in 1:n_replica
]
envs = [init(isnothing(seed) ? nothing : hash(seed + i)) for i in 1:n_replica]
states = Flux.batch(state.(envs))
rewards = reward.(envs)
terminals = is_terminated.(envs)
@@ -172,7 +168,7 @@ function RL.Experiment(
::Val{:Atari},
name::AbstractString;
save_dir = nothing,
seed = nothing
seed = nothing,
)
rng = Random.GLOBAL_RNG
Random.seed!(rng, seed)
@@ -190,7 +186,7 @@
name,
STATE_SIZE,
N_FRAMES;
seed = isnothing(seed) ? nothing : hash(seed + 1)
seed = isnothing(seed) ? nothing : hash(seed + 1),
)
N_ACTIONS = length(action_space(env))
init = glorot_uniform(rng)
@@ -254,17 +250,15 @@ function RL.Experiment(
end,
DoEveryNEpisode() do t, agent, env
with_logger(lg) do
@info "training" episode_length = step_per_episode.steps[end] reward = reward_per_episode.rewards[end] log_step_increment = 0
@info "training" episode_length = step_per_episode.steps[end] reward =
reward_per_episode.rewards[end] log_step_increment = 0
end
end,
DoEveryNStep(;n=EVALUATION_FREQ) do t, agent, env
DoEveryNStep(; n = EVALUATION_FREQ) do t, agent, env
@info "evaluating agent at $t step..."
p = agent.policy
p = @set p.explorer = EpsilonGreedyExplorer(0.001; rng = rng) # set evaluation epsilon
h = ComposedHook(
TotalOriginalRewardPerEpisode(),
StepsPerEpisode(),
)
h = ComposedHook(TotalOriginalRewardPerEpisode(), StepsPerEpisode())
s = @elapsed run(
p,
atari_env_factory(
@@ -281,16 +275,18 @@
avg_score = mean(h[1].rewards[1:end-1])
avg_length = mean(h[2].steps[1:end-1])

@info "finished evaluating agent in $s seconds" avg_length = avg_length avg_score = avg_score
@info "finished evaluating agent in $s seconds" avg_length = avg_length avg_score =
avg_score
with_logger(lg) do
@info "evaluating" avg_length = avg_length avg_score = avg_score log_step_increment = 0
@info "evaluating" avg_length = avg_length avg_score = avg_score log_step_increment =
0
end
end,
)

stop_condition = StopAfterStep(
haskey(ENV, "CI") ? 1_000 : 50_000_000,
is_show_progress=!haskey(ENV, "CI")
is_show_progress = !haskey(ENV, "CI"),
)
Experiment(agent, env, stop_condition, hook, "# DQN <-> Atari($name)")
end
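
For readers skimming the Atari files: `atari_env_factory` wraps a raw `AtariEnv` so that observations are resized and stacked (`ResizeImage`, `StackFrames`) and rewards are clipped to [-1, 1]. A minimal single-environment call, mirroring how the experiment above builds its training env (the game name and seed are illustrative, not taken from the diff):

    STATE_SIZE = (84, 84)  # frames are resized to 84×84, as above
    N_FRAMES = 4           # four stacked grayscale frames per state
    env = atari_env_factory(
        "pong",            # assumed game name
        STATE_SIZE,
        N_FRAMES;
        seed = 123,
    )
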
73 changes: 38 additions & 35 deletions docs/experiments/experiments/DQN/Dopamine_IQN_Atari.jl
@@ -84,39 +84,35 @@ function atari_env_factory(
repeat_action_probability = 0.25,
n_replica = nothing,
)
init(seed) =
RewardOverriddenEnv(
StateCachedEnv(
StateTransformedEnv(
AtariEnv(;
name = string(name),
grayscale_obs = true,
noop_max = 30,
frame_skip = 4,
terminal_on_life_loss = false,
repeat_action_probability = repeat_action_probability,
max_num_frames_per_episode = n_frames * max_episode_steps,
color_averaging = false,
full_action_space = false,
seed = seed,
);
state_mapping=Chain(
ResizeImage(state_size...),
StackFrames(state_size..., n_frames)
),
state_space_mapping= _ -> Space(fill(0..256, state_size..., n_frames))
)
init(seed) = RewardOverriddenEnv(
StateCachedEnv(
StateTransformedEnv(
AtariEnv(;
name = string(name),
grayscale_obs = true,
noop_max = 30,
frame_skip = 4,
terminal_on_life_loss = false,
repeat_action_probability = repeat_action_probability,
max_num_frames_per_episode = n_frames * max_episode_steps,
color_averaging = false,
full_action_space = false,
seed = seed,
);
state_mapping = Chain(
ResizeImage(state_size...),
StackFrames(state_size..., n_frames),
),
state_space_mapping = _ -> Space(fill(0..256, state_size..., n_frames)),
),
r -> clamp(r, -1, 1)
)
),
r -> clamp(r, -1, 1),
)

if isnothing(n_replica)
init(seed)
else
envs = [
init(isnothing(seed) ? nothing : hash(seed + i))
for i in 1:n_replica
]
envs = [init(isnothing(seed) ? nothing : hash(seed + i)) for i in 1:n_replica]
states = Flux.batch(state.(envs))
rewards = reward.(envs)
terminals = is_terminated.(envs)
@@ -195,7 +191,12 @@ function RL.Experiment(
N_FRAMES = 4
STATE_SIZE = (84, 84)

env = atari_env_factory(name, STATE_SIZE, N_FRAMES; seed = isnothing(seed) ? nothing : hash(seed + 2))
env = atari_env_factory(
name,
STATE_SIZE,
N_FRAMES;
seed = isnothing(seed) ? nothing : hash(seed + 2),
)
N_ACTIONS = length(action_space(env))
Nₑₘ = 64

@@ -250,7 +251,7 @@ function RL.Experiment(
),
),
trajectory = CircularArraySARTTrajectory(
capacity = haskey(ENV, "CI") : 1_000 : 1_000_000,
capacity = haskey(ENV, "CI") ? 1_000 : 1_000_000,
state = Matrix{Float32} => STATE_SIZE,
),
)
@@ -274,7 +275,7 @@
steps_per_episode.steps[end] log_step_increment = 0
end
end,
DoEveryNStep(;n=EVALUATION_FREQ) do t, agent, env
DoEveryNStep(; n = EVALUATION_FREQ) do t, agent, env
@info "evaluating agent at $t step..."
p = agent.policy
p = @set p.explorer = EpsilonGreedyExplorer(0.001; rng = rng) # set evaluation epsilon
Expand All @@ -286,7 +287,7 @@ function RL.Experiment(
STATE_SIZE,
N_FRAMES,
MAX_EPISODE_STEPS_EVAL;
seed = isnothing(seed) ? nothing : hash(seed + t)
seed = isnothing(seed) ? nothing : hash(seed + t),
),
StopAfterStep(125_000; is_show_progress = false),
h,
@@ -295,16 +296,18 @@
avg_score = mean(h[1].rewards[1:end-1])
avg_length = mean(h[2].steps[1:end-1])

@info "finished evaluating agent in $s seconds" avg_length = avg_length avg_score = avg_score
@info "finished evaluating agent in $s seconds" avg_length = avg_length avg_score =
avg_score
with_logger(lg) do
@info "evaluating" avg_length = avg_length avg_score = avg_score log_step_increment = 0
@info "evaluating" avg_length = avg_length avg_score = avg_score log_step_increment =
0
end
end,
)

stop_condition = StopAfterStep(
haskey(ENV, "CI") ? 10_000 : 50_000_000,
is_show_progress=!haskey(ENV, "CI")
is_show_progress = !haskey(ENV, "CI"),
)
Experiment(agent, env, stop_condition, hook, "# IQN <-> Atari($name)")
end
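
One detail shared by both Atari experiments above: at evaluation time the agent's explorer is swapped for a small-ε `EpsilonGreedyExplorer` via Setfield's `@set`, leaving the training policy untouched. A minimal sketch of that swap, assuming `agent` and `rng` are in scope as in the experiments (the ε of 0.001 is the value used in this diff):

    using Setfield  # provides the @set macro

    p = agent.policy
    p = @set p.explorer = EpsilonGreedyExplorer(0.001; rng = rng)  # evaluation epsilon
    # `p` is an evaluation copy; `agent.policy` keeps its training explorer.
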