Automatic JuliaFormatter.jl run #1

Open · wants to merge 1 commit into master
10 changes: 8 additions & 2 deletions docs/experiments/experiments/CFR/JuliaRL_DeepCFR_OpenSpiel.jl
@@ -61,5 +61,11 @@ function RL.Experiment(
batch_size_Π = 2048,
initializer = glorot_normal(CUDA.CURAND.default_rng()),
)
Experiment(p, env, StopAfterStep(500, is_show_progress=!haskey(ENV, "CI")), EmptyHook(), "# run DeepcCFR on leduc_poker")
end
Experiment(
p,
env,
StopAfterStep(500, is_show_progress = !haskey(ENV, "CI")),
EmptyHook(),
"# run DeepcCFR on leduc_poker",
)
end
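
The edits in this file are typical JuliaFormatter.jl output: a long single-line `Experiment(...)` call is split with one argument per line, a trailing comma is added, and keyword arguments get spaces around `=`. The exact options used by the automated run are not recorded in this diff; a minimal sketch of how a comparable pass could be reproduced locally (the `margin` value below is an assumption, not taken from this PR):

using JuliaFormatter

# Reformat every .jl file under the experiments directory in place.
# margin = 92 is an assumed setting; the bot's actual configuration is
# not shown in this PR.
format("docs/experiments"; margin = 92)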
10 changes: 8 additions & 2 deletions docs/experiments/experiments/CFR/JuliaRL_TabularCFR_OpenSpiel.jl
@@ -23,8 +23,14 @@ function RL.Experiment(
π = TabularCFRPolicy(; rng = rng)

description = "# Play `$game` in OpenSpiel with TabularCFRPolicy"
Experiment(π, env, StopAfterStep(300, is_show_progress=!haskey(ENV, "CI")), EmptyHook(), description)
Experiment(
π,
env,
StopAfterStep(300, is_show_progress = !haskey(ENV, "CI")),
EmptyHook(),
description,
)
end

ex = E`JuliaRL_TabularCFR_OpenSpiel(kuhn_poker)`
run(ex)
run(ex)
77 changes: 37 additions & 40 deletions docs/experiments/experiments/DQN/Dopamine_DQN_Atari.jl
@@ -79,44 +79,41 @@ function atari_env_factory(
repeat_action_probability = 0.25,
n_replica = nothing,
)
init(seed) =
RewardOverriddenEnv(
StateCachedEnv(
StateTransformedEnv(
AtariEnv(;
name = string(name),
grayscale_obs = true,
noop_max = 30,
frame_skip = 4,
terminal_on_life_loss = false,
repeat_action_probability = repeat_action_probability,
max_num_frames_per_episode = n_frames * max_episode_steps,
color_averaging = false,
full_action_space = false,
seed = seed,
);
state_mapping=Chain(
ResizeImage(state_size...),
StackFrames(state_size..., n_frames)
),
state_space_mapping= _ -> Space(fill(0..256, state_size..., n_frames))
)
init(seed) = RewardOverriddenEnv(
StateCachedEnv(
StateTransformedEnv(
AtariEnv(;
name = string(name),
grayscale_obs = true,
noop_max = 30,
frame_skip = 4,
terminal_on_life_loss = false,
repeat_action_probability = repeat_action_probability,
max_num_frames_per_episode = n_frames * max_episode_steps,
color_averaging = false,
full_action_space = false,
seed = seed,
);
state_mapping = Chain(
ResizeImage(state_size...),
StackFrames(state_size..., n_frames),
),
state_space_mapping = _ ->
Space(fill(0 .. 256, state_size..., n_frames)),
),
r -> clamp(r, -1, 1)
)
),
r -> clamp(r, -1, 1),
)

if isnothing(n_replica)
init(seed)
else
envs = [
init(isnothing(seed) ? nothing : hash(seed + i))
for i in 1:n_replica
]
envs = [init(isnothing(seed) ? nothing : hash(seed + i)) for i in 1:n_replica]
states = Flux.batch(state.(envs))
rewards = reward.(envs)
terminals = is_terminated.(envs)
A = Space([action_space(x) for x in envs])
S = Space(fill(0..255, size(states)))
S = Space(fill(0 .. 255, size(states)))
MultiThreadEnv(envs, states, rewards, terminals, A, S, nothing)
end
end
@@ -172,7 +169,7 @@ function RL.Experiment(
::Val{:Atari},
name::AbstractString;
save_dir = nothing,
seed = nothing
seed = nothing,
)
rng = Random.GLOBAL_RNG
Random.seed!(rng, seed)
@@ -190,7 +187,7 @@ function RL.Experiment(
name,
STATE_SIZE,
N_FRAMES;
seed = isnothing(seed) ? nothing : hash(seed + 1)
seed = isnothing(seed) ? nothing : hash(seed + 1),
)
N_ACTIONS = length(action_space(env))
init = glorot_uniform(rng)
@@ -254,17 +251,15 @@ function RL.Experiment(
end,
DoEveryNEpisode() do t, agent, env
with_logger(lg) do
@info "training" episode_length = step_per_episode.steps[end] reward = reward_per_episode.rewards[end] log_step_increment = 0
@info "training" episode_length = step_per_episode.steps[end] reward =
reward_per_episode.rewards[end] log_step_increment = 0
end
end,
DoEveryNStep(;n=EVALUATION_FREQ) do t, agent, env
DoEveryNStep(; n = EVALUATION_FREQ) do t, agent, env
@info "evaluating agent at $t step..."
p = agent.policy
p = @set p.explorer = EpsilonGreedyExplorer(0.001; rng = rng) # set evaluation epsilon
h = ComposedHook(
TotalOriginalRewardPerEpisode(),
StepsPerEpisode(),
)
h = ComposedHook(TotalOriginalRewardPerEpisode(), StepsPerEpisode())
s = @elapsed run(
p,
atari_env_factory(
@@ -281,16 +276,18 @@ function RL.Experiment(
avg_score = mean(h[1].rewards[1:end-1])
avg_length = mean(h[2].steps[1:end-1])

@info "finished evaluating agent in $s seconds" avg_length = avg_length avg_score = avg_score
@info "finished evaluating agent in $s seconds" avg_length = avg_length avg_score =
avg_score
with_logger(lg) do
@info "evaluating" avg_length = avg_length avg_score = avg_score log_step_increment = 0
@info "evaluating" avg_length = avg_length avg_score = avg_score log_step_increment =
0
end
end,
)

stop_condition = StopAfterStep(
haskey(ENV, "CI") ? 1_000 : 50_000_000,
is_show_progress=!haskey(ENV, "CI")
is_show_progress = !haskey(ENV, "CI"),
)
Experiment(agent, env, stop_condition, hook, "# DQN <-> Atari($name)")
end
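
For context on the reformatted `atari_env_factory` in this file: the experiment body calls it with a game name, the target frame size, and the number of stacked frames, plus an optional seed, and it returns a single wrapped environment (or a `MultiThreadEnv` when `n_replica` is given). A small usage sketch under those assumptions; the game name and seed below are placeholders, and the file's own dependencies (ReinforcementLearning, ArcadeLearningEnvironment, Flux, ...) are assumed to be loaded:

# Build one wrapped Atari environment: resized and stacked frames, rewards
# clipped to [-1, 1] by the RewardOverriddenEnv layer shown above.
env = atari_env_factory(
    "pong",      # placeholder game name, not taken from this diff
    (84, 84),    # state size, matching STATE_SIZE in these experiments
    4;           # number of stacked frames, matching N_FRAMES
    seed = 123,  # placeholder seed
)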
76 changes: 40 additions & 36 deletions docs/experiments/experiments/DQN/Dopamine_IQN_Atari.jl
@@ -84,44 +84,41 @@ function atari_env_factory(
repeat_action_probability = 0.25,
n_replica = nothing,
)
init(seed) =
RewardOverriddenEnv(
StateCachedEnv(
StateTransformedEnv(
AtariEnv(;
name = string(name),
grayscale_obs = true,
noop_max = 30,
frame_skip = 4,
terminal_on_life_loss = false,
repeat_action_probability = repeat_action_probability,
max_num_frames_per_episode = n_frames * max_episode_steps,
color_averaging = false,
full_action_space = false,
seed = seed,
);
state_mapping=Chain(
ResizeImage(state_size...),
StackFrames(state_size..., n_frames)
),
state_space_mapping= _ -> Space(fill(0..256, state_size..., n_frames))
)
init(seed) = RewardOverriddenEnv(
StateCachedEnv(
StateTransformedEnv(
AtariEnv(;
name = string(name),
grayscale_obs = true,
noop_max = 30,
frame_skip = 4,
terminal_on_life_loss = false,
repeat_action_probability = repeat_action_probability,
max_num_frames_per_episode = n_frames * max_episode_steps,
color_averaging = false,
full_action_space = false,
seed = seed,
);
state_mapping = Chain(
ResizeImage(state_size...),
StackFrames(state_size..., n_frames),
),
state_space_mapping = _ ->
Space(fill(0 .. 256, state_size..., n_frames)),
),
r -> clamp(r, -1, 1)
)
),
r -> clamp(r, -1, 1),
)

if isnothing(n_replica)
init(seed)
else
envs = [
init(isnothing(seed) ? nothing : hash(seed + i))
for i in 1:n_replica
]
envs = [init(isnothing(seed) ? nothing : hash(seed + i)) for i in 1:n_replica]
states = Flux.batch(state.(envs))
rewards = reward.(envs)
terminals = is_terminated.(envs)
A = Space([action_space(x) for x in envs])
S = Space(fill(0..255, size(states)))
S = Space(fill(0 .. 255, size(states)))
MultiThreadEnv(envs, states, rewards, terminals, A, S, nothing)
end
end
@@ -195,7 +192,7 @@ function RL.Experiment(
N_FRAMES = 4
STATE_SIZE = (84, 84)

env = atari_env_factory(name, STATE_SIZE, N_FRAMES; seed = isnothing(seed) ? nothing : hash(seed + 2))
env = atari_env_factory(
name,
STATE_SIZE,
N_FRAMES;
seed = isnothing(seed) ? nothing : hash(seed + 2),
)
N_ACTIONS = length(action_space(env))
Nₑₘ = 64

@@ -250,7 +252,7 @@ function RL.Experiment(
),
),
trajectory = CircularArraySARTTrajectory(
capacity = haskey(ENV, "CI") : 1_000 : 1_000_000,
capacity = haskey(ENV, "CI"):1_000:1_000_000,
state = Matrix{Float32} => STATE_SIZE,
),
)
@@ -274,7 +276,7 @@ function RL.Experiment(
steps_per_episode.steps[end] log_step_increment = 0
end
end,
DoEveryNStep(;n=EVALUATION_FREQ) do t, agent, env
DoEveryNStep(; n = EVALUATION_FREQ) do t, agent, env
@info "evaluating agent at $t step..."
p = agent.policy
p = @set p.explorer = EpsilonGreedyExplorer(0.001; rng = rng) # set evaluation epsilon
@@ -286,7 +288,7 @@ function RL.Experiment(
STATE_SIZE,
N_FRAMES,
MAX_EPISODE_STEPS_EVAL;
seed = isnothing(seed) ? nothing : hash(seed + t)
seed = isnothing(seed) ? nothing : hash(seed + t),
),
StopAfterStep(125_000; is_show_progress = false),
h,
@@ -295,16 +297,18 @@ function RL.Experiment(
avg_score = mean(h[1].rewards[1:end-1])
avg_length = mean(h[2].steps[1:end-1])

@info "finished evaluating agent in $s seconds" avg_length = avg_length avg_score = avg_score
@info "finished evaluating agent in $s seconds" avg_length = avg_length avg_score =
avg_score
with_logger(lg) do
@info "evaluating" avg_length = avg_length avg_score = avg_score log_step_increment = 0
@info "evaluating" avg_length = avg_length avg_score = avg_score log_step_increment =
0
end
end,
)

stop_condition = StopAfterStep(
haskey(ENV, "CI") ? 10_000 : 50_000_000,
is_show_progress=!haskey(ENV, "CI")
is_show_progress = !haskey(ENV, "CI"),
)
Experiment(agent, env, stop_condition, hook, "# IQN <-> Atari($name)")
end
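
One reformatted line in this file deserves a note: `capacity = haskey(ENV, "CI"):1_000:1_000_000` is a range expression (a Bool lower bound with step 1_000), and the formatter's output only makes that parse explicit; the code was already read that way before this run. If the intent was a small replay buffer on CI and a large one otherwise, the usual spelling would be a ternary. The line below is the presumed intent, stated as an assumption rather than a change made by this PR:

capacity = haskey(ENV, "CI") ? 1_000 : 1_000_000  # presumed intent; not part of this diff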