added experiments to test
adjusted tests and merged main
Mytolo committed Jul 19, 2023
1 parent 5df8e66 commit d660085
Showing 5 changed files with 85 additions and 8 deletions.
6 changes: 5 additions & 1 deletion src/ReinforcementLearningExperiments/Project.toml
@@ -6,12 +6,14 @@ version = "0.3.1"
[deps]
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
ReinforcementLearningBase = "e575027e-6cd6-5018-9292-cdc6200d2b44"
ReinforcementLearningCore = "de1b191a-4ae0-4afa-a27b-92d07f46b2d6"
ReinforcementLearningEnvironments = "25e41dd2-4622-11e9-1641-f1adca772921"
ReinforcementLearningZoo = "d607f57d-ee1e-4ba7-bcf2-7734c1e31854"
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
Weave = "44d3d7a6-8a23-5bf8-98c5-b353f8df5ec9"

@@ -29,7 +31,9 @@ julia = "1.9"

[extras]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["CUDA", "Test"]
test = ["CUDA", "PyCall", "Test"]
@@ -17,14 +17,14 @@ using Flux.Losses: huber_loss
function RLCore.Experiment(
::Val{:JuliaRL},
::Val{:DQN},
::Val{:MPESimple};
::Val{:MPESimple},
seed=123,
n=1,
γ=0.99f0,
is_enable_double_DQN=true
)
rng = StableRNG(seed)
env = discrete2standard_discrete(PettingzooEnv("mpe.simple_v2"; seed=seed))
env = discrete2standard_discrete(PettingZooEnv("mpe.simple_v2"; seed=seed))
ns, na = length(state(env)), length(action_space(env))

agent = Agent(
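Once this Experiment method is defined (and the file is conditionally included via the @require block further down), the experiment can be constructed and run through the E-string macro, as the updated runtests.jl does. A usage sketch, assuming PyCall and a Python pettingzoo installation are available:

run(E`JuliaRL_DQN_MPESimple`)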
@@ -7,7 +7,6 @@
# ---

using StableRNGs
using ReinforcementLearning
using ReinforcementLearningBase
using ReinforcementLearningZoo
using ReinforcementLearningCore
@@ -16,7 +15,6 @@ using Flux
using Flux.Losses: huber_loss
using Flux: glorot_uniform

using ProgressMeter


rng = StableRNG(1234)
@@ -25,6 +23,71 @@ cap = 100

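# Allow forwarding a scalar (integer-encoded) state by wrapping it in a 1-element vector.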
RLCore.forward(L::DQNLearner, state::A) where {A <: Real} = RLCore.forward(L, [state])


episodes_per_step = 25

function RLCore.Experiment(
::Val{:JuliaRL},
::Val{:IDQN},
::Val{:TicTacToe},
seed=123,
n=1,
γ=0.99f0,
is_enable_double_DQN=true
)
rng = StableRNG(seed)
create_policy() = QBasedPolicy(
learner=DQNLearner(
approximator=Approximator(
model=TwinNetwork(
Chain(
Dense(1, 512, relu; init=glorot_uniform(rng)),
Dense(512, 256, relu; init=glorot_uniform(rng)),
Dense(256, 9; init=glorot_uniform(rng)),
);
sync_freq=100
),
optimiser=Adam(),
),
n=n,
γ=γ,
is_enable_double_DQN=is_enable_double_DQN,
loss_func=huber_loss,
rng=rng,
),
explorer=EpsilonGreedyExplorer(
kind=:exp,
ϵ_stable=0.01,
decay_steps=500,
rng=rng,
),
)

e = TicTacToeEnv();
m = MultiAgentPolicy(NamedTuple((player =>
Agent(player != :Cross ? create_policy() : RandomPolicy(;rng=rng),
Trajectory(
container=CircularArraySARTTraces(
capacity=cap,
state=Integer => (1,),
),
sampler=NStepBatchSampler{SS′ART}(
n=n,
γ=γ,
batch_size=1,
rng=rng
),
controller=InsertSampleRatioController(
threshold=1,
n_inserted=0
))
)
for player in players(e)))
);
hooks = MultiAgentHook(NamedTuple((p => TotalRewardPerEpisode() for p ∈ players(e))))
Experiment(m, e, StopAfterEpisode(episodes_per_step), hooks)
end
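This method is what the E-string lookup in the test suite resolves to; a usage sketch mirroring runtests.jl:

run(E`JuliaRL_IDQN_TicTacToe`)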

create_policy() = QBasedPolicy(
learner=DQNLearner(
approximator=Approximator(
@@ -36,7 +99,7 @@ create_policy() = QBasedPolicy(
);
sync_freq=100
),
optimiser=ADAM(),
optimiser=Adam(),
),
n=32,
γ=0.99f0,
@@ -75,9 +138,8 @@ m = MultiAgentPolicy(NamedTuple((player =>
);
hooks = MultiAgentHook(NamedTuple((p => TotalRewardPerEpisode() for p ∈ players(e))))

episodes_per_step = 25
win_rates = (Cross=Float64[], Nought=Float64[])
@showprogress for i ∈ 1:2
for i ∈ 1:2
run(m, e, StopAfterEpisode(episodes_per_step; is_show_progress=false), hooks)
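# Running average of episode reward over all episodes played so far (i * episodes_per_step),
# used here as a win-rate proxy for each player.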
wr_cross = sum(hooks[:Cross].rewards)/(i*episodes_per_step)
wr_nought = sum(hooks[:Nought].rewards)/(i*episodes_per_step)
@@ -1,6 +1,7 @@
module ReinforcementLearningExperiments

using Reexport
using Requires

@reexport using ReinforcementLearningCore, ReinforcementLearningBase, ReinforcementLearningZoo

@@ -19,6 +20,10 @@ include(joinpath(EXPERIMENTS_DIR, "JuliaRL_Rainbow_CartPole.jl"))
include(joinpath(EXPERIMENTS_DIR, "JuliaRL_VPG_CartPole.jl"))
include(joinpath(EXPERIMENTS_DIR, "JuliaRL_TRPO_CartPole.jl"))
include(joinpath(EXPERIMENTS_DIR, "JuliaRL_MPO_CartPole.jl"))
include(joinpath(EXPERIMENTS_DIR, "IDQN_TicTacToe.jl"))
@require PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" include(
joinpath(EXPERIMENTS_DIR, "DQN_mpe_simple.jl")
)

# dynamic loading environments
function __init__() end
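Requires.jl documents @require as something to call from a package's __init__, so that the conditional include fires when PyCall is loaded at runtime. A minimal sketch of that conventional pattern, using the same UUID and path as above (an alternative arrangement, not the code in this commit):

function __init__()
    @require PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" include(
        joinpath(EXPERIMENTS_DIR, "DQN_mpe_simple.jl")
    )
end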
6 changes: 6 additions & 0 deletions src/ReinforcementLearningExperiments/test/runtests.jl
@@ -1,6 +1,10 @@
using ReinforcementLearningExperiments
using CUDA

using Requires



CUDA.allowscalar(false)

run(E`JuliaRL_NFQ_CartPole`)
@@ -15,6 +19,8 @@ run(E`JuliaRL_VPG_CartPole`)
run(E`JuliaRL_MPODiscrete_CartPole`)
run(E`JuliaRL_MPOContinuous_CartPole`)
run(E`JuliaRL_MPOCovariance_CartPole`)
@require PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" run(E`JuliaRL_DQN_MPESimple`)
run(E`JuliaRL_IDQN_TicTacToe`)
# run(E`JuliaRL_BC_CartPole`)
# run(E`JuliaRL_VMPO_CartPole`)
# run(E`JuliaRL_BasicDQN_MountainCar`)
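For a plain test script, a hedged alternative to @require (an assumption on my part, not part of this commit) is to gate the PettingZoo-dependent experiment on PyCall being resolvable in the environment:

if Base.find_package("PyCall") !== nothing
    run(E`JuliaRL_DQN_MPESimple`)
end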
