From 5df8e666d8daf567f124bc8a0414d324b24abadd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Panajiotis=20Ke=C3=9Fler?= Date: Fri, 7 Jul 2023 11:33:52 +0200 Subject: [PATCH 1/5] added first Independent Q Learning experiment --- .../src/experiments/MARL/IDQN_TicTacToe.jl | 96 +++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 src/ReinforcementLearningExperiments/src/experiments/MARL/IDQN_TicTacToe.jl diff --git a/src/ReinforcementLearningExperiments/src/experiments/MARL/IDQN_TicTacToe.jl b/src/ReinforcementLearningExperiments/src/experiments/MARL/IDQN_TicTacToe.jl new file mode 100644 index 000000000..027108256 --- /dev/null +++ b/src/ReinforcementLearningExperiments/src/experiments/MARL/IDQN_TicTacToe.jl @@ -0,0 +1,96 @@ +# --- +# title: JuliaRL\_IDQN\_TicTacToe +# cover: +# description: IDQN applied to TicTacToe competitive +# date: 2023-07-03 +# author: "[Panajiotis Keßler](mailto:panajiotis.kessler@gmail.com)" +# --- + +using StableRNGs +using ReinforcementLearning +using ReinforcementLearningBase +using ReinforcementLearningZoo +using ReinforcementLearningCore +using Plots +using Flux +using Flux.Losses: huber_loss +using Flux: glorot_uniform + +using ProgressMeter + + +rng = StableRNG(1234) + +cap = 100 + +RLCore.forward(L::DQNLearner, state::A) where {A <: Real} = RLCore.forward(L, [state]) + +create_policy() = QBasedPolicy( + learner=DQNLearner( + approximator=Approximator( + model=TwinNetwork( + Chain( + Dense(1, 512, relu; init=glorot_uniform(rng)), + Dense(512, 256, relu; init=glorot_uniform(rng)), + Dense(256, 9; init=glorot_uniform(rng)), + ); + sync_freq=100 + ), + optimiser=ADAM(), + ), + n=32, + γ=0.99f0, + is_enable_double_DQN=true, + loss_func=huber_loss, + rng=rng, + ), + explorer=EpsilonGreedyExplorer( + kind=:exp, + ϵ_stable=0.01, + decay_steps=500, + rng=rng, + ), + ) + +e = TicTacToeEnv(); +m = MultiAgentPolicy(NamedTuple((player => + Agent(player != :Cross ? 
create_policy() : RandomPolicy(;rng=rng), + Trajectory( + container=CircularArraySARTTraces( + capacity=cap, + state=Integer => (1,), + ), + sampler=NStepBatchSampler{SS′ART}( + n=1, + γ=0.99f0, + batch_size=1, + rng=rng + ), + controller=InsertSampleRatioController( + threshold=1, + n_inserted=0 + )) + ) + for player in players(e))) + ); +hooks = MultiAgentHook(NamedTuple((p => TotalRewardPerEpisode() for p ∈ players(e)))) + +episodes_per_step = 25 +win_rates = (Cross=Float64[], Nought=Float64[]) +@showprogress for i ∈ 1:2 + run(m, e, StopAfterEpisode(episodes_per_step; is_show_progress=false), hooks) + wr_cross = sum(hooks[:Cross].rewards)/(i*episodes_per_step) + wr_nought = sum(hooks[:Nought].rewards)/(i*episodes_per_step) + push!(win_rates[:Cross], wr_cross) + push!(win_rates[:Nought], wr_nought) +end +p1 = plot([win_rates[:Cross] win_rates[:Nought]], labels=["Cross" "Nought"]) +xlabel!("Iteration steps of $episodes_per_step episodes") +ylabel!("Win rate of the player") + +p2 = plot([hooks[:Cross].rewards hooks[:Nought].rewards], labels=["Cross" "Nought"]) +xlabel!("Overall episodes") +ylabel!("Rewards of the players") + +p = plot(p1, p2, layout=(2,1), size=[1000,1000]) +savefig("TTT_CROSS_DQN_NOUGHT_RANDOM.png") From d660085c7f43663f24f3d7492a44d71a80bcd8d4 Mon Sep 17 00:00:00 2001 From: Mytolo Date: Mon, 17 Jul 2023 18:23:45 +0200 Subject: [PATCH 2/5] added experiments to test adjusted tests and merged main --- .../Project.toml | 6 +- .../experiments/MARL/DQN_mpe_simple.jl | 4 +- .../experiments/MARL/IDQN_TicTacToe.jl | 72 +++++++++++++++++-- .../src/ReinforcementLearningExperiments.jl | 5 ++ .../test/runtests.jl | 6 ++ 5 files changed, 85 insertions(+), 8 deletions(-) rename src/ReinforcementLearningExperiments/{src => deps/experiments}/experiments/MARL/DQN_mpe_simple.jl (96%) rename src/ReinforcementLearningExperiments/{src => deps/experiments}/experiments/MARL/IDQN_TicTacToe.jl (56%) diff --git a/src/ReinforcementLearningExperiments/Project.toml b/src/ReinforcementLearningExperiments/Project.toml index 5f41135fc..50039e4b0 100644 --- a/src/ReinforcementLearningExperiments/Project.toml +++ b/src/ReinforcementLearningExperiments/Project.toml @@ -6,12 +6,14 @@ version = "0.3.1" [deps] Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" ReinforcementLearningBase = "e575027e-6cd6-5018-9292-cdc6200d2b44" ReinforcementLearningCore = "de1b191a-4ae0-4afa-a27b-92d07f46b2d6" ReinforcementLearningEnvironments = "25e41dd2-4622-11e9-1641-f1adca772921" ReinforcementLearningZoo = "d607f57d-ee1e-4ba7-bcf2-7734c1e31854" +Requires = "ae029012-a4dd-5104-9daa-d747884805df" StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" Weave = "44d3d7a6-8a23-5bf8-98c5-b353f8df5ec9" @@ -29,7 +31,9 @@ julia = "1.9" [extras] CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" +Requires = "ae029012-a4dd-5104-9daa-d747884805df" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["CUDA", "Test"] +test = ["CUDA", "PyCall", "Test"] diff --git a/src/ReinforcementLearningExperiments/src/experiments/MARL/DQN_mpe_simple.jl b/src/ReinforcementLearningExperiments/deps/experiments/experiments/MARL/DQN_mpe_simple.jl similarity index 96% rename from src/ReinforcementLearningExperiments/src/experiments/MARL/DQN_mpe_simple.jl rename to 
src/ReinforcementLearningExperiments/deps/experiments/experiments/MARL/DQN_mpe_simple.jl index 2c3363df9..3bef9d9a2 100644 --- a/src/ReinforcementLearningExperiments/src/experiments/MARL/DQN_mpe_simple.jl +++ b/src/ReinforcementLearningExperiments/deps/experiments/experiments/MARL/DQN_mpe_simple.jl @@ -17,14 +17,14 @@ using Flux.Losses: huber_loss function RLCore.Experiment( ::Val{:JuliaRL}, ::Val{:DQN}, - ::Val{:MPESimple}; + ::Val{:MPESimple}, seed=123, n=1, γ=0.99f0, is_enable_double_DQN=true ) rng = StableRNG(seed) - env = discrete2standard_discrete(PettingzooEnv("mpe.simple_v2"; seed=seed)) + env = discrete2standard_discrete(PettingZooEnv("mpe.simple_v2"; seed=seed)) ns, na = length(state(env)), length(action_space(env)) agent = Agent( diff --git a/src/ReinforcementLearningExperiments/src/experiments/MARL/IDQN_TicTacToe.jl b/src/ReinforcementLearningExperiments/deps/experiments/experiments/MARL/IDQN_TicTacToe.jl similarity index 56% rename from src/ReinforcementLearningExperiments/src/experiments/MARL/IDQN_TicTacToe.jl rename to src/ReinforcementLearningExperiments/deps/experiments/experiments/MARL/IDQN_TicTacToe.jl index 027108256..30353b2f9 100644 --- a/src/ReinforcementLearningExperiments/src/experiments/MARL/IDQN_TicTacToe.jl +++ b/src/ReinforcementLearningExperiments/deps/experiments/experiments/MARL/IDQN_TicTacToe.jl @@ -7,7 +7,6 @@ # --- using StableRNGs -using ReinforcementLearning using ReinforcementLearningBase using ReinforcementLearningZoo using ReinforcementLearningCore @@ -16,7 +15,6 @@ using Flux using Flux.Losses: huber_loss using Flux: glorot_uniform -using ProgressMeter rng = StableRNG(1234) @@ -25,6 +23,71 @@ cap = 100 RLCore.forward(L::DQNLearner, state::A) where {A <: Real} = RLCore.forward(L, [state]) + +episodes_per_step = 25 + +function RLCore.Experiment( + ::Val{:JuliaRL}, + ::Val{:IDQN}, + ::Val{:TicTacToe}, + seed=123, + n=1, + γ=0.99f0, + is_enable_double_DQN=true +) + rng = StableRNG(seed) + create_policy() = QBasedPolicy( + learner=DQNLearner( + approximator=Approximator( + model=TwinNetwork( + Chain( + Dense(1, 512, relu; init=glorot_uniform(rng)), + Dense(512, 256, relu; init=glorot_uniform(rng)), + Dense(256, 9; init=glorot_uniform(rng)), + ); + sync_freq=100 + ), + optimiser=Adam(), + ), + n=n, + γ=γ, + is_enable_double_DQN=is_enable_double_DQN, + loss_func=huber_loss, + rng=rng, + ), + explorer=EpsilonGreedyExplorer( + kind=:exp, + ϵ_stable=0.01, + decay_steps=500, + rng=rng, + ), + ) + + e = TicTacToeEnv(); + m = MultiAgentPolicy(NamedTuple((player => + Agent(player != :Cross ? 
create_policy() : RandomPolicy(;rng=rng), + Trajectory( + container=CircularArraySARTTraces( + capacity=cap, + state=Integer => (1,), + ), + sampler=NStepBatchSampler{SS′ART}( + n=n, + γ=γ, + batch_size=1, + rng=rng + ), + controller=InsertSampleRatioController( + threshold=1, + n_inserted=0 + )) + ) + for player in players(e))) + ); + hooks = MultiAgentHook(NamedTuple((p => TotalRewardPerEpisode() for p ∈ players(e)))) + Experiment(m, e, StopAfterEpisode(episodes_per_step), hooks) +end + create_policy() = QBasedPolicy( learner=DQNLearner( approximator=Approximator( @@ -36,7 +99,7 @@ create_policy() = QBasedPolicy( ); sync_freq=100 ), - optimiser=ADAM(), + optimiser=Adam(), ), n=32, γ=0.99f0, @@ -75,9 +138,8 @@ m = MultiAgentPolicy(NamedTuple((player => ); hooks = MultiAgentHook(NamedTuple((p => TotalRewardPerEpisode() for p ∈ players(e)))) -episodes_per_step = 25 win_rates = (Cross=Float64[], Nought=Float64[]) -@showprogress for i ∈ 1:2 +for i ∈ 1:2 run(m, e, StopAfterEpisode(episodes_per_step; is_show_progress=false), hooks) wr_cross = sum(hooks[:Cross].rewards)/(i*episodes_per_step) wr_nought = sum(hooks[:Nought].rewards)/(i*episodes_per_step) diff --git a/src/ReinforcementLearningExperiments/src/ReinforcementLearningExperiments.jl b/src/ReinforcementLearningExperiments/src/ReinforcementLearningExperiments.jl index ca27a1031..7efab0bac 100644 --- a/src/ReinforcementLearningExperiments/src/ReinforcementLearningExperiments.jl +++ b/src/ReinforcementLearningExperiments/src/ReinforcementLearningExperiments.jl @@ -1,6 +1,7 @@ module ReinforcementLearningExperiments using Reexport +using Requires @reexport using ReinforcementLearningCore, ReinforcementLearningBase, ReinforcementLearningZoo @@ -19,6 +20,10 @@ include(joinpath(EXPERIMENTS_DIR, "JuliaRL_Rainbow_CartPole.jl")) include(joinpath(EXPERIMENTS_DIR, "JuliaRL_VPG_CartPole.jl")) include(joinpath(EXPERIMENTS_DIR, "JuliaRL_TRPO_CartPole.jl")) include(joinpath(EXPERIMENTS_DIR, "JuliaRL_MPO_CartPole.jl")) +include(joinpath(EXPERIMENTS_DIR, "IDQN_TicTacToe.jl")) +@require PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" include( + joinpath(EXPERIMENTS_DIR, "DQN_mpe_simple.jl") +) # dynamic loading environments function __init__() end diff --git a/src/ReinforcementLearningExperiments/test/runtests.jl b/src/ReinforcementLearningExperiments/test/runtests.jl index 1086313e0..eae611c97 100644 --- a/src/ReinforcementLearningExperiments/test/runtests.jl +++ b/src/ReinforcementLearningExperiments/test/runtests.jl @@ -1,6 +1,10 @@ using ReinforcementLearningExperiments using CUDA +using Requires + + + CUDA.allowscalar(false) run(E`JuliaRL_NFQ_CartPole`) @@ -15,6 +19,8 @@ run(E`JuliaRL_VPG_CartPole`) run(E`JuliaRL_MPODiscrete_CartPole`) run(E`JuliaRL_MPOContinuous_CartPole`) run(E`JuliaRL_MPOCovariance_CartPole`) +@require PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" run(E`JuliaRL_DQN_MPESimple`) +run(E`JuliaRL_IDQN_TicTacToe`) # run(E`JuliaRL_BC_CartPole`) # run(E`JuliaRL_VMPO_CartPole`) # run(E`JuliaRL_BasicDQN_MountainCar`) From a2fed827772ca2fcbcfe3044768796b9ca1b4e3a Mon Sep 17 00:00:00 2001 From: Mytolo Date: Mon, 17 Jul 2023 18:23:45 +0200 Subject: [PATCH 3/5] added experiments to test adjusted tests and merged main --- .../experiments/experiments/MARL/DQN_mpe_simple.jl | 2 +- .../src/ReinforcementLearningExperiments.jl | 10 ++++++---- src/ReinforcementLearningExperiments/test/runtests.jl | 6 +++++- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git 
a/src/ReinforcementLearningExperiments/deps/experiments/experiments/MARL/DQN_mpe_simple.jl b/src/ReinforcementLearningExperiments/deps/experiments/experiments/MARL/DQN_mpe_simple.jl index 3bef9d9a2..205d09f10 100644 --- a/src/ReinforcementLearningExperiments/deps/experiments/experiments/MARL/DQN_mpe_simple.jl +++ b/src/ReinforcementLearningExperiments/deps/experiments/experiments/MARL/DQN_mpe_simple.jl @@ -7,7 +7,7 @@ # --- using PyCall -using ReinforcementLearningCore, ReinforcementLearningBase, ReinforcementLearningZoo +using ReinforcementLearningCore, ReinforcementLearningBase, ReinforcementLearningZoo, ReinforcementLearningEnvironments using Flux using Flux: glorot_uniform diff --git a/src/ReinforcementLearningExperiments/src/ReinforcementLearningExperiments.jl b/src/ReinforcementLearningExperiments/src/ReinforcementLearningExperiments.jl index 7efab0bac..b9133efda 100644 --- a/src/ReinforcementLearningExperiments/src/ReinforcementLearningExperiments.jl +++ b/src/ReinforcementLearningExperiments/src/ReinforcementLearningExperiments.jl @@ -21,11 +21,13 @@ include(joinpath(EXPERIMENTS_DIR, "JuliaRL_VPG_CartPole.jl")) include(joinpath(EXPERIMENTS_DIR, "JuliaRL_TRPO_CartPole.jl")) include(joinpath(EXPERIMENTS_DIR, "JuliaRL_MPO_CartPole.jl")) include(joinpath(EXPERIMENTS_DIR, "IDQN_TicTacToe.jl")) -@require PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" include( - joinpath(EXPERIMENTS_DIR, "DQN_mpe_simple.jl") -) + # dynamic loading environments -function __init__() end +function __init__() + @require PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" include( + joinpath(EXPERIMENTS_DIR, "DQN_mpe_simple.jl") + ) +end end # module diff --git a/src/ReinforcementLearningExperiments/test/runtests.jl b/src/ReinforcementLearningExperiments/test/runtests.jl index eae611c97..3e35c0024 100644 --- a/src/ReinforcementLearningExperiments/test/runtests.jl +++ b/src/ReinforcementLearningExperiments/test/runtests.jl @@ -5,6 +5,10 @@ using Requires +using Requires + + + CUDA.allowscalar(false) run(E`JuliaRL_NFQ_CartPole`) @@ -19,8 +23,8 @@ run(E`JuliaRL_VPG_CartPole`) run(E`JuliaRL_MPODiscrete_CartPole`) run(E`JuliaRL_MPOContinuous_CartPole`) run(E`JuliaRL_MPOCovariance_CartPole`) -@require PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" run(E`JuliaRL_DQN_MPESimple`) run(E`JuliaRL_IDQN_TicTacToe`) +@require PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" run(E`JuliaRL_DQN_MPESimple`) # run(E`JuliaRL_BC_CartPole`) # run(E`JuliaRL_VMPO_CartPole`) # run(E`JuliaRL_BasicDQN_MountainCar`) From 358ed8b52171574c93eb5a3ae2e3825ec60d8843 Mon Sep 17 00:00:00 2001 From: Mytolo Date: Mon, 17 Jul 2023 18:23:45 +0200 Subject: [PATCH 4/5] added experiments to test adjusted tests and merged main --- .../experiments/MARL/DQN_mpe_simple.jl | 70 ------------------- .../experiments/MARL/pettingzoo_ex.jl | 70 +++++++++++++++++++ .../src/ReinforcementLearningExperiments.jl | 12 +++- .../test/runtests.jl | 21 ++++-- 4 files changed, 93 insertions(+), 80 deletions(-) create mode 100644 src/ReinforcementLearningExperiments/deps/experiments/experiments/MARL/pettingzoo_ex.jl diff --git a/src/ReinforcementLearningExperiments/deps/experiments/experiments/MARL/DQN_mpe_simple.jl b/src/ReinforcementLearningExperiments/deps/experiments/experiments/MARL/DQN_mpe_simple.jl index 205d09f10..7d832ac68 100644 --- a/src/ReinforcementLearningExperiments/deps/experiments/experiments/MARL/DQN_mpe_simple.jl +++ b/src/ReinforcementLearningExperiments/deps/experiments/experiments/MARL/DQN_mpe_simple.jl @@ -6,76 +6,6 @@ # author: "[Panajiotis 
Keßler](mailto:panajiotis@christoforidis.net)" # --- -using PyCall -using ReinforcementLearningCore, ReinforcementLearningBase, ReinforcementLearningZoo, ReinforcementLearningEnvironments -using Flux -using Flux: glorot_uniform - -using StableRNGs: StableRNG -using Flux.Losses: huber_loss - -function RLCore.Experiment( - ::Val{:JuliaRL}, - ::Val{:DQN}, - ::Val{:MPESimple}, - seed=123, - n=1, - γ=0.99f0, - is_enable_double_DQN=true -) - rng = StableRNG(seed) - env = discrete2standard_discrete(PettingZooEnv("mpe.simple_v2"; seed=seed)) - ns, na = length(state(env)), length(action_space(env)) - - agent = Agent( - policy=QBasedPolicy( - learner=DQNLearner( - approximator=Approximator( - model=TwinNetwork( - Chain( - Dense(ns, 128, relu; init=glorot_uniform(rng)), - Dense(128, 128, relu; init=glorot_uniform(rng)), - Dense(128, na; init=glorot_uniform(rng)), - ); - sync_freq=100 - ), - optimiser=Adam(), - ), - n=n, - γ=γ, - is_enable_double_DQN=is_enable_double_DQN, - loss_func=huber_loss, - rng=rng, - ), - explorer=EpsilonGreedyExplorer( - kind=:exp, - ϵ_stable=0.01, - decay_steps=500, - rng=rng, - ), - ), - trajectory=Trajectory( - container=CircularArraySARTTraces( - capacity=1000, - state=Float32 => (ns,), - ), - sampler=NStepBatchSampler{SS′ART}( - n=n, - γ=γ, - batch_size=32, - rng=rng - ), - controller=InsertSampleRatioController( - threshold=100, - n_inserted=-1 - ) - ) - ) - - stop_condition = StopAfterEpisode(150, is_show_progress=!haskey(ENV, "CI")) - hook = TotalRewardPerEpisode() - Experiment(agent, env, stop_condition, hook) -end using Plots ex = E`JuliaRL_DQN_MPESimple` diff --git a/src/ReinforcementLearningExperiments/deps/experiments/experiments/MARL/pettingzoo_ex.jl b/src/ReinforcementLearningExperiments/deps/experiments/experiments/MARL/pettingzoo_ex.jl new file mode 100644 index 000000000..9982f8710 --- /dev/null +++ b/src/ReinforcementLearningExperiments/deps/experiments/experiments/MARL/pettingzoo_ex.jl @@ -0,0 +1,70 @@ +using PyCall +using ReinforcementLearningCore, ReinforcementLearningBase, ReinforcementLearningZoo, ReinforcementLearningEnvironments +using Flux +using Flux: glorot_uniform + +using StableRNGs: StableRNG +using Flux.Losses: huber_loss + +function RLCore.Experiment( + ::Val{:JuliaRL}, + ::Val{:DQN}, + ::Val{:MPESimple}, + seed=123, + n=1, + γ=0.99f0, + is_enable_double_DQN=true +) + rng = StableRNG(seed) + env = discrete2standard_discrete(PettingZooEnv("mpe.simple_v2"; seed=seed)) + ns, na = length(state(env)), length(action_space(env)) + + agent = Agent( + policy=QBasedPolicy( + learner=DQNLearner( + approximator=Approximator( + model=TwinNetwork( + Chain( + Dense(ns, 128, relu; init=glorot_uniform(rng)), + Dense(128, 128, relu; init=glorot_uniform(rng)), + Dense(128, na; init=glorot_uniform(rng)), + ); + sync_freq=100 + ), + optimiser=Adam(), + ), + n=n, + γ=γ, + is_enable_double_DQN=is_enable_double_DQN, + loss_func=huber_loss, + rng=rng, + ), + explorer=EpsilonGreedyExplorer( + kind=:exp, + ϵ_stable=0.01, + decay_steps=500, + rng=rng, + ), + ), + trajectory=Trajectory( + container=CircularArraySARTTraces( + capacity=1000, + state=Float32 => (ns,), + ), + sampler=NStepBatchSampler{SS′ART}( + n=n, + γ=γ, + batch_size=32, + rng=rng + ), + controller=InsertSampleRatioController( + threshold=100, + n_inserted=-1 + ) + ) + ) + + stop_condition = StopAfterEpisode(150, is_show_progress=!haskey(ENV, "CI")) + hook = TotalRewardPerEpisode() + Experiment(agent, env, stop_condition, hook) +end \ No newline at end of file diff --git 
a/src/ReinforcementLearningExperiments/src/ReinforcementLearningExperiments.jl b/src/ReinforcementLearningExperiments/src/ReinforcementLearningExperiments.jl index b9133efda..04761f956 100644 --- a/src/ReinforcementLearningExperiments/src/ReinforcementLearningExperiments.jl +++ b/src/ReinforcementLearningExperiments/src/ReinforcementLearningExperiments.jl @@ -1,14 +1,18 @@ module ReinforcementLearningExperiments using Reexport +using Pkg using Requires -@reexport using ReinforcementLearningCore, ReinforcementLearningBase, ReinforcementLearningZoo +@reexport using ReinforcementLearningCore, ReinforcementLearningBase, ReinforcementLearningZoo, ReinforcementLearningEnvironments const EXPERIMENTS_DIR = joinpath(@__DIR__, "experiments") +# as long as there are not working experiments, this is not working properly # for f in readdir(EXPERIMENTS_DIR) # include(joinpath(EXPERIMENTS_DIR, f)) # end + + include(joinpath(EXPERIMENTS_DIR, "JuliaRL_NFQ_CartPole.jl")) include(joinpath(EXPERIMENTS_DIR, "JuliaRL_BasicDQN_CartPole.jl")) include(joinpath(EXPERIMENTS_DIR, "JuliaRL_DQN_CartPole.jl")) @@ -23,10 +27,12 @@ include(joinpath(EXPERIMENTS_DIR, "JuliaRL_MPO_CartPole.jl")) include(joinpath(EXPERIMENTS_DIR, "IDQN_TicTacToe.jl")) + # dynamic loading environments -function __init__() +function __init__() + #PyCall environments experiments @require PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" include( - joinpath(EXPERIMENTS_DIR, "DQN_mpe_simple.jl") + joinpath(EXPERIMENTS_DIR, "pettingzoo_ex.jl") ) end diff --git a/src/ReinforcementLearningExperiments/test/runtests.jl b/src/ReinforcementLearningExperiments/test/runtests.jl index 3e35c0024..947fac4e3 100644 --- a/src/ReinforcementLearningExperiments/test/runtests.jl +++ b/src/ReinforcementLearningExperiments/test/runtests.jl @@ -1,12 +1,10 @@ +using Pkg +if Base.UUID("438e738f-606a-5dbb-bf0a-cddfbfd45ab0") in Pkg.dependencies().keys + using PyCall +end using ReinforcementLearningExperiments using CUDA -using Requires - - - -using Requires - CUDA.allowscalar(false) @@ -24,7 +22,16 @@ run(E`JuliaRL_MPODiscrete_CartPole`) run(E`JuliaRL_MPOContinuous_CartPole`) run(E`JuliaRL_MPOCovariance_CartPole`) run(E`JuliaRL_IDQN_TicTacToe`) -@require PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" run(E`JuliaRL_DQN_MPESimple`) + +# test PyCall experiments. +# NOTE: Do NOT use E`...` macro as it will execute also if statement is false (beforehand?) 
+if Base.UUID("438e738f-606a-5dbb-bf0a-cddfbfd45ab0") in Pkg.dependencies().keys
+    if PyCall.pyexists("pettingzoo.mpe")
+        x = RLCore.Experiment("JuliaRL_DQN_MPESimple")
+        run(x)
+    end
+end
+
 # run(E`JuliaRL_BC_CartPole`)
 # run(E`JuliaRL_VMPO_CartPole`)
 # run(E`JuliaRL_BasicDQN_MountainCar`)

From c7448026fa8a4c1c424b1e9b3a98e04a685199ff Mon Sep 17 00:00:00 2001
From: Mytolo
Date: Thu, 20 Jul 2023 15:08:49 +0200
Subject: [PATCH 5/5] fix missing Pkg dependency

---
 src/ReinforcementLearningExperiments/Project.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/ReinforcementLearningExperiments/Project.toml b/src/ReinforcementLearningExperiments/Project.toml
index 50039e4b0..bd7053e41 100644
--- a/src/ReinforcementLearningExperiments/Project.toml
+++ b/src/ReinforcementLearningExperiments/Project.toml
@@ -6,6 +6,7 @@ version = "0.3.1"
 [deps]
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
+Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"