diff --git a/src/ReinforcementLearningCore/src/policies/random_policy.jl b/src/ReinforcementLearningCore/src/policies/random_policy.jl index b09cd69d8..3719d478c 100644 --- a/src/ReinforcementLearningCore/src/policies/random_policy.jl +++ b/src/ReinforcementLearningCore/src/policies/random_policy.jl @@ -24,7 +24,7 @@ RandomPolicy(s = nothing; rng = Random.default_rng()) = RandomPolicy(s, rng) RLBase.optimise!(::RandomPolicy, x::NamedTuple) = nothing -RLBase.plan!(p::RandomPolicy{S,RNG}, env::AbstractEnv) where {S,RNG<:AbstractRNG} = rand.(p.rng, p.action_space) +RLBase.plan!(p::RandomPolicy{S,RNG}, env::AbstractEnv) where {S,RNG<:AbstractRNG} = rand(p.rng, p.action_space) function RLBase.plan!(p::RandomPolicy{Nothing,RNG}, env::AbstractEnv) where {RNG<:AbstractRNG} legal_action_space_ = RLBase.legal_action_space(env) @@ -33,7 +33,7 @@ end function RLBase.plan!(p::RandomPolicy{Nothing,RNG}, env::E, player::Symbol) where {E<:AbstractEnv, RNG<:AbstractRNG} legal_action_space_ = RLBase.legal_action_space(env, player) - return rand.(p.rng, legal_action_space_) + return rand(p.rng, legal_action_space_) end ##### diff --git a/src/ReinforcementLearningCore/test/core/base.jl b/src/ReinforcementLearningCore/test/core/base.jl index 300f3fcd7..f46e40b69 100644 --- a/src/ReinforcementLearningCore/test/core/base.jl +++ b/src/ReinforcementLearningCore/test/core/base.jl @@ -1,4 +1,4 @@ -using ReinforcementLearningCore +#using ReinforcementLearningCore using ReinforcementLearningBase using TimerOutputs diff --git a/src/ReinforcementLearningCore/test/core/stop_conditions.jl b/src/ReinforcementLearningCore/test/core/stop_conditions.jl index f492527c8..0411e4459 100644 --- a/src/ReinforcementLearningCore/test/core/stop_conditions.jl +++ b/src/ReinforcementLearningCore/test/core/stop_conditions.jl @@ -1,4 +1,4 @@ -using ReinforcementLearningCore: check_stop +import ReinforcementLearningCore.check_stop @testset "StopAfterStep" begin stop_condition = StopAfterStep(10) diff --git a/src/ReinforcementLearningCore/test/policies/agent.jl b/src/ReinforcementLearningCore/test/policies/agent.jl index 81297ce77..1f412c8e6 100644 --- a/src/ReinforcementLearningCore/test/policies/agent.jl +++ b/src/ReinforcementLearningCore/test/policies/agent.jl @@ -1,6 +1,5 @@ using ReinforcementLearningBase, ReinforcementLearningEnvironments -using ReinforcementLearningCore: SRT -using ReinforcementLearningCore +import ReinforcementLearningCore.SRT @testset "agent.jl" begin @testset "Agent Tests" begin diff --git a/src/ReinforcementLearningCore/test/policies/multi_agent.jl b/src/ReinforcementLearningCore/test/policies/multi_agent.jl index fc1229497..496ec98bb 100644 --- a/src/ReinforcementLearningCore/test/policies/multi_agent.jl +++ b/src/ReinforcementLearningCore/test/policies/multi_agent.jl @@ -1,7 +1,6 @@ using Test using ReinforcementLearningEnvironments using ReinforcementLearningTrajectories -using ReinforcementLearningCore using ReinforcementLearningBase using DomainSets diff --git a/src/ReinforcementLearningCore/test/runtests.jl b/src/ReinforcementLearningCore/test/runtests.jl index d11899e4d..41768f81d 100644 --- a/src/ReinforcementLearningCore/test/runtests.jl +++ b/src/ReinforcementLearningCore/test/runtests.jl @@ -1,5 +1,5 @@ using ReinforcementLearningBase -using ReinforcementLearningCore +#using ReinforcementLearningCore using ReinforcementLearningEnvironments using ReinforcementLearningTrajectories diff --git a/src/ReinforcementLearningCore/test/utils/distributions.jl b/src/ReinforcementLearningCore/test/utils/distributions.jl index e40d31b99..8dd3bde85 100644 --- a/src/ReinforcementLearningCore/test/utils/distributions.jl +++ b/src/ReinforcementLearningCore/test/utils/distributions.jl @@ -1,4 +1,4 @@ -using Test, LinearAlgebra, Distributions, ReinforcementLearningCore, CUDA, Flux +using Test, LinearAlgebra, Distributions, CUDA, Flux @testset "utils/distributions" begin @testset "logdetLorU" begin diff --git a/src/ReinforcementLearningCore/test/utils/networks.jl b/src/ReinforcementLearningCore/test/utils/networks.jl index 59ff7e93a..06900109a 100644 --- a/src/ReinforcementLearningCore/test/utils/networks.jl +++ b/src/ReinforcementLearningCore/test/utils/networks.jl @@ -1,4 +1,4 @@ -using Test, Flux, CUDA, ChainRulesCore, LinearAlgebra, Distributions, ReinforcementLearningCore +using Test, Flux, CUDA, ChainRulesCore, LinearAlgebra, Distributions using Flux: params, gradient, unsqueeze @testset "Approximators" begin #= These may need to be updated due to recent changes diff --git a/src/ReinforcementLearningExperiments/deps/experiments/experiments/Policy Gradient/JuliaRL_SAC_Pendulum.jl b/src/ReinforcementLearningExperiments/deps/experiments/experiments/Policy Gradient/JuliaRL_SAC_Pendulum.jl index a7b2c31e5..2b3e0524b 100644 --- a/src/ReinforcementLearningExperiments/deps/experiments/experiments/Policy Gradient/JuliaRL_SAC_Pendulum.jl +++ b/src/ReinforcementLearningExperiments/deps/experiments/experiments/Policy Gradient/JuliaRL_SAC_Pendulum.jl @@ -67,7 +67,7 @@ function RLCore.Experiment( γ=0.99f0, α=0.2f0, start_steps=1000, - start_policy=RandomPolicy([-1.0 .. 1.0 for _ in 1:na]; rng=rng), + start_policy=RandomPolicy(-1.0 .. 1.0; rng=rng), automatic_entropy_tuning=true, lr_alpha=0.003f0, action_dims=action_dims,