Add the entropy tuning component (JuliaReinforcementLearning#365)
* Add the entropy tuning component

* Bump version

* Revert "Bump version"

This reverts commit 44ce4f5.

* bump version

* update NEWS

Co-authored-by: pilgrim <[email protected]>
Co-authored-by: Jun Tian <[email protected]>
3 people authored Jul 13, 2021
1 parent c7b66e4 commit dd17ad6
Showing 10 changed files with 63 additions and 31 deletions.
6 changes: 3 additions & 3 deletions Manifest.toml
@@ -450,13 +450,13 @@ version = "1.0.0"
deps = ["AbstractTrees", "CommonRLInterface", "Markdown", "Random", "Test"]
path = "src/ReinforcementLearningBase"
uuid = "e575027e-6cd6-5018-9292-cdc6200d2b44"
version = "0.9.5"
version = "0.9.6"

[[ReinforcementLearningCore]]
deps = ["AbstractTrees", "Adapt", "CUDA", "CircularArrayBuffers", "Compat", "Dates", "Distributions", "ElasticArrays", "FillArrays", "Flux", "Functors", "GPUArrays", "LinearAlgebra", "MacroTools", "Markdown", "ProgressMeter", "Random", "ReinforcementLearningBase", "Setfield", "Statistics", "StatsBase", "UnicodePlots", "Zygote"]
path = "src/ReinforcementLearningCore"
uuid = "de1b191a-4ae0-4afa-a27b-92d07f46b2d6"
version = "0.8.0"
version = "0.8.1"

[[ReinforcementLearningEnvironments]]
deps = ["IntervalSets", "MacroTools", "Markdown", "Random", "ReinforcementLearningBase", "Requires", "StatsBase"]
@@ -468,7 +468,7 @@ version = "0.6.1"
deps = ["AbstractTrees", "CUDA", "CircularArrayBuffers", "DataStructures", "Dates", "Distributions", "Flux", "IntervalSets", "LinearAlgebra", "Logging", "MacroTools", "Random", "ReinforcementLearningBase", "ReinforcementLearningCore", "Setfield", "Statistics", "StatsBase", "StructArrays", "Zygote"]
path = "src/ReinforcementLearningZoo"
uuid = "d607f57d-ee1e-4ba7-bcf2-7734c1e31854"
version = "0.4.0"
version = "0.5.0"

[[Requires]]
deps = ["UUIDs"]
9 changes: 9 additions & 0 deletions NEWS.md
@@ -1,5 +1,14 @@
# ReinforcementLearning.jl Release Notes

## [email protected]

### ReinforcementLearningZoo.jl

#### v0.5.0

- Update the complete SAC implementation and modify some details based on the
original paper. [#365](https://github.com/JuliaReinforcementLearning/ReinforcementLearning.jl/pull/365)

## [email protected]

### ReinforcementLearningBase.jl
4 changes: 2 additions & 2 deletions Project.toml
@@ -1,7 +1,7 @@
name = "ReinforcementLearning"
uuid = "158674fc-8238-5cab-b5ba-03dfc80d1318"
authors = ["Johanni Brea <[email protected]>", "Jun Tian <[email protected]>"]
version = "0.9.0"
version = "0.10.0"

[deps]
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
Expand All @@ -15,7 +15,7 @@ Reexport = "0.2, 1"
ReinforcementLearningBase = "0.9"
ReinforcementLearningCore = "0.8"
ReinforcementLearningEnvironments = "0.6"
ReinforcementLearningZoo = "0.4"
ReinforcementLearningZoo = "0.5"
julia = "1.6"

[extras]
12 changes: 6 additions & 6 deletions docs/Manifest.toml
@@ -622,9 +622,9 @@ version = "1.42.0+0"

[[Libiconv_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "8d22e127ea9a0917bc98ebd3755c8bd31989381e"
git-tree-sha1 = "42b62845d70a619f063a7da093d995ec8e15e778"
uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531"
version = "1.16.1+0"
version = "1.16.1+1"

[[Libmount_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
@@ -955,19 +955,19 @@ version = "1.0.0"
deps = ["Reexport", "ReinforcementLearningBase", "ReinforcementLearningCore", "ReinforcementLearningEnvironments", "ReinforcementLearningZoo"]
path = ".."
uuid = "158674fc-8238-5cab-b5ba-03dfc80d1318"
version = "0.9.0"
version = "0.10.0"

[[ReinforcementLearningBase]]
deps = ["AbstractTrees", "CommonRLInterface", "Markdown", "Random", "Test"]
path = "../src/ReinforcementLearningBase"
uuid = "e575027e-6cd6-5018-9292-cdc6200d2b44"
version = "0.9.5"
version = "0.9.6"

[[ReinforcementLearningCore]]
deps = ["AbstractTrees", "Adapt", "CUDA", "CircularArrayBuffers", "Compat", "Dates", "Distributions", "ElasticArrays", "FillArrays", "Flux", "Functors", "GPUArrays", "LinearAlgebra", "MacroTools", "Markdown", "ProgressMeter", "Random", "ReinforcementLearningBase", "Setfield", "Statistics", "StatsBase", "UnicodePlots", "Zygote"]
path = "../src/ReinforcementLearningCore"
uuid = "de1b191a-4ae0-4afa-a27b-92d07f46b2d6"
version = "0.8.0"
version = "0.8.1"

[[ReinforcementLearningEnvironments]]
deps = ["IntervalSets", "MacroTools", "Markdown", "Random", "ReinforcementLearningBase", "Requires", "StatsBase"]
Expand All @@ -979,7 +979,7 @@ version = "0.6.1"
deps = ["AbstractTrees", "CUDA", "CircularArrayBuffers", "DataStructures", "Dates", "Distributions", "Flux", "IntervalSets", "LinearAlgebra", "Logging", "MacroTools", "Random", "ReinforcementLearningBase", "ReinforcementLearningCore", "Setfield", "Statistics", "StatsBase", "StructArrays", "Zygote"]
path = "../src/ReinforcementLearningZoo"
uuid = "d607f57d-ee1e-4ba7-bcf2-7734c1e31854"
version = "0.4.0"
version = "0.5.0"

[[Requires]]
deps = ["UUIDs"]
@@ -23,6 +23,7 @@ function RL.Experiment(
)
rng = StableRNG(seed)
inner_env = PendulumEnv(T = Float32, rng = rng)
+action_dims = inner_env.n_actions
A = action_space(inner_env)
low = A.left
high = A.right
@@ -64,13 +65,16 @@ function RL.Experiment(
target_qnetwork1 = create_q_net(),
target_qnetwork2 = create_q_net(),
γ = 0.99f0,
-ρ = 0.995f0,
+τ = 0.005f0,
α = 0.2f0,
batch_size = 64,
start_steps = 1000,
start_policy = RandomPolicy(Space([-1.0..1.0 for _ in 1:na]); rng = rng),
update_after = 1000,
update_every = 1,
+automatic_entropy_tuning = true,
+lr_alpha = 0.003f0,
+action_dims = action_dims,
rng = rng,
),
trajectory = CircularArraySARTTrajectory(
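The `action_dims` keyword added above feeds the target-entropy heuristic inside `SACPolicy`: with `automatic_entropy_tuning = true`, the constructor stores `target_entropy = Float32(-action_dims)`, the `H̄ = -dim(A)` default from the SAC paper. A minimal sketch of that bookkeeping (the literal `1` is illustrative; Pendulum's action is one-dimensional):

```julia
# Illustrative sketch: how action_dims becomes the target entropy.
action_dims = 1                         # Pendulum has a 1-dimensional action
target_entropy = Float32(-action_dims)  # H̄ = -dim(A) = -1.0f0
```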
12 changes: 6 additions & 6 deletions src/ReinforcementLearningExperiments/Manifest.toml
@@ -421,9 +421,9 @@ uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"

[[Libiconv_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "8d22e127ea9a0917bc98ebd3755c8bd31989381e"
git-tree-sha1 = "42b62845d70a619f063a7da093d995ec8e15e778"
uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531"
version = "1.16.1+0"
version = "1.16.1+1"

[[LinearAlgebra]]
deps = ["Libdl"]
@@ -636,19 +636,19 @@ version = "1.1.0"
deps = ["Reexport", "ReinforcementLearningBase", "ReinforcementLearningCore", "ReinforcementLearningEnvironments", "ReinforcementLearningZoo"]
path = "../.."
uuid = "158674fc-8238-5cab-b5ba-03dfc80d1318"
version = "0.9.0"
version = "0.10.0"

[[ReinforcementLearningBase]]
deps = ["AbstractTrees", "CommonRLInterface", "Markdown", "Random", "Test"]
path = "../ReinforcementLearningBase"
uuid = "e575027e-6cd6-5018-9292-cdc6200d2b44"
version = "0.9.5"
version = "0.9.6"

[[ReinforcementLearningCore]]
deps = ["AbstractTrees", "Adapt", "CUDA", "CircularArrayBuffers", "Compat", "Dates", "Distributions", "ElasticArrays", "FillArrays", "Flux", "Functors", "GPUArrays", "LinearAlgebra", "MacroTools", "Markdown", "ProgressMeter", "Random", "ReinforcementLearningBase", "Setfield", "Statistics", "StatsBase", "UnicodePlots", "Zygote"]
path = "../ReinforcementLearningCore"
uuid = "de1b191a-4ae0-4afa-a27b-92d07f46b2d6"
version = "0.8.0"
version = "0.8.1"

[[ReinforcementLearningEnvironments]]
deps = ["IntervalSets", "MacroTools", "Markdown", "Random", "ReinforcementLearningBase", "Requires", "StatsBase"]
Expand All @@ -660,7 +660,7 @@ version = "0.6.1"
deps = ["AbstractTrees", "CUDA", "CircularArrayBuffers", "DataStructures", "Dates", "Distributions", "Flux", "IntervalSets", "LinearAlgebra", "Logging", "MacroTools", "Random", "ReinforcementLearningBase", "ReinforcementLearningCore", "Setfield", "Statistics", "StatsBase", "StructArrays", "Zygote"]
path = "../ReinforcementLearningZoo"
uuid = "d607f57d-ee1e-4ba7-bcf2-7734c1e31854"
version = "0.4.0"
version = "0.5.0"

[[Requires]]
deps = ["UUIDs"]
6 changes: 3 additions & 3 deletions src/ReinforcementLearningExperiments/Project.toml
@@ -1,7 +1,7 @@
name = "ReinforcementLearningExperiments"
uuid = "6bd458e5-1694-412f-b601-3a888375c491"
authors = ["Jun Tian <[email protected]>"]
version = "0.1.0"
version = "0.1.1"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
@@ -39,11 +39,11 @@ Flux = "0.12"
GridWorlds = "0.4"
ImageTransformations = "0.8"
IntervalSets = "0.5"
ReinforcementLearning = "0.9"
ReinforcementLearning = "0.10"
ReinforcementLearningBase = "0.9"
ReinforcementLearningCore = "0.8"
ReinforcementLearningEnvironments = "0.6"
ReinforcementLearningZoo = "0.4"
ReinforcementLearningZoo = "0.5"
Requires = "1"
Setfield = "0.7"
StableRNGs = "1"
4 changes: 2 additions & 2 deletions src/ReinforcementLearningZoo/Manifest.toml
@@ -450,13 +450,13 @@ version = "1.0.0"
deps = ["AbstractTrees", "CommonRLInterface", "Markdown", "Random", "Test"]
path = "../ReinforcementLearningBase"
uuid = "e575027e-6cd6-5018-9292-cdc6200d2b44"
version = "0.9.5"
version = "0.9.6"

[[ReinforcementLearningCore]]
deps = ["AbstractTrees", "Adapt", "CUDA", "CircularArrayBuffers", "Compat", "Dates", "Distributions", "ElasticArrays", "FillArrays", "Flux", "Functors", "GPUArrays", "LinearAlgebra", "MacroTools", "Markdown", "ProgressMeter", "Random", "ReinforcementLearningBase", "Setfield", "Statistics", "StatsBase", "UnicodePlots", "Zygote"]
path = "../ReinforcementLearningCore"
uuid = "de1b191a-4ae0-4afa-a27b-92d07f46b2d6"
version = "0.8.0"
version = "0.8.1"

[[Requires]]
deps = ["UUIDs"]
2 changes: 1 addition & 1 deletion src/ReinforcementLearningZoo/Project.toml
@@ -1,7 +1,7 @@
name = "ReinforcementLearningZoo"
uuid = "d607f57d-ee1e-4ba7-bcf2-7734c1e31854"
authors = ["Jun Tian <[email protected]>"]
version = "0.4.1"
version = "0.5.0"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
33 changes: 26 additions & 7 deletions src/ReinforcementLearningZoo/src/algorithms/policy_gradient/sac.jl
@@ -7,20 +7,22 @@ mutable struct SACPolicy{
P,
R<:AbstractRNG,
} <: AbstractPolicy

policy::BA
qnetwork1::BC1
qnetwork2::BC2
target_qnetwork1::BC1
target_qnetwork2::BC2
γ::Float32
-ρ::Float32
+τ::Float32
α::Float32
batch_size::Int
start_steps::Int
start_policy::P
update_after::Int
update_every::Int
+automatic_entropy_tuning::Bool
+lr_alpha::Float32
+target_entropy::Float32
step::Int
rng::R
# Logging
@@ -40,12 +42,15 @@ end
- `target_qnetwork2`,
- `start_policy`,
- `γ = 0.99f0`,
-- `ρ = 0.995f0`,
+- `τ = 0.005f0`,
- `α = 0.2f0`,
- `batch_size = 32`,
- `start_steps = 10000`,
- `update_after = 1000`,
- `update_every = 50`,
+- `automatic_entropy_tuning::Bool = true`, whether to automatically tune the entropy temperature.
+- `lr_alpha::Float32 = 0.003f0`, the learning rate for the entropy temperature `α`.
+- `action_dims = 0`, the dimension of the action space; must be nonzero when `automatic_entropy_tuning = true`.
- `step = 0`,
- `rng = Random.GLOBAL_RNG`,
Expand All @@ -63,31 +68,40 @@ function SACPolicy(;
target_qnetwork2,
start_policy,
γ = 0.99f0,
-ρ = 0.995f0,
+τ = 0.005f0,
α = 0.2f0,
batch_size = 32,
start_steps = 10000,
update_after = 1000,
update_every = 50,
+automatic_entropy_tuning = true,
+lr_alpha = 0.003f0,
+action_dims = 0,
step = 0,
rng = Random.GLOBAL_RNG,
)
copyto!(qnetwork1, target_qnetwork1) # force sync
copyto!(qnetwork2, target_qnetwork2) # force sync
+if automatic_entropy_tuning
+@assert action_dims != 0
+end
SACPolicy(
policy,
qnetwork1,
qnetwork2,
target_qnetwork1,
target_qnetwork2,
γ,
-ρ,
+τ,
α,
batch_size,
start_steps,
start_policy,
update_after,
update_every,
+automatic_entropy_tuning,
+lr_alpha,
+Float32(-action_dims),
step,
rng,
0f0,
@@ -142,7 +156,7 @@ end
function RLBase.update!(p::SACPolicy, batch::NamedTuple{SARTS})
s, a, r, t, s′ = send_to_device(device(p.qnetwork1), batch)

-γ, ρ, α = p.γ, p.ρ, p.α
+γ, τ, α = p.γ, p.τ, p.α

a′, log_π = evaluate(p, s′)
q′_input = vcat(s′, a′)
@@ -179,11 +193,16 @@ function RLBase.update!(p::SACPolicy, batch::NamedTuple{SARTS})
end
update!(p.policy, p_grad)

+# Tune entropy automatically
+if p.automatic_entropy_tuning
+p.α -= p.lr_alpha * mean(-log_π .- p.target_entropy)
+end
+
# polyak averaging
for (dest, src) in zip(
Flux.params([p.target_qnetwork1, p.target_qnetwork2]),
Flux.params([p.qnetwork1, p.qnetwork2]),
)
-dest .= ρ .* dest .+ (1 - ρ) .* src
+dest .= (1 - τ) .* dest .+ τ .* src
end
end

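Read in isolation, the two rules this file's diff touches are the temperature update and the soft target update. Below is a minimal, self-contained sketch of both; the names `log_π`, `lr_alpha`, and `target_entropy` follow the diff, while `TemperatureState`, `tune_entropy!`, and `polyak!` are hypothetical helpers introduced only for illustration:

```julia
using Statistics: mean

# Hypothetical container for the quantities the diff adds to SACPolicy.
mutable struct TemperatureState
    α::Float32              # entropy temperature
    lr_alpha::Float32       # learning rate for α
    target_entropy::Float32 # H̄, set to -dim(A)
end

# One gradient step on J(α) = E[-α·log π(a|s) - α·H̄], whose gradient is
# dJ/dα = E[-log π - H̄]; this mirrors the diff's
# `p.α -= p.lr_alpha * mean(-log_π .- p.target_entropy)`.
function tune_entropy!(t::TemperatureState, log_π::AbstractVector{Float32})
    t.α -= t.lr_alpha * mean(-log_π .- t.target_entropy)
    return t.α
end

# Soft target update. The ρ → τ rename is purely notational:
# ρ .* dest .+ (1 - ρ) .* src with ρ = 0.995 is the same update as
# (1 - τ) .* dest .+ τ .* src with τ = 0.005.
polyak!(dest::AbstractArray, src::AbstractArray, τ) =
    (dest .= (1 - τ) .* dest .+ τ .* src)
```

With this convention, τ is the fraction of the online network mixed into the target each step, matching the notation of the original SAC paper; if the policy's entropy falls below H̄, `mean(-log_π .- target_entropy)` goes negative and α rises, encouraging more exploration.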