
Commit 81c161f

Rearrange approximator setup
1 parent 1d3c7da commit 81c161f

File tree: 6 files changed, +95 -48 lines


src/ReinforcementLearningCore/src/policies/learners.jl

-26 lines
This file was deleted.

src/ReinforcementLearningCore/src/policies/learners/abstract_learner.jl (new file)
+41 lines
@@ -0,0 +1,41 @@
+export AbstractLearner, Approximator
+
+using Flux
+using Functors: @functor
+
+abstract type AbstractLearner end
+
+Base.show(io::IO, m::MIME"text/plain", L::AbstractLearner) = show(io, m, convert(AnnotatedStructTree, L))
+
+# Take Learner and Environment, get state, send to RLCore.forward(Learner, State)
+function forward(L::Le, env::E) where {Le <: AbstractLearner, E <: AbstractEnv}
+    env |> state |> Flux.gpu |> (x -> forward(L, x)) |> Flux.cpu
+end
+
+function RLBase.optimise!(::AbstractLearner, ::AbstractStage, ::Trajectory) end
+
+
+"""
+    Approximator(model, optimiser)
+
+Wraps a Flux trainable model and implements the `RLBase.optimise!(::Approximator, ::Gradient)`
+interface. See the RLCore documentation for more information on proper usage.
+"""
+struct Approximator{M,O} <: AbstractLearner
+    model::M
+    optimiser_state::O
+end
+
+function Approximator(; model, optimiser)
+    optimiser_state = Flux.setup(optimiser, model)
+    Approximator(gpu(model), optimiser_state) # Pass model to GPU (if available) upon creation
+end
+
+Base.show(io::IO, m::MIME"text/plain", A::Approximator) = show(io, m, convert(AnnotatedStructTree, A))
+
+@functor Approximator (model,)
+
+forward(A::Approximator, args...; kwargs...) = A.model(args...; kwargs...)
+
+RLBase.optimise!(A::Approximator, grad) =
+    Flux.Optimise.update!(A.model, A.optimiser_state, grad)
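
Not part of the commit: a minimal usage sketch of the keyword constructor defined above. The network shape, learning rate, and the RLCore alias are illustrative assumptions, not taken from this diff.

using Flux
using ReinforcementLearningCore
const RLCore = ReinforcementLearningCore   # assumed alias for qualifying the unexported `forward`

# Hypothetical value network for a 4-dimensional state.
approx = Approximator(
    model = Chain(Dense(4 => 32, relu), Dense(32 => 1)),
    optimiser = Adam(1e-3),                # stored as Flux.setup(optimiser, model) state on construction
)

s = Flux.gpu(rand(Float32, 4))             # match the model's device (gpu is a no-op without a GPU)
v = RLCore.forward(approx, s)              # evaluates the wrapped model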

src/ReinforcementLearningCore/src/policies/learners/learners.jl (new file)
+3 lines
@@ -0,0 +1,3 @@
+include("abstract_learner.jl")
+include("tabular_approximator.jl")
+include("target_network.jl")

src/ReinforcementLearningCore/src/policies/learners/tabular_approximator.jl (new file)
+49 lines
@@ -0,0 +1,49 @@
+export TabularApproximator, TabularVApproximator, TabularQApproximator
+
+using Flux: gpu
+
+"""
+    TabularApproximator(table<:AbstractArray, opt)
+
+For `table` of 1-d, it will serve as a state value approximator.
+For `table` of 2-d, it will serve as a state-action value approximator.
+
+!!! warning
+    For `table` of 2-d, the first dimension is action and the second dimension is state.
+"""
+# TODO: add back missing AbstractApproximator
+struct TabularApproximator{N,A,O} <: AbstractLearner
+    table::A
+    optimizer::O
+    function TabularApproximator(table::A, opt::O) where {A<:AbstractArray,O}
+        n = ndims(table)
+        n <= 2 || throw(ArgumentError("the dimension of table must be <= 2"))
+        new{n,A,O}(table, opt)
+    end
+end
+
+TabularVApproximator(; n_state, init = 0.0, opt = InvDecay(1.0)) =
+    TabularApproximator(fill(init, n_state), opt)
+
+TabularQApproximator(; n_state, n_action, init = 0.0, opt = InvDecay(1.0)) =
+    TabularApproximator(fill(init, n_action, n_state), opt)
+
+# Take Learner and Environment, get state, send to RLCore.forward(Learner, State)
+function forward(L::TabularApproximator, env::E) where {E <: AbstractEnv}
+    env |> state |> (x -> forward(L, x))
+end
+
+RLCore.forward(
+    app::TabularApproximator{1,R,O},
+    s::I,
+) where {R<:AbstractArray,O,I<:Integer} = @views app.table[s]
+
+RLCore.forward(
+    app::TabularApproximator{2,R,O},
+    s::I,
+) where {R<:AbstractArray,O,I<:Integer} = @views app.table[:, s]
+RLCore.forward(
+    app::TabularApproximator{2,R,O},
+    s::I1,
+    a::I2,
+) where {R<:AbstractArray,O,I1<:Integer,I2<:Integer} = @views app.table[a, s]
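
A quick orientation example for the tabular approximators above (not from the diff; the state and action counts are placeholders):

q = TabularQApproximator(n_state = 5, n_action = 2)   # 2×5 table of zeros with an InvDecay(1.0) optimizer
v = TabularVApproximator(n_state = 5)                  # length-5 state-value table

RLCore.forward(q, 3)      # view of the Q-values of every action in state 3 (column 3 of the table)
RLCore.forward(q, 3, 1)   # Q-value of action 1 in state 3
RLCore.forward(v, 3)      # state value of state 3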

src/ReinforcementLearningCore/src/policies/approximator.jl renamed to src/ReinforcementLearningCore/src/policies/learners/target_network.jl

+1 -20 lines
@@ -3,25 +3,6 @@ export Approximator, TargetNetwork, target, model
 using Flux
 
 
-"""
-    Approximator(model, optimiser)
-
-Wraps a Flux trainable model and implements the `RLBase.optimise!(::Approximator, ::Gradient)`
-interface. See the RLCore documentation for more information on proper usage.
-"""
-Base.@kwdef mutable struct Approximator{M,O}
-    model::M
-    optimiser::O
-end
-
-Base.show(io::IO, m::MIME"text/plain", A::Approximator) = show(io, m, convert(AnnotatedStructTree, A))
-
-@functor Approximator (model,)
-
-forward(A::Approximator, args...; kwargs...) = A.model(args...; kwargs...)
-
-RLBase.optimise!(A::Approximator, gs) = Flux.Optimise.update!(A.optimiser, Flux.params(A), gs)
-
 target(ap::Approximator) = ap.model #see TargetNetwork
 model(ap::Approximator) = ap.model #see TargetNetwork
 
@@ -68,7 +49,7 @@ target(tn::TargetNetwork) = tn.target
 
 function RLBase.optimise!(tn::TargetNetwork, gs)
     A = tn.network
-    Flux.Optimise.update!(A.optimiser, Flux.params(A), gs)
+    Flux.Optimise.update!(A.optimiser_state, Flux.params(A), gs)
     tn.n_optimise += 1
 
     if tn.n_optimise % tn.sync_freq == 0
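
Besides moving the Approximator definition out of this file, the optimise! method above now reads the wrapped Approximator's optimiser_state field rather than the removed optimiser field. For reference (not from this commit), the explicit optimiser-state workflow produced by Flux.setup looks roughly like this; the model and data are made up:

using Flux

model = Chain(Dense(4 => 16, relu), Dense(16 => 1))
opt_state = Flux.setup(Adam(1e-3), model)          # the kind of object stored in Approximator.optimiser_state

x, y = rand(Float32, 4, 32), rand(Float32, 1, 32)
grads = Flux.gradient(m -> Flux.mse(m(x), y), model)
Flux.update!(opt_state, model, grads[1])           # explicit-state update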

src/ReinforcementLearningCore/src/policies/policies.jl
@@ -1,6 +1,5 @@
 include("agent/agent.jl")
 include("random_policy.jl")
+include("learners/learners.jl")
 include("explorers/explorers.jl")
-include("learners.jl")
 include("q_based_policy.jl")
-include("approximator.jl")
