diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 01804ca..35a34d1 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -18,8 +18,8 @@ jobs: fail-fast: false matrix: version: - - '1.9' - '1' + - '^1.11.0-alpha' - 'nightly' os: - ubuntu-latest diff --git a/Project.toml b/Project.toml index 59e64e8..193d5a0 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "ReinforcementLearningTrajectories" uuid = "6486599b-a3cd-4e92-a99a-2cea90cc8c3c" -version = "0.3.7" +version = "0.4" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" diff --git a/src/common/ElasticArraySARTTraces.jl b/src/common/ElasticArraySARTSATraces.jl similarity index 69% rename from src/common/ElasticArraySARTTraces.jl rename to src/common/ElasticArraySARTSATraces.jl index 1090ece..0644396 100644 --- a/src/common/ElasticArraySARTTraces.jl +++ b/src/common/ElasticArraySARTSATraces.jl @@ -1,8 +1,6 @@ -export ElasticArraySARTTraces +export ElasticArraySARTSATraces -using ElasticArrays: ElasticArray, resize_lastdim! - -const ElasticArraySARTTraces = Traces{ +const ElasticArraySARTSATraces = Traces{ SS′AA′RT, <:Tuple{ <:MultiplexTraces{SS′,<:Trace{<:ElasticArray}}, @@ -12,7 +10,7 @@ const ElasticArraySARTTraces = Traces{ } } -function ElasticArraySARTTraces(; +function ElasticArraySARTSATraces(; state=Int => (), action=Int => (), reward=Float32 => (), @@ -31,10 +29,3 @@ function ElasticArraySARTTraces(; ) end -##### -# extensions for ElasticArrays -##### - -Base.push!(a::ElasticArray, x) = append!(a, x) -Base.push!(a::ElasticArray{T,1}, x) where {T} = append!(a, [x]) -Base.empty!(a::ElasticArray) = resize_lastdim!(a, 0) \ No newline at end of file diff --git a/src/common/ElasticArraySARTSTraces.jl b/src/common/ElasticArraySARTSTraces.jl new file mode 100644 index 0000000..c833882 --- /dev/null +++ b/src/common/ElasticArraySARTSTraces.jl @@ -0,0 +1,30 @@ +export ElasticArraySARTSTraces + +const ElasticArraySARTSTraces = Traces{ + SS′ART, + <:Tuple{ + <:MultiplexTraces{SS′,<:Trace{<:ElasticArray}}, + <:Trace{<:ElasticArray}, + <:Trace{<:ElasticArray}, + <:Trace{<:ElasticArray}, + } +} + +function ElasticArraySARTSTraces(; + state=Int => (), + action=Int => (), + reward=Float32 => (), + terminal=Bool => ()) + + state_eltype, state_size = state + action_eltype, action_size = action + reward_eltype, reward_size = reward + terminal_eltype, terminal_size = terminal + + MultiplexTraces{SS′}(ElasticArray{state_eltype}(undef, state_size..., 0)) + + Traces( + action = ElasticArray{action_eltype}(undef, action_size..., 0), + reward=ElasticArray{reward_eltype}(undef, reward_size..., 0), + terminal=ElasticArray{terminal_eltype}(undef, terminal_size..., 0), + ) +end diff --git a/src/common/ElasticArraySLARTTraces.jl b/src/common/ElasticArraySLARTTraces.jl new file mode 100644 index 0000000..517eb7f --- /dev/null +++ b/src/common/ElasticArraySLARTTraces.jl @@ -0,0 +1,35 @@ +export ElasticArraySLARTTraces + +const ElasticArraySLARTTraces = Traces{ + SS′LL′AA′RT, + <:Tuple{ + <:MultiplexTraces{SS′,<:Trace{<:ElasticArray}}, + <:MultiplexTraces{LL′,<:Trace{<:ElasticArray}}, + <:MultiplexTraces{AA′,<:Trace{<:ElasticArray}}, + <:Trace{<:ElasticArray}, + <:Trace{<:ElasticArray}, + } +} + +function ElasticArraySLARTTraces(; + capacity::Int, + state=Int => (), + legal_actions_mask=Bool => (), + action=Int => (), + reward=Float32 => (), + terminal=Bool => () +) + state_eltype, state_size = state + action_eltype, action_size = action + legal_actions_mask_eltype, legal_actions_mask_size = legal_actions_mask + reward_eltype, reward_size = reward + terminal_eltype, terminal_size = terminal + + MultiplexTraces{SS′}(ElasticArray{state_eltype}(undef, state_size..., 0)) + + MultiplexTraces{LL′}(ElasticArray{legal_actions_mask_eltype}(undef, legal_actions_mask_size..., 0)) + + MultiplexTraces{AA′}(ElasticArray{action_eltype}(undef, action_size..., 0)) + + Traces( + reward=ElasticArray{reward_eltype}(undef, reward_size..., 0), + terminal=ElasticArray{terminal_eltype}(undef, terminal_size..., 0), + ) +end diff --git a/src/common/common.jl b/src/common/common.jl index b334f7a..f8b4023 100644 --- a/src/common/common.jl +++ b/src/common/common.jl @@ -14,4 +14,7 @@ include("CircularArraySARTSTraces.jl") include("CircularArraySARTSATraces.jl") include("CircularArraySLARTTraces.jl") include("CircularPrioritizedTraces.jl") -include("ElasticArraySARTTraces.jl") +include("common_elastic_array.jl") +include("ElasticArraySARTSTraces.jl") +include("ElasticArraySARTSATraces.jl") +include("ElasticArraySLARTTraces.jl") diff --git a/src/common/common_elastic_array.jl b/src/common/common_elastic_array.jl new file mode 100644 index 0000000..01b4090 --- /dev/null +++ b/src/common/common_elastic_array.jl @@ -0,0 +1,9 @@ +using ElasticArrays: ElasticArray, resize_lastdim! + +##### +# extensions for ElasticArrays +##### + +Base.push!(a::ElasticArray, x) = append!(a, x) +Base.push!(a::ElasticArray{T,1}, x) where {T} = append!(a, [x]) +Base.empty!(a::ElasticArray) = resize_lastdim!(a, 0) diff --git a/src/episodes.jl b/src/episodes.jl index 0113a1c..e93fb92 100644 --- a/src/episodes.jl +++ b/src/episodes.jl @@ -1,5 +1,6 @@ export EpisodesBuffer, PartialNamedTuple import DataStructures.CircularBuffer +using ElasticArrays: ElasticArray, ElasticVector """ EpisodesBuffer(traces::AbstractTraces) @@ -68,12 +69,20 @@ function EpisodesBuffer(traces::AbstractTraces) end end -Base.getindex(es::EpisodesBuffer, idx...) = getindex(es.traces, idx...) -Base.setindex!(es::EpisodesBuffer, idx...) = setindex!(es.traces, idx...) -Base.size(es::EpisodesBuffer) = size(es.traces) -Base.length(es::EpisodesBuffer) = length(es.traces) -Base.keys(es::EpisodesBuffer) = keys(es.traces) -Base.keys(es::EpisodesBuffer{<:Any,<:Any,<:CircularPrioritizedTraces}) = keys(es.traces.traces) +function Base.getindex(es::EpisodesBuffer, idx::Int...) + @boundscheck all(es.sampleable_inds[idx...]) + getindex(es.traces, idx...) +end + +function Base.getindex(es::EpisodesBuffer, idx...) + getindex(es.traces, idx...) +end + +Base.setindex!(eb::EpisodesBuffer, idx...) = setindex!(eb.traces, idx...) +Base.size(eb::EpisodesBuffer) = size(eb.traces) +Base.length(eb::EpisodesBuffer) = length(eb.traces) +Base.keys(eb::EpisodesBuffer) = keys(eb.traces) +Base.keys(eb::EpisodesBuffer{<:Any,<:Any,<:CircularPrioritizedTraces}) = keys(eb.traces.traces) function Base.show(io::IO, m::MIME"text/plain", eb::EpisodesBuffer{names}) where {names} s = nameof(typeof(eb)) t = eb.traces @@ -82,7 +91,7 @@ function Base.show(io::IO, m::MIME"text/plain", eb::EpisodesBuffer{names}) where end ispartial_insert(traces::Traces, xs) = length(xs) < length(traces.traces) #this is the number of traces it contains not the number of steps. -ispartial_insert(es::EpisodesBuffer, xs) = ispartial_insert(es.traces, xs) +ispartial_insert(eb::EpisodesBuffer, xs) = ispartial_insert(eb.traces, xs) ispartial_insert(traces::CircularPrioritizedTraces, xs) = ispartial_insert(traces.traces, xs) function pad!(trace::Trace) @@ -90,6 +99,8 @@ function pad!(trace::Trace) return nothing end +pad!(vect::ElasticArray{T, Vector{T}}) where {T} = push!(vect, zero(T)) +pad!(vect::ElasticVector{T, Vector{T}}) where {T} = push!(vect, zero(T)) pad!(buf::CircularArrayBuffer{T,N,A}) where {T,N,A} = push!(buf, zero(T)) pad!(vect::Vector{T}) where {T} = push!(vect, zero(T)) @@ -123,9 +134,9 @@ pad!(vect::Vector{T}) where {T} = push!(vect, zero(T)) return :($ex) end -fill_multiplex(es::EpisodesBuffer) = fill_multiplex(es.traces) +fill_multiplex(eb::EpisodesBuffer) = fill_multiplex(eb.traces) -fill_multiplex(es::EpisodesBuffer{<:Any,<:Any,<:CircularPrioritizedTraces}) = fill_multiplex(es.traces.traces) +fill_multiplex(eb::EpisodesBuffer{<:Any,<:Any,<:CircularPrioritizedTraces}) = fill_multiplex(eb.traces.traces) function Base.push!(eb::EpisodesBuffer, xs::NamedTuple) push!(eb.traces, xs) @@ -162,17 +173,17 @@ function Base.push!(eb::EpisodesBuffer, xs::PartialNamedTuple) #wrap a NamedTupl end for f in (:pop!, :popfirst!) - @eval function Base.$f(es::EpisodesBuffer) - $f(es.episodes_lengths) - $f(es.sampleable_inds) - $f(es.step_numbers) - $f(es.traces) + @eval function Base.$f(eb::EpisodesBuffer) + $f(eb.episodes_lengths) + $f(eb.sampleable_inds) + $f(eb.step_numbers) + $f(eb.traces) end end -function Base.empty!(es::EpisodesBuffer) - empty!(es.traces) - empty!(es.episodes_lengths) - empty!(es.sampleable_inds) - empty!(es.step_numbers) +function Base.empty!(eb::EpisodesBuffer) + empty!(eb.traces) + empty!(eb.episodes_lengths) + empty!(eb.sampleable_inds) + empty!(eb.step_numbers) end diff --git a/src/traces.jl b/src/traces.jl index 9676c03..1cd36bb 100644 --- a/src/traces.jl +++ b/src/traces.jl @@ -3,6 +3,7 @@ export Trace, Traces, MultiplexTraces import MacroTools: @forward import CircularArrayBuffers.CircularArrayBuffer +using ElasticArrays: ElasticArray import Adapt ##### @@ -55,6 +56,7 @@ Base.setindex!(s::Trace, v, I) = setindex!(s.parent, v, ntuple(i -> i == ndims(s capacity(t::AbstractTrace) = ReinforcementLearningTrajectories.capacity(t.parent) capacity(t::CircularArrayBuffer) = CircularArrayBuffers.capacity(t) capacity(::AbstractVector) = Inf +capacity(::ElasticArray) = Inf ##### diff --git a/test/common.jl b/test/common.jl index 06e69a8..afcc89b 100644 --- a/test/common.jl +++ b/test/common.jl @@ -94,15 +94,15 @@ end @test batch.terminal == Bool[0, 0, 0] |> gpu end -@testset "ElasticArraySARTTraces" begin - t = ElasticArraySARTTraces(; +@testset "ElasticArraySARTSTraces" begin + t = ElasticArraySARTSTraces(; state=Float32 => (2, 3), action=Int => (), reward=Float32 => (), terminal=Bool => () ) - @test t isa ElasticArraySARTTraces + @test t isa ElasticArraySARTSTraces push!(t, (state=ones(Float32, 2, 3), action=1)) push!(t, (reward=1.0f0, terminal=false, state=ones(Float32, 2, 3) * 2, action=2)) @@ -185,4 +185,4 @@ end eb[:priority, [1, 2]] = [0, 0] @test eb[:priority] == [zeros(2);ones(8)] -end \ No newline at end of file +end diff --git a/test/episodes.jl b/test/episodes.jl index 8244b19..ef7855c 100644 --- a/test/episodes.jl +++ b/test/episodes.jl @@ -1,6 +1,7 @@ using ReinforcementLearningTrajectories using CircularArrayBuffers using Test + @testset "EpisodesBuffer" begin @testset "with circular traces" begin eb = EpisodesBuffer( @@ -113,6 +114,8 @@ using Test @test eb.episodes_lengths[end] == 0 @test eb.step_numbers[end] == 1 @test eb.sampleable_inds == [1,1,1,1,1,0,0] + @test eb[:action][6] == 6 + @test eb[:next_action][5] == 6 @test eb[6][:reward] == 0 #6 is not a valid index, the reward there is dummy, filled as zero ep2_len = 0 for (j,i) = enumerate(8:11) @@ -197,4 +200,180 @@ using Test @test eb.step_numbers == [1:16;1:16] @test length(eb) == 31 end + @testset "with ElasticArraySARTSTraces traces" begin + eb = EpisodesBuffer( + ElasticArraySARTSTraces() + ) + #push a first episode l=5 + push!(eb, (state = 1,)) + @test eb.sampleable_inds[end] == 0 + @test eb.episodes_lengths[end] == 0 + @test eb.step_numbers[end] == 1 + for i = 1:5 + push!(eb, (state = i+1, action =i, reward = i, terminal = false)) + @test eb.sampleable_inds[end] == 0 + @test eb.sampleable_inds[end-1] == 1 + @test eb.step_numbers[end] == i + 1 + @test eb.episodes_lengths[end-i:end] == fill(i, i+1) + end + @test eb.sampleable_inds == [1,1,1,1,1,0] + @test length(eb.traces) == 5 + #start new episode of 6 periods. + push!(eb, (state = 7,)) + @test eb.sampleable_inds[end] == 0 + @test eb.sampleable_inds[end-1] == 0 + @test eb.episodes_lengths[end] == 0 + @test eb.step_numbers[end] == 1 + @test eb.sampleable_inds == [1,1,1,1,1,0,0] + @test eb[6][:reward] == 0 #6 is not a valid index, the reward there is filled as zero + ep2_len = 0 + for (j,i) = enumerate(8:11) + ep2_len += 1 + push!(eb, (state = i, action =i-1, reward = i-1, terminal = false)) + @test eb.sampleable_inds[end] == 0 + @test eb.sampleable_inds[end-1] == 1 + @test eb.step_numbers[end] == j + 1 + @test eb.episodes_lengths[end-j:end] == fill(ep2_len, ep2_len + 1) + end + @test eb.sampleable_inds == [1,1,1,1,1,0,1,1,1,1,0] + @test length(eb.traces) == 10 + + for (i, s) = enumerate(12:13) + ep2_len += 1 + push!(eb, (state = s, action =s-1, reward = s-1, terminal = false)) + @test eb.sampleable_inds[end] == 0 + @test eb.sampleable_inds[end-1] == 1 + @test eb.step_numbers[end] == i + 1 + 4 + @test eb.episodes_lengths[end-ep2_len:end] == fill(ep2_len, ep2_len + 1) + end + #episode 1 + for i in 3:13 + if i in (6, 13) + @test eb.sampleable_inds[i] == 0 + continue + else + @test eb.sampleable_inds[i] == 1 + end + b = eb[i] + @test b[:state] == b[:action] == b[:reward] == i + @test b[:next_state] == i + 1 + end + #episode 2 + #start a third episode + push!(eb, (state = 14, )) + @test eb.sampleable_inds[end] == 0 + @test eb.sampleable_inds[end-1] == 0 + @test eb.episodes_lengths[end] == 0 + @test eb.step_numbers[end] == 1 + + for (i,s) in enumerate(15:26) + push!(eb, (state = s, action =s-1, reward = s-1, terminal = false)) + end + @test eb.sampleable_inds[end-5:end] == [fill(true, 5); [false]] + @test eb.episodes_lengths[end-10:end] == fill(length(15:26), 11) + @test eb.step_numbers[end-10:end] == [3:13;] + #= Deactivated until https://github.com/JuliaArrays/ElasticArrays.jl/pull/56/files merged and pop!/popfirst! added to ElasticArrays + step = popfirst!(eb) + @test length(eb) == length(eb.sampleable_inds) - 1 == length(eb.step_numbers) - 1 == length(eb.episodes_lengths) - 1 == 9 + @test first(eb.step_numbers) == 4 + step = pop!(eb) + @test length(eb) == length(eb.sampleable_inds) - 1 == length(eb.step_numbers) - 1 == length(eb.episodes_lengths) - 1 == 8 + @test last(eb.step_numbers) == 12 + @test size(eb) == size(eb.traces) == (8,) + empty!(eb) + @test size(eb) == (0,) == size(eb.traces) == size(eb.sampleable_inds) == size(eb.episodes_lengths) == size(eb.step_numbers) + show(eb); + =# + end + + @testset "ElasticArraySARTSATraces with PartialNamedTuple" begin + eb = EpisodesBuffer( + ElasticArraySARTSATraces() + ) + #push a first episode l=5 + push!(eb, (state = 1,)) + @test eb.sampleable_inds[end] == 0 + @test eb.episodes_lengths[end] == 0 + @test eb.step_numbers[end] == 1 + for i = 1:5 + push!(eb, (state = i+1, action =i, reward = i, terminal = false)) + @test eb.sampleable_inds[end] == 0 + @test eb.sampleable_inds[end-1] == 1 + @test eb.step_numbers[end] == i + 1 + @test eb.episodes_lengths[end-i:end] == fill(i, i+1) + end + push!(eb, PartialNamedTuple((action = 6,))) + @test eb.sampleable_inds == [1,1,1,1,1,0] + @test length(eb.traces) == 5 + #start new episode of 6 periods. + push!(eb, (state = 7,)) + @test eb.sampleable_inds[end] == 0 + @test eb.sampleable_inds[end-1] == 0 + @test eb.episodes_lengths[end] == 0 + @test eb.step_numbers[end] == 1 + @test eb.sampleable_inds == [1,1,1,1,1,0,0] + @test eb[:action][6] == 6 + @test eb[:next_action][5] == 6 + @test eb[:reward][6] == 0 #6 is not a valid index, the reward there is dummy, filled as zero + ep2_len = 0 + for (j,i) = enumerate(8:11) + ep2_len += 1 + push!(eb, (state = i, action =i-1, reward = i-1, terminal = false)) + @test eb.sampleable_inds[end] == 0 + @test eb.sampleable_inds[end-1] == 1 + @test eb.step_numbers[end] == j + 1 + @test eb.episodes_lengths[end-j:end] == fill(ep2_len, ep2_len + 1) + end + @test eb.sampleable_inds == [1,1,1,1,1,0,1,1,1,1,0] + @test length(eb.traces) == 9 #an action is missing at this stage + for (i, s) = enumerate(12:13) + ep2_len += 1 + push!(eb, (state = s, action =s-1, reward = s-1, terminal = false)) + @test eb.sampleable_inds[end] == 0 + @test eb.sampleable_inds[end-1] == 1 + @test eb.step_numbers[end] == i + 1 + 4 + @test eb.episodes_lengths[end-ep2_len:end] == fill(ep2_len, ep2_len + 1) + end + push!(eb, PartialNamedTuple((action = 13,))) + @test length(eb.traces) == 12 + #episode 1 + for i in 1:13 + if i in (6, 13) + @test eb.sampleable_inds[i] == 0 + continue + else + @test eb.sampleable_inds[i] == 1 + end + b = eb[i] + @test b[:state] == b[:action] == b[:reward] == i + @test b[:next_state] == b[:next_action] == i + 1 + end + #episode 2 + #start a third episode + push!(eb, (state = 14,)) + @test eb.sampleable_inds[end] == 0 + @test eb.sampleable_inds[end-1] == 0 + @test eb.episodes_lengths[end] == 0 + @test eb.step_numbers[end] == 1 + #push until it reaches it own start + for (i,s) in enumerate(15:26) + push!(eb, (state = s, action =s-1, reward = s-1, terminal = false)) + end + push!(eb, PartialNamedTuple((action = 26,))) + @test eb.sampleable_inds[end-10:end] == [fill(true, 10); [false]] + @test eb.episodes_lengths[end-10:end] == fill(length(15:26), 11) + @test eb.step_numbers[end-10:end] == [3:13;] + #= Deactivated until https://github.com/JuliaArrays/ElasticArrays.jl/pull/56/files merged and pop!/popfirst! added to ElasticArrays + step = popfirst!(eb) + @test length(eb) == length(eb.sampleable_inds) - 1 == length(eb.step_numbers) - 1 == length(eb.episodes_lengths) - 1 == 9 + @test first(eb.step_numbers) == 4 + step = pop!(eb) + @test length(eb) == length(eb.sampleable_inds) - 1 == length(eb.step_numbers) - 1 == length(eb.episodes_lengths) - 1 == 8 + @test last(eb.step_numbers) == 12 + @test size(eb) == size(eb.traces) == (8,) + empty!(eb) + @test size(eb) == (0,) == size(eb.traces) == size(eb.sampleable_inds) == size(eb.episodes_lengths) == size(eb.step_numbers) + show(eb); + =# + end end diff --git a/test/traces.jl b/test/traces.jl index 63003be..f30b311 100644 --- a/test/traces.jl +++ b/test/traces.jl @@ -137,6 +137,24 @@ using ReinforcementLearningTrajectories: build_trace_index build_trace_index(typeof(t2).parameters[1], typeof(t2).parameters[2]) end +@testset "build_trace_index ElasticArraySARTSATraces" begin + t1 = ElasticArraySARTSATraces(; + state=Float32 => (2, 3), + action=Float32 => (2,), + reward=Float32 => (), + terminal=Bool => () + ) + @test build_trace_index(typeof(t1).parameters[1], typeof(t1).parameters[2]) == Dict(:reward => 3, + :next_state => 1, + :state => 1, + :action => 2, + :next_action => 2, + :terminal => 4) + + t2 = Traces(; a=[2, 3], b=[false, true]) + build_trace_index(typeof(t2).parameters[1], typeof(t2).parameters[2]) +end + @testset "push!(ts::Traces{names,Trs,N,E}, ::Val{k}, v)" begin t1 = CircularArraySARTSATraces(; capacity=3, @@ -152,6 +170,29 @@ end @test size(Base.getindex(t1, 1).state) == (2,3) + t2 = Traces(; a=[2, 3], b=[false, true]) + push!(t2, Val(:a), 5) + @test t2[:a][3] == 5 + + @test size(Base.getindex(t2, :a)) == (3,) + @test Base.getindex(t2, 1) == (; a = 2, b= false) +end + + +@testset "push!(ts::Traces{names,Trs,N,E}, ::Val{k}, v)" begin + t1 = ElasticArraySARTSATraces( + state=Float32 => (2, 3), + action=Float32 => (2,), + reward=Float32 => (), + terminal=Bool => () + ) + push!(t1, Val(:reward), 5) + @test t1[:reward][1] == 5 + + @test size(Base.getindex(t1, :reward)) == (1,) + @test size(Base.getindex(t1, :state)) == (0,) + + t2 = Traces(; a=[2, 3], b=[false, true]) push!(t2, Val(:a), 5) @test t2[:a][3] == 5