diff --git a/src/common/CircularArraySARTSATraces.jl b/src/common/CircularArraySARTSATraces.jl index 393e64b..53678b7 100644 --- a/src/common/CircularArraySARTSATraces.jl +++ b/src/common/CircularArraySARTSATraces.jl @@ -24,12 +24,12 @@ function CircularArraySARTSATraces(; reward_eltype, reward_size = reward terminal_eltype, terminal_size = terminal - MultiplexTraces{SS′}(CircularArrayBuffer{state_eltype}(state_size..., capacity+2)) + + MultiplexTraces{SS′}(CircularArrayBuffer{state_eltype}(state_size..., capacity+1)) + MultiplexTraces{AA′}(CircularArrayBuffer{action_eltype}(action_size..., capacity+1)) + Traces( - reward=CircularArrayBuffer{reward_eltype}(reward_size..., capacity+1), - terminal=CircularArrayBuffer{terminal_eltype}(terminal_size..., capacity+1), + reward=CircularArrayBuffer{reward_eltype}(reward_size..., capacity), + terminal=CircularArrayBuffer{terminal_eltype}(terminal_size..., capacity), ) end -CircularArrayBuffers.capacity(t::CircularArraySARTSATraces) = CircularArrayBuffers.capacity(minimum(map(capacity,t.traces))) +CircularArrayBuffers.capacity(t::CircularArraySARTSATraces) = minimum(map(capacity,t.traces)) diff --git a/src/common/CircularArraySARTSTraces.jl b/src/common/CircularArraySARTSTraces.jl index acf999c..eb43038 100644 --- a/src/common/CircularArraySARTSTraces.jl +++ b/src/common/CircularArraySARTSTraces.jl @@ -17,8 +17,8 @@ function CircularArraySARTSTraces(; state=Int => (), action=Int => (), reward=Float32 => (), - terminal=Bool => ()) - + terminal=Bool => () +) state_eltype, state_size = state action_eltype, action_size = action reward_eltype, reward_size = reward @@ -32,4 +32,4 @@ function CircularArraySARTSTraces(; ) end -CircularArrayBuffers.capacity(t::CircularArraySARTSTraces) = CircularArrayBuffers.capacity(minimum(map(capacity,t.traces))) +CircularArrayBuffers.capacity(t::CircularArraySARTSTraces) = minimum(map(capacity,t.traces)) diff --git a/src/common/CircularArraySLARTTraces.jl b/src/common/CircularArraySLARTTraces.jl index 73f6da9..0677906 100644 --- a/src/common/CircularArraySLARTTraces.jl +++ b/src/common/CircularArraySLARTTraces.jl @@ -34,4 +34,4 @@ function CircularArraySLARTTraces(; ) end -CircularArrayBuffers.capacity(t::CircularArraySLARTTraces) = CircularArrayBuffers.capacity(minimum(map(capacity,t.traces))) \ No newline at end of file +CircularArrayBuffers.capacity(t::CircularArraySLARTTraces) = minimum(map(capacity,t.traces)) \ No newline at end of file diff --git a/src/common/CircularPrioritizedTraces.jl b/src/common/CircularPrioritizedTraces.jl index 09b2ffd..76581af 100644 --- a/src/common/CircularPrioritizedTraces.jl +++ b/src/common/CircularPrioritizedTraces.jl @@ -12,11 +12,7 @@ end function CircularPrioritizedTraces(traces::AbstractTraces{names,Ts}; default_priority) where {names,Ts} new_names = (:key, :priority, names...) new_Ts = Tuple{Int,Float32,Ts.parameters...} - if traces isa CircularArraySARTSATraces - c = capacity(traces) - 1 - else - c = capacity(traces) - end + c = capacity(traces) CircularPrioritizedTraces{typeof(traces),new_names,new_Ts}( CircularVectorBuffer{Int}(c), SumTree(c), @@ -38,22 +34,6 @@ function Base.push!(t::CircularPrioritizedTraces, x) end end -function Base.push!(t::CircularPrioritizedTraces{<:CircularArraySARTSATraces}, x) - initial_length = length(t.traces) - push!(t.traces, x) - if length(t.traces) == 1 - push!(t.keys, 1) - push!(t.priorities, t.default_priority) - elseif length(t.traces) > 1 && (initial_length < length(t.traces) || initial_length == capacity(t.traces)-1 ) - # only add a key if the length changes after insertion of the tuple - # or if the trace is already at capacity - push!(t.keys, t.keys[end] + 1) - push!(t.priorities, t.default_priority) - else - # may be partial inserting at the first step, ignore it - end -end - function Base.setindex!(t::CircularPrioritizedTraces, vs, k::Symbol, keys) if k === :priority @assert length(vs) == length(keys) diff --git a/src/episodes.jl b/src/episodes.jl index 90a8b79..d4314d7 100644 --- a/src/episodes.jl +++ b/src/episodes.jl @@ -5,9 +5,9 @@ using ElasticArrays: ElasticArray, ElasticVector """ EpisodesBuffer(traces::AbstractTraces) -Wraps an `AbstractTraces` object, usually the container of a `Trajectory`. +Wraps an `AbstractTraces` object, usually the container of a `Trajectory`. `EpisodesBuffer` tracks the indexes of the `traces` object that belong to the same episodes. -To that end, it stores +To that end, it stores 1. an vector `sampleable_inds` of Booleans that determine whether an index in Traces is legally sampleable (i.e., it is not the index of a last state of an episode); 2. a vector `episodes_lengths` that contains the total duration of the episode that each step belong to; @@ -32,7 +32,7 @@ end """ PartialNamedTuple(::NamedTuple) -Wraps a NamedTuple to signal an EpisodesBuffer that it is pushed into that it should +Wraps a NamedTuple to signal an EpisodesBuffer that it is pushed into that it should ignore the fact that this is a partial insertion. Used at the end of an episode to complete multiplex traces before moving to the next episode. """ @@ -43,15 +43,13 @@ end # Capacity of an EpisodesBuffer is the capacity of the underlying traces + 1 for certain cases function is_capacity_plus_one(traces::AbstractTraces) if any(t->t isa MultiplexTraces, traces.traces) - # MultiplexTraces buffer next_state and next_action, so we need to add one to the capacity - return true - elseif traces isa CircularPrioritizedTraces - # CircularPrioritizedTraces buffer next_state and next_action, so we need to add one to the capacity + # MultiplexTraces buffer next_state or next_action, so we need to add one to the capacity return true else false end end +is_capacity_plus_one(traces::CircularPrioritizedTraces) = is_capacity_plus_one(traces.traces) function EpisodesBuffer(traces::AbstractTraces) cap = is_capacity_plus_one(traces) ? capacity(traces) + 1 : capacity(traces) @@ -70,7 +68,7 @@ function EpisodesBuffer(traces::AbstractTraces) end function Base.getindex(es::EpisodesBuffer, idx::Int...) - @boundscheck all(es.sampleable_inds[idx...]) + @boundscheck all(es.sampleable_inds[idx...]) || throw(BoundsError(es.sampleable_inds, idx)) getindex(es.traces, idx...) end @@ -79,6 +77,7 @@ function Base.getindex(es::EpisodesBuffer, idx...) end Base.setindex!(eb::EpisodesBuffer, idx...) = setindex!(eb.traces, idx...) +capacity(eb::EpisodesBuffer) = capacity(eb.traces) Base.size(eb::EpisodesBuffer) = size(eb.traces) Base.length(eb::EpisodesBuffer) = length(eb.traces) Base.keys(eb::EpisodesBuffer) = keys(eb.traces) @@ -118,8 +117,6 @@ pad!(vect::Vector{T}) where {T} = push!(vect, zero(T)) end elseif traces_signature <: Tuple traces_signature = traces_signature.parameters - - for tr in traces_signature if !(tr <: MultiplexTraces) #push a duplicate of last element as a dummy element, should never be sampled. @@ -148,7 +145,7 @@ function Base.push!(eb::EpisodesBuffer, xs::NamedTuple) push!(eb.episodes_lengths, 0) push!(eb.sampleable_inds, 0) elseif !partial #typical inserting - if haskey(eb,:next_action) && length(eb) < max_length(eb) # if trace has next_action and lengths are mismatched + if haskey(eb,:next_action) # if trace has next_action if eb.step_numbers[end] > 1 # and if there are sufficient steps in the current episode eb.sampleable_inds[end-1] = 1 # steps are indexable one step later end @@ -171,33 +168,11 @@ function Base.push!(eb::EpisodesBuffer, xs::NamedTuple) return nothing end -function Base.push!(eb::EpisodesBuffer, xs::PartialNamedTuple) #wrap a NamedTuple to push without incrementing the step number. - push!(eb.traces, xs.namedtuple) - eb.sampleable_inds[end-1] = 1 #completes the episode trajectory. -end - -function Base.push!(eb::EpisodesBuffer{<:Any,<:Any,<:CircularArraySARTSATraces}, xs::PartialNamedTuple) - if max_length(eb) == capacity(eb.traces) - popfirst!(eb) - end +function Base.push!(eb::EpisodesBuffer, xs::PartialNamedTuple) #wrap a NamedTuple to push without incrementing the step number. push!(eb.traces, xs.namedtuple) eb.sampleable_inds[end-1] = 1 #completes the episode trajectory. end -function Base.push!(eb::EpisodesBuffer{<:Any,<:Any,<:CircularPrioritizedTraces{<:CircularArraySARTSATraces}}, xs::PartialNamedTuple{@NamedTuple{action::Int64}}) - if max_length(eb) == capacity(eb.traces) - addition = (name => zero(eltype(eb.traces[name])) for name in [:state, :reward, :terminal]) - xs = merge(xs.namedtuple, addition) - push!(eb.traces, xs) - pop!(eb.traces[:state].trace) - pop!(eb.traces[:reward]) - pop!(eb.traces[:terminal]) - else - push!(eb.traces, xs.namedtuple) - eb.sampleable_inds[end-1] = 1 - end -end - for f in (:pop!, :popfirst!) @eval function Base.$f(eb::EpisodesBuffer) $f(eb.episodes_lengths) diff --git a/src/samplers.jl b/src/samplers.jl index e5443a7..21628bd 100644 --- a/src/samplers.jl +++ b/src/samplers.jl @@ -93,10 +93,10 @@ export MetaSampler """ MetaSampler(::NamedTuple) -Wraps a NamedTuple containing multiple samplers. When sampled, returns a named tuple with a +Wraps a NamedTuple containing multiple samplers. When sampled, returns a named tuple with a batch from each sampler. Used internally for algorithms that sample multiple times per epoch. -Note that a single "sampling" with a MetaSampler only increases the Trajectory controler +Note that a single "sampling" with a MetaSampler only increases the Trajectory controler count by 1, not by the number of internal samplers. This should be taken into account when initializing an agent. @@ -131,15 +131,15 @@ export MultiBatchSampler """ MultiBatchSampler(sampler, n) -Wraps a sampler. When sampled, will sample n batches using sampler. Useful in combination +Wraps a sampler. When sampled, will sample n batches using sampler. Useful in combination with MetaSampler to allow different sampling rates between samplers. -Note that a single "sampling" with a MultiBatchSampler only increases the Trajectory +Note that a single "sampling" with a MultiBatchSampler only increases the Trajectory controler count by 1, not by `n`. This should be taken into account when initializing an agent. # Example ``` -MetaSampler(policy = MultiBatchSampler(BatchSampler(10), 3), +MetaSampler(policy = MultiBatchSampler(BatchSampler(10), 3), critic = MultiBatchSampler(BatchSampler(100), 5)) ``` """ @@ -169,13 +169,13 @@ export NStepBatchSampler NStepBatchSampler{names}(; n, γ, batchsize=32, stacksize=nothing, rng=Random.GLOBAL_RNG) Used to sample a discounted sum of consecutive rewards in the framework of n-step TD learning. -The "next" element of Multiplexed traces (such as the next_state or the next_action) will be +The "next" element of Multiplexed traces (such as the next_state or the next_action) will be that in up to `n > 1` steps later in the buffer. The reward will be the discounted sum of the `n` rewards, with `γ` as the discount factor. -NStepBatchSampler may also be used with n ≥ 1 to sample a "stack" of states if `stacksize` is set +NStepBatchSampler may also be used with n ≥ 1 to sample a "stack" of states if `stacksize` is set to an integer > 1. This samples the (stacksize - 1) previous states. This is useful in the case -of partial observability, for example when the state is approximated by `stacksize` consecutive +of partial observability, for example when the state is approximated by `stacksize` consecutive frames. """ mutable struct NStepBatchSampler{names, S <: Union{Nothing,Int}, R <: AbstractRNG} @@ -187,17 +187,17 @@ mutable struct NStepBatchSampler{names, S <: Union{Nothing,Int}, R <: AbstractRN end NStepBatchSampler(t::AbstractTraces; kw...) = NStepBatchSampler{keys(t)}(; kw...) -function NStepBatchSampler{names}(; n, γ, batchsize=32, stacksize=nothing, rng=Random.default_rng()) where {names} +function NStepBatchSampler{names}(; n, γ, batchsize=32, stacksize=nothing, rng=Random.default_rng()) where {names} @assert n >= 1 "n must be ≥ 1." ss = stacksize == 1 ? nothing : stacksize NStepBatchSampler{names, typeof(ss), typeof(rng)}(n, γ, batchsize, ss, rng) end #return a boolean vector of the valid sample indices given the stacksize and the truncated n for each index. -function valid_range(s::NStepBatchSampler, eb::EpisodesBuffer) +function valid_range(s::NStepBatchSampler, eb::EpisodesBuffer) range = copy(eb.sampleable_inds) ns = Vector{Int}(undef, length(eb.sampleable_inds)) - stacksize = isnothing(s.stacksize) ? 1 : s.stacksize + stacksize = isnothing(s.stacksize) ? 1 : s.stacksize for idx in eachindex(range) step_number = eb.step_numbers[idx] range[idx] = step_number >= stacksize && eb.sampleable_inds[idx] @@ -258,9 +258,9 @@ end """ EpisodesSampler() -A sampler that samples all Episodes present in the Trajectory and divides them into +A sampler that samples all Episodes present in the Trajectory and divides them into Episode containers. Truncated Episodes (e.g. due to the buffer capacity) are sampled as well. -There will be at most one truncated episode and it will always be the first one. +There will be at most one truncated episode and it will always be the first one. """ struct EpisodesSampler{names} end @@ -295,7 +295,7 @@ function StatsBase.sample(::EpisodesSampler, t::EpisodesBuffer, names) idx += 1 end end - + return [make_episode(t, r, names) for r in ranges] end @@ -304,29 +304,29 @@ end """ MultiStepSampler{names}(batchsize, n, stacksize, rng) -Sampler that fetches steps `[x, x+1, ..., x + n -1]` for each trace of each sampled index -`x`. The samples are returned in an array of batchsize elements. For each element, n is -truncated by the end of its episode. This means that the dimensions of each sample are not -the same. +Sampler that fetches steps `[x, x+1, ..., x + n -1]` for each trace of each sampled index +`x`. The samples are returned in an array of batchsize elements. For each element, n is +truncated by the end of its episode. This means that the dimensions of each sample are not +the same. """ struct MultiStepSampler{names, S <: Union{Nothing,Int}, R <: AbstractRNG} n::Int batchsize::Int stacksize::S - rng::R + rng::R end MultiStepSampler(t::AbstractTraces; kw...) = MultiStepSampler{keys(t)}(; kw...) -function MultiStepSampler{names}(; n::Int, batchsize, stacksize=nothing, rng=Random.default_rng()) where {names} +function MultiStepSampler{names}(; n::Int, batchsize, stacksize=nothing, rng=Random.default_rng()) where {names} @assert n >= 1 "n must be ≥ 1." ss = stacksize == 1 ? nothing : stacksize MultiStepSampler{names, typeof(ss), typeof(rng)}(n, batchsize, ss, rng) end -function valid_range(s::MultiStepSampler, eb::EpisodesBuffer) +function valid_range(s::MultiStepSampler, eb::EpisodesBuffer) range = copy(eb.sampleable_inds) ns = Vector{Int}(undef, length(eb.sampleable_inds)) - stacksize = isnothing(s.stacksize) ? 1 : s.stacksize + stacksize = isnothing(s.stacksize) ? 1 : s.stacksize for idx in eachindex(range) step_number = eb.step_numbers[idx] range[idx] = step_number >= stacksize && eb.sampleable_inds[idx] @@ -353,7 +353,7 @@ function fetch(::MultiStepSampler, trace, ::Val, inds, ns) [trace[idx:(idx + ns[i] - 1)] for (i,idx) in enumerate(inds)] end -function fetch(s::MultiStepSampler{names, Int}, trace::AbstractTrace, ::Union{Val{:state}, Val{:next_state}}, inds, ns) where {names} +function fetch(s::MultiStepSampler{names, Int}, trace::AbstractTrace, ::Union{Val{:state}, Val{:next_state}}, inds, ns) where {names} [trace[[idx + i + n - 1 for i in -s.stacksize+1:0, n in 1:ns[j]]] for (j,idx) in enumerate(inds)] end diff --git a/test/common.jl b/test/common.jl index 714520b..117d6ad 100644 --- a/test/common.jl +++ b/test/common.jl @@ -34,15 +34,20 @@ end ) |> gpu @test t isa CircularArraySARTSATraces + @test ReinforcementLearningTrajectories.capacity(t) == 3 + @test CircularArrayBuffers.capacity(t) == 3 - push!(t, (state=ones(Float32, 2, 3),)) + push!(t, (state=ones(Float32, 2, 3),) |> gpu) push!(t, (action=ones(Float32, 2), next_state=ones(Float32, 2, 3) * 2) |> gpu) @test length(t) == 0 push!(t, (reward=1.0f0, terminal=false) |> gpu) @test length(t) == 0 # next_action is still missing - push!(t, (state=ones(Float32, 2, 3) * 3, action=ones(Float32, 2) * 2) |> gpu) + push!(t, (action=ones(Float32, 2) * 2,) |> gpu) + @test length(t) == 1 + + push!(t, (state=ones(Float32, 2, 3) * 3,) |> gpu) @test length(t) == 1 # this will trigger the scalar indexing of CuArray @@ -71,29 +76,33 @@ end @test length(t) == 3 + push!(t, (action=ones(Float32, 2) * 6,) |> gpu) + @test length(t) == 3 + # this will trigger the scalar indexing of CuArray CUDA.@allowscalar @test t[1] == ( - state=ones(Float32, 2, 3) * 2, - next_state=ones(Float32, 2, 3) * 3, - action=ones(Float32, 2) * 2, - next_action=ones(Float32, 2) * 3, - reward=2.0f0, + state=ones(Float32, 2, 3) * 3, + next_state=ones(Float32, 2, 3) * 4, + action=ones(Float32, 2) * 3, + next_action=ones(Float32, 2) * 4, + reward=3.0f0, terminal=false, ) CUDA.@allowscalar @test t[end] == ( - state=ones(Float32, 2, 3) * 4, - next_state=ones(Float32, 2, 3) * 5, - action=ones(Float32, 2) * 4, - next_action=ones(Float32, 2) * 5, - reward=4.0f0, + state=ones(Float32, 2, 3) * 5, + next_state=ones(Float32, 2, 3) * 6, + action=ones(Float32, 2) * 5, + next_action=ones(Float32, 2) * 6, + reward=5.0f0, terminal=false, ) batch = t[1:3] @test size(batch.state) == (2, 3, 3) @test size(batch.action) == (2, 3) - @test batch.reward == [2.0, 3.0, 4.0] |> gpu + @test batch.reward == [3.0, 4.0, 5.0] |> gpu @test batch.terminal == Bool[0, 0, 0] |> gpu + end @testset "ElasticArraySARTSTraces" begin @@ -127,6 +136,8 @@ end ) @test t isa CircularArraySLARTTraces + @test ReinforcementLearningTrajectories.capacity(t) == 3 + @test CircularArrayBuffers.capacity(t) == 3 end @testset "CircularPrioritizedTraces-SARTS" begin @@ -136,6 +147,7 @@ end ), default_priority=1.0f0 ) + @test ReinforcementLearningTrajectories.capacity(t) == 3 push!(t, (state=0, action=0)) @@ -168,14 +180,14 @@ end default_priority=1.0f0 ) - eb = EpisodesBuffer(t) + eb = EpisodesBuffer(t) push!(eb, (state = 1, action = 1)) for i = 1:5 - push!(eb, (state = i+1, action =i+1, reward = i, terminal = false)) + push!(eb, (state = i+1, action = i+1, reward = i, terminal = false)) end push!(eb, (state = 7, action = 7)) for (j,i) = enumerate(8:11) - push!(eb, (state = i, action =i, reward = i-1, terminal = false)) + push!(eb, (state = i, action = i, reward = i-1, terminal = false)) end s = BatchSampler(1000) b = sample(s, eb) @@ -196,6 +208,7 @@ end ), default_priority=1.0f0 ) + @test ReinforcementLearningTrajectories.capacity(t) == 3 push!(t, (state=0, action=0)) @@ -209,6 +222,8 @@ end b = sample(s, t) + @test t[:priority] == [1.0f0, 1.0f0, 1.0f0] + t[:priority, [1, 2]] = [0, 0] # shouldn't be changed since [1,2] are old keys @@ -227,18 +242,19 @@ end ), default_priority=1.0f0 ) - - eb = EpisodesBuffer(t) + + eb = EpisodesBuffer(t) push!(eb, (state = 1,)) for i = 1:5 - push!(eb, (state = i+1, action =i, reward = i, terminal = false)) + push!(eb, (state = i+1, action = i, reward = i, terminal = false)) end push!(eb, PartialNamedTuple((action = 6,))) push!(eb, (state = 7,)) - for (j,i) = enumerate(8:11) - push!(eb, (state = i, action =i-1, reward = i-1, terminal = false)) + for i = 8:11 + push!(eb, (state = i, action = i-1, reward = i-1, terminal = false)) end - push!(eb, PartialNamedTuple((action=12,))) + push!(eb, PartialNamedTuple((action=11,))) + s = BatchSampler(1000) b = sample(s, eb) cm = counter(b[:state]) diff --git a/test/episodes.jl b/test/episodes.jl index 0932416..2633307 100644 --- a/test/episodes.jl +++ b/test/episodes.jl @@ -3,54 +3,65 @@ using CircularArrayBuffers using Test @testset "EpisodesBuffer" begin - @testset "with circular traces" begin + @testset "with circular SARTS traces" begin eb = EpisodesBuffer( CircularArraySARTSTraces(; capacity=10) ) - #push a first episode l=5 + + # push first episode (five steps) push!(eb, (state = 1,)) @test eb.sampleable_inds[end] == 0 @test eb.episodes_lengths[end] == 0 @test eb.step_numbers[end] == 1 for i = 1:5 - push!(eb, (state = i+1, action =i, reward = i, terminal = false)) + push!(eb, (state = i+1, action = i, reward = i, terminal = false)) @test eb.sampleable_inds[end] == 0 @test eb.sampleable_inds[end-1] == 1 @test eb.step_numbers[end] == i + 1 @test eb.episodes_lengths[end-i:end] == fill(i, i+1) end + @test eb[end] == (state = 5, next_state = 6, action = 5, reward = 5, terminal = false) @test eb.sampleable_inds == [1,1,1,1,1,0] @test length(eb.traces) == 5 - #start new episode of 6 periods. + + # start second episode push!(eb, (state = 7,)) @test eb.sampleable_inds[end] == 0 @test eb.sampleable_inds[end-1] == 0 @test eb.episodes_lengths[end] == 0 @test eb.step_numbers[end] == 1 @test eb.sampleable_inds == [1,1,1,1,1,0,0] - @test eb[6][:reward] == 0 #6 is not a valid index, the reward there is filled as zero + @test eb[:reward][6] == 0 # 6 is not a valid index, filled with dummy value zero + @test_throws BoundsError eb[6] # 6 is not a valid index + @test_throws BoundsError eb[7] # 7 is not a valid index + + # push four steps of second episode ep2_len = 0 - for (j,i) = enumerate(8:11) + for (i,s) = enumerate(8:11) ep2_len += 1 - push!(eb, (state = i, action =i-1, reward = i-1, terminal = false)) + push!(eb, (state = s, action = s-1, reward = s-1, terminal = false)) @test eb.sampleable_inds[end] == 0 @test eb.sampleable_inds[end-1] == 1 - @test eb.step_numbers[end] == j + 1 - @test eb.episodes_lengths[end-j:end] == fill(ep2_len, ep2_len + 1) + @test eb.step_numbers[end] == i + 1 + @test eb.episodes_lengths[end-i:end] == fill(ep2_len, ep2_len + 1) end + @test eb[end] == (state = 10, next_state = 11, action = 10, reward = 10, terminal = false) @test eb.sampleable_inds == [1,1,1,1,1,0,1,1,1,1,0] - @test length(eb.traces) == 10 - #three last steps replace oldest steps in the buffer. + @test length(eb) == 10 + # push two more steps of second episode, which replace the oldest steps in the buffer for (i, s) = enumerate(12:13) ep2_len += 1 - push!(eb, (state = s, action =s-1, reward = s-1, terminal = false)) + push!(eb, (state = s, action = s-1, reward = s-1, terminal = false)) @test eb.sampleable_inds[end] == 0 @test eb.sampleable_inds[end-1] == 1 @test eb.step_numbers[end] == i + 1 + 4 @test eb.episodes_lengths[end-ep2_len:end] == fill(ep2_len, ep2_len + 1) end - #episode 1 + @test eb[end] == (state = 12, next_state = 13, action = 12, reward = 12, terminal = false) + @test eb.sampleable_inds == [1,1,1,0,1,1,1,1,1,1,0] + + # verify episode 2 for (i,s) in enumerate(3:13) if i in (4, 11) @test eb.sampleable_inds[i] == 0 @@ -62,8 +73,8 @@ using Test @test b[:state] == b[:action] == b[:reward] == s @test b[:next_state] == s + 1 end - #episode 2 - #start a third episode + + # push third episode push!(eb, (state = 14, )) @test eb.sampleable_inds[end] == 0 @test eb.sampleable_inds[end-1] == 0 @@ -71,28 +82,28 @@ using Test @test eb.step_numbers[end] == 1 #push until it reaches it own start for (i,s) in enumerate(15:26) - push!(eb, (state = s, action =s-1, reward = s-1, terminal = false)) + push!(eb, (state = s, action = s-1, reward = s-1, terminal = false)) end @test eb.sampleable_inds == [fill(true, 10); [false]] @test eb.episodes_lengths == fill(length(15:26), 11) @test eb.step_numbers == [3:13;] - step = popfirst!(eb) + popfirst!(eb) @test length(eb) == length(eb.sampleable_inds) - 1 == length(eb.step_numbers) - 1 == length(eb.episodes_lengths) - 1 == 9 @test first(eb.step_numbers) == 4 - step = pop!(eb) + pop!(eb) @test length(eb) == length(eb.sampleable_inds) - 1 == length(eb.step_numbers) - 1 == length(eb.episodes_lengths) - 1 == 8 @test last(eb.step_numbers) == 12 @test size(eb) == size(eb.traces) == (8,) empty!(eb) @test size(eb) == (0,) == size(eb.traces) == size(eb.sampleable_inds) == size(eb.episodes_lengths) == size(eb.step_numbers) - show(eb); end - @testset "with PartialNamedTuple" begin + + @testset "with SARTSA traces and PartialNamedTuple" begin eb = EpisodesBuffer( CircularArraySARTSATraces(; capacity=10) ) - #push a first episode l=5 + # push first episode (five steps) push!(eb, (state = 1,)) @test eb.sampleable_inds[end] == 0 @test eb.episodes_lengths[end] == 0 @@ -107,38 +118,46 @@ using Test @test eb.step_numbers[end] == i + 1 @test eb.episodes_lengths[end-i:end] == fill(i, i+1) end + @test eb.sampleable_inds == [1,1,1,1,0,0] push!(eb, PartialNamedTuple((action = 6,))) @test eb.sampleable_inds == [1,1,1,1,1,0] @test length(eb.traces) == 5 - #start new episode of 6 periods. + + # start second episode push!(eb, (state = 7,)) @test eb.sampleable_inds[end] == 0 @test eb.sampleable_inds[end-1] == 0 @test eb.episodes_lengths[end] == 0 @test eb.step_numbers[end] == 1 @test eb.sampleable_inds == [1,1,1,1,1,0,0] - @test eb[:action][6] == 6 - @test eb[:next_action][5] == 6 - @test eb[:reward][6] == 0 #6 is not a valid index, the reward there is dummy, filled as zero - @test_throws BoundsError eb[6] #6 is not a valid index, the reward there is dummy, filled as zero + @test eb[5][:next_action] == eb[:next_action][5] == 6 + @test eb[:reward][6] == 0 # 6 is not a valid index, the reward there is dummy, filled as zero + @test_throws BoundsError eb[6] # 6 is not a valid index ep2_len = 0 - for (j,i) = enumerate(8:11) + # push four steps of second episode + for (i,s) = enumerate(8:11) ep2_len += 1 - push!(eb, (state = i, action =i-1, reward = i-1, terminal = false)) + push!(eb, (state = s, action = s-1, reward = s-1, terminal = false)) @test eb.sampleable_inds[end] == 0 @test eb.sampleable_inds[end-1] == 0 if eb.step_numbers[end] > 2 @test eb.sampleable_inds[end-2] == 1 end - @test eb.step_numbers[end] == j + 1 - @test eb.episodes_lengths[end-j:end] == fill(ep2_len, ep2_len + 1) + @test eb.step_numbers[end] == i + 1 + @test eb.episodes_lengths[end-i:end] == fill(ep2_len, ep2_len + 1) end @test eb.sampleable_inds == [1,1,1,1,1,0,1,1,1,0,0] - @test length(eb.traces) == 9 #an action is missing at this stage - #three last steps replace oldest steps in the buffer. + @test length(eb.traces) == 9 # an action is missing at this stage + @test eb.sampleable_inds[end] == 0 + @test eb.sampleable_inds[end-1] == 0 + if eb.step_numbers[end] > 2 + @test eb.sampleable_inds[end-2] == 1 + end + + # push two more steps of second episode, which replace the oldest steps in the buffer for (i, s) = enumerate(12:13) ep2_len += 1 - push!(eb, (state = s, action =s-1, reward = s-1, terminal = false)) + push!(eb, (state = s, action = s-1, reward = s-1, terminal = false)) @test eb.sampleable_inds[end] == 0 @test eb.sampleable_inds[end-1] == 0 if eb.step_numbers[end] > 2 @@ -149,7 +168,8 @@ using Test end push!(eb, PartialNamedTuple((action = 13,))) @test length(eb.traces) == 10 - #episode 1 + + # verify episode 2 for (i,s) in enumerate(3:13) if i in (4, 11) @test eb.sampleable_inds[i] == 0 @@ -161,16 +181,16 @@ using Test @test b[:state] == b[:action] == b[:reward] == s @test b[:next_state] == b[:next_action] == s + 1 end - #episode 2 - #start a third episode + + # push third episode push!(eb, (state = 14,)) @test eb.sampleable_inds[end] == 0 @test eb.sampleable_inds[end-1] == 0 @test eb.episodes_lengths[end] == 0 @test eb.step_numbers[end] == 1 - #push until it reaches it own start + # push until it reaches it own start for (i,s) in enumerate(15:26) - push!(eb, (state = s, action =s-1, reward = s-1, terminal = false)) + push!(eb, (state = s, action = s-1, reward = s-1, terminal = false)) end push!(eb, PartialNamedTuple((action = 26,))) @test eb.sampleable_inds == [fill(true, 10); [false]] @@ -185,7 +205,6 @@ using Test @test size(eb) == size(eb.traces) == (8,) empty!(eb) @test size(eb) == (0,) == size(eb.traces) == size(eb.sampleable_inds) == size(eb.episodes_lengths) == size(eb.step_numbers) - show(eb); end @testset "with vector traces" begin eb = EpisodesBuffer( @@ -193,7 +212,7 @@ using Test state=Int[], reward=Int[]) ) - push!(eb, (state = 1,)) #partial inserting + push!(eb, (state = 1,)) # partial inserting for i = 1:15 push!(eb, (state = i+1, reward =i)) end @@ -201,7 +220,7 @@ using Test @test eb.sampleable_inds == [fill(true, 15); [false]] @test all(==(15), eb.episodes_lengths) @test eb.step_numbers == [1:16;] - push!(eb, (state = 1,)) #partial inserting + push!(eb, (state = 1,)) # partial inserting for i = 1:15 push!(eb, (state = i+1, reward =i)) end @@ -210,11 +229,12 @@ using Test @test eb.step_numbers == [1:16;1:16] @test length(eb) == 31 end - @testset "with ElasticArraySARTSTraces traces" begin + + @testset "with ElasticArraySARTSTraces" begin eb = EpisodesBuffer( ElasticArraySARTSTraces() ) - #push a first episode l=5 + # push first episode (five steps) push!(eb, (state = 1,)) @test eb.sampleable_inds[end] == 0 @test eb.episodes_lengths[end] == 0 @@ -228,15 +248,18 @@ using Test end @test eb.sampleable_inds == [1,1,1,1,1,0] @test length(eb.traces) == 5 - #start new episode of 6 periods. + + # start second episode push!(eb, (state = 7,)) @test eb.sampleable_inds[end] == 0 @test eb.sampleable_inds[end-1] == 0 @test eb.episodes_lengths[end] == 0 @test eb.step_numbers[end] == 1 @test eb.sampleable_inds == [1,1,1,1,1,0,0] - @test eb[6][:reward] == 0 #6 is not a valid index, the reward there is filled as zero + @test eb[:reward][6] == 0 #6 is not a valid index, the reward there is dummy, filled as zero + @test_throws BoundsError eb[6] #6 is not a valid index ep2_len = 0 + # push four steps of second episode for (j,i) = enumerate(8:11) ep2_len += 1 push!(eb, (state = i, action =i-1, reward = i-1, terminal = false)) @@ -248,6 +271,7 @@ using Test @test eb.sampleable_inds == [1,1,1,1,1,0,1,1,1,1,0] @test length(eb.traces) == 10 + # push two more steps of second episode, which replace the oldest steps in the buffer for (i, s) = enumerate(12:13) ep2_len += 1 push!(eb, (state = s, action =s-1, reward = s-1, terminal = false)) @@ -256,7 +280,7 @@ using Test @test eb.step_numbers[end] == i + 1 + 4 @test eb.episodes_lengths[end-ep2_len:end] == fill(ep2_len, ep2_len + 1) end - #episode 1 + # verify episode 2 for i in 3:13 if i in (6, 13) @test eb.sampleable_inds[i] == 0 @@ -268,14 +292,15 @@ using Test @test b[:state] == b[:action] == b[:reward] == i @test b[:next_state] == i + 1 end - #episode 2 - #start a third episode + + # push third episode push!(eb, (state = 14, )) @test eb.sampleable_inds[end] == 0 @test eb.sampleable_inds[end-1] == 0 @test eb.episodes_lengths[end] == 0 @test eb.step_numbers[end] == 1 + # push until it reaches it own start for (i,s) in enumerate(15:26) push!(eb, (state = s, action =s-1, reward = s-1, terminal = false)) end @@ -292,15 +317,14 @@ using Test @test size(eb) == size(eb.traces) == (8,) empty!(eb) @test size(eb) == (0,) == size(eb.traces) == size(eb.sampleable_inds) == size(eb.episodes_lengths) == size(eb.step_numbers) - show(eb); =# end - @testset "ElasticArraySARTSATraces with PartialNamedTuple" begin + @testset "ElasticArraySARTSATraces with PartialNamedTuple" begin eb = EpisodesBuffer( ElasticArraySARTSATraces() ) - #push a first episode l=5 + # push first episode (five steps) push!(eb, (state = 1,)) @test eb.sampleable_inds[end] == 0 @test eb.episodes_lengths[end] == 0 @@ -318,7 +342,8 @@ using Test push!(eb, PartialNamedTuple((action = 6,))) @test eb.sampleable_inds == [1,1,1,1,1,0] @test length(eb.traces) == 5 - #start new episode of 6 periods. + + # start second episode push!(eb, (state = 7,)) @test eb.sampleable_inds[end] == 0 @test eb.sampleable_inds[end-1] == 0 @@ -329,6 +354,7 @@ using Test @test eb[:next_action][5] == 6 @test eb[:reward][6] == 0 #6 is not a valid index, the reward there is dummy, filled as zero ep2_len = 0 + # push four steps of second episode for (j,i) = enumerate(8:11) ep2_len += 1 push!(eb, (state = i, action =i-1, reward = i-1, terminal = false)) @@ -341,7 +367,8 @@ using Test @test eb.episodes_lengths[end-j:end] == fill(ep2_len, ep2_len + 1) end @test eb.sampleable_inds == [1,1,1,1,1,0,1,1,1,0,0] - @test length(eb.traces) == 9 #an action is missing at this stage + @test length(eb.traces) == 9 # an action is missing at this stage + # push two more steps of second episode, which replace the oldest steps in the buffer for (i, s) = enumerate(12:13) ep2_len += 1 push!(eb, (state = s, action =s-1, reward = s-1, terminal = false)) @@ -355,7 +382,8 @@ using Test end push!(eb, PartialNamedTuple((action = 13,))) @test length(eb.traces) == 12 - #episode 1 + + # verify episode 2 for i in 1:13 if i in (6, 13) @test eb.sampleable_inds[i] == 0 @@ -367,8 +395,8 @@ using Test @test b[:state] == b[:action] == b[:reward] == i @test b[:next_state] == b[:next_action] == i + 1 end - #episode 2 - #start a third episode + + # push third episode push!(eb, (state = 14,)) @test eb.sampleable_inds[end] == 0 @test eb.sampleable_inds[end-1] == 0 @@ -392,7 +420,6 @@ using Test @test size(eb) == size(eb.traces) == (8,) empty!(eb) @test size(eb) == (0,) == size(eb.traces) == size(eb.sampleable_inds) == size(eb.episodes_lengths) == size(eb.step_numbers) - show(eb); =# end end diff --git a/test/samplers.jl b/test/samplers.jl index 2565ddd..dc931a2 100644 --- a/test/samplers.jl +++ b/test/samplers.jl @@ -13,17 +13,20 @@ import ReinforcementLearningTrajectories.fetch @test keys(b) == (:state, :action) @test size(b.state) == (3, 4, sz) @test size(b.action) == (sz,) - + #In EpisodesBuffer - eb = EpisodesBuffer(CircularArraySARTSATraces(capacity=10)) - push!(eb, (state = 1, action = 1)) + eb = EpisodesBuffer(CircularArraySARTSATraces(capacity=10)) + push!(eb, (state = 1,)) for i = 1:5 - push!(eb, (state = i+1, action =i+1, reward = i, terminal = false)) + push!(eb, (state = i+1, action = i, reward = i, terminal = false)) end - push!(eb, (state = 7, action = 7)) + push!(eb, PartialNamedTuple((next_action = 6,))) + push!(eb, (state = 7,)) for (j,i) = enumerate(8:11) - push!(eb, (state = i, action =i, reward = i-1, terminal = false)) + push!(eb, (state = i, action = i-1, reward = i-1, terminal = false)) end + push!(eb, PartialNamedTuple((next_action = 11,))) + s = BatchSampler(1000) b = sample(s, eb) cm = counter(b[:state]) @@ -70,7 +73,7 @@ import ReinforcementLearningTrajectories.fetch @test length(batches) == 11 @test length(batches[1][:policy][:a]) == 3 @test length(batches[1][:critic]) == 2 # we sampled 2 batches for critic - @test length(batches[1][:critic][1][:b]) == 5 #each batch is 5 samples + @test length(batches[1][:critic][1][:b]) == 5 #each batch is 5 samples end #! format: off @@ -79,17 +82,20 @@ import ReinforcementLearningTrajectories.fetch n_stack = 2 n_horizon = 3 batchsize = 1000 - eb = EpisodesBuffer(CircularArraySARTSATraces(capacity=10)) + eb = EpisodesBuffer(CircularArraySARTSATraces(capacity=10)) s1 = NStepBatchSampler(eb, n=n_horizon, γ=γ, stacksize=n_stack, batchsize=batchsize) - push!(eb, (state = 1, action = 1)) + push!(eb, (state = 1,)) for i = 1:5 - push!(eb, (state = i+1, action =i+1, reward = i, terminal = i == 5)) + push!(eb, (state = i+1, action =i, reward = i, terminal = i == 5)) end - push!(eb, (state = 7, action = 7)) + push!(eb, PartialNamedTuple((next_action = 6,))) + push!(eb, (state = 7,)) for (j,i) = enumerate(8:11) - push!(eb, (state = i, action =i, reward = i-1, terminal = false)) + push!(eb, (state = i, action = i-1, reward = i-1, terminal = false)) end + push!(eb, PartialNamedTuple((next_action = 11,))) + weights, ns = ReinforcementLearningTrajectories.valid_range(s1, eb) @test weights == [0,1,1,1,1,0,0,1,1,1,0] @test ns == [3,3,3,2,1,-1,3,3,2,1,0] #the -1 is due to ep_lengths[6] being that of 2nd episode but step_numbers[6] being that of 1st episode @@ -108,7 +114,7 @@ import ReinforcementLearningTrajectories.fetch @test next_states == [4 5 5 5 10 10 10; 5 6 6 6 11 11 11] @test all(in(eachcol(next_states)), unique(eachcol(batch[:next_state]))) - #action: samples normally + # action: samples normally actions = ReinforcementLearningTrajectories.fetch(s1, eb[:action], Val(:action), inds, ns[inds]) @test actions == inds @test all(in(actions), unique(batch[:action])) @@ -128,17 +134,17 @@ import ReinforcementLearningTrajectories.fetch γ = 0.99 n_horizon = 3 batchsize = 4 - eb = EpisodesBuffer(CircularPrioritizedTraces(CircularArraySARTSATraces(capacity=10), default_priority = 10f0)) + eb = EpisodesBuffer(CircularPrioritizedTraces(CircularArraySARTSATraces(capacity=10), default_priority = 10f0)) s1 = NStepBatchSampler(eb, n=n_horizon, γ=γ, batchsize=batchsize) - + push!(eb, (state = 1,)) for i = 1:5 - push!(eb, (state = i+1, action =i, reward = i, terminal = i == 5)) + push!(eb, (state = i+1, action = i, reward = i, terminal = i == 5)) end push!(eb, PartialNamedTuple((action=6,))) push!(eb, (state = 7,)) for (j,i) = enumerate(7:10) - push!(eb, (state = i+1, action =i, reward = i, terminal = i==10)) + push!(eb, (state = i+1, action = i, reward = i, terminal = i==10)) end push!(eb, PartialNamedTuple((action = 11,))) weights, ns = ReinforcementLearningTrajectories.valid_range(s1, eb) @@ -151,14 +157,14 @@ import ReinforcementLearningTrajectories.fetch @testset "EpisodesSampler" begin s = EpisodesSampler() - eb = EpisodesBuffer(CircularArraySARTSTraces(capacity=10)) + eb = EpisodesBuffer(CircularArraySARTSTraces(capacity=10)) push!(eb, (state = 1,)) for i = 1:5 - push!(eb, (state = i+1, action =i, reward = i, terminal = false)) + push!(eb, (state = i+1, action = i, reward = i, terminal = false)) end push!(eb, (state = 7,)) for (j,i) = enumerate(8:12) - push!(eb, (state = i, action =i-1, reward = i-1, terminal = false)) + push!(eb, (state = i, action = i-1, reward = i-1, terminal = false)) end b = sample(s, eb) @@ -171,25 +177,24 @@ import ReinforcementLearningTrajectories.fetch @test b[2][:next_state] == [8:12;] @test b[2][:action] == [7:11;] @test b[2][:reward] == [7:11;] - + for (j,i) = enumerate(2:5) push!(eb, (state = i, action =i, reward = i-1, terminal = false)) end #only the last state of the first episode is still buffered. Should not be sampled. b = sample(s, eb) @test length(b) == 1 - - #with specified traces + # with specified traces s = EpisodesSampler{(:state,)}() - eb = EpisodesBuffer(CircularArraySARTSTraces(capacity=10)) - push!(eb, (state = 1, action = 1)) + eb = EpisodesBuffer(CircularArraySARTSTraces(capacity=10)) + push!(eb, (state = 1,)) for i = 1:5 - push!(eb, (state = i+1, action =i+1, reward = i, terminal = false)) + push!(eb, (state = i+1, action = i, reward = i, terminal = false)) end - push!(eb, (state = 7, action = 7)) + push!(eb, (state = 7,)) for (j,i) = enumerate(8:12) - push!(eb, (state = i, action =i, reward = i-1, terminal = false)) + push!(eb, (state = i, action = i-1, reward = i-1, terminal = false)) end b = sample(s, eb) @@ -202,34 +207,38 @@ import ReinforcementLearningTrajectories.fetch n_stack = 2 n_horizon = 3 batchsize = 1000 - eb = EpisodesBuffer(CircularArraySARTSATraces(capacity=10)) + eb = EpisodesBuffer(CircularArraySARTSATraces(capacity=10)) s1 = MultiStepSampler(eb, n=n_horizon, stacksize=n_stack, batchsize=batchsize) - push!(eb, (state = 1, action = 1)) + push!(eb, (state = 1,)) for i = 1:5 - push!(eb, (state = i+1, action =i+1, reward = i, terminal = i == 5)) + push!(eb, (state = i+1, action = i, reward = i, terminal = i == 5)) end - push!(eb, (state = 7, action = 7)) + push!(eb, PartialNamedTuple((action=6,))) + push!(eb, (state = 7,)) for (j,i) = enumerate(8:11) - push!(eb, (state = i, action =i, reward = i-1, terminal = false)) + push!(eb, (state = i, action = i-1, reward = i-1, terminal = false)) end + push!(eb, PartialNamedTuple((action=11,))) + weights, ns = ReinforcementLearningTrajectories.valid_range(s1, eb) @test weights == [0,1,1,1,1,0,0,1,1,1,0] - @test ns == [3,3,3,2,1,-1,3,3,2,1,0] #the -1 is due to ep_lengths[6] being that of 2nd episode but step_numbers[6] being that of 1st episode + @test ns == [3,3,3,2,1,-1,3,3,2,1,0] # the -1 is due to ep_lengths[6] being that of 2nd episode but step_numbers[6] being that of 1st episode inds = [i for i in eachindex(weights) if weights[i] == 1] batch = sample(s1, eb) for key in keys(eb) @test haskey(batch, key) end - #state and next_state: samples with stacksize + + # state and next_state: samples with stacksize states = ReinforcementLearningTrajectories.fetch(s1, eb[:state], Val(:state), inds, ns[inds]) @test states == [[1 2 3; 2 3 4], [2 3 4; 3 4 5], [3 4; 4 5], [4; 5;;], [7 8 9; 8 9 10], [8 9; 9 10], [9; 10;;]] @test all(in(states), batch[:state]) - #next_state: samples with stacksize and nsteps forward + # next_state: samples with stacksize and nsteps forward next_states = ReinforcementLearningTrajectories.fetch(s1, eb[:next_state], Val(:next_state), inds, ns[inds]) @test next_states == [[2 3 4; 3 4 5], [3 4 5; 4 5 6], [4 5; 5 6], [5; 6;;], [8 9 10; 9 10 11], [9 10; 10 11], [10; 11;;]] @test all(in(next_states), batch[:next_state]) - #all other traces sample normally + # all other traces sample normally actions = ReinforcementLearningTrajectories.fetch(s1, eb[:action], Val(:action), inds, ns[inds]) @test actions == [[2,3,4], [3,4,5], [4,5], [5], [8,9,10], [9,10],[10]] @test all(in(actions), batch[:action])