
Commit 22a5a54

Fix naming per PR comments

Author: Jeremiah Lewis (committed)
1 parent b2dd626 commit 22a5a54

14 files changed: +48 -48 lines changed


docs/homepage/blog/a_practical_introduction_to_RL.jl/index.html
+7 -7

@@ -13872,7 +13872,7 @@ <h2 id="Q2:-What-if-we-want-to-stop-after-several-episodes?"><strong>Q2: What if
 <div class="prompt input_prompt">In&nbsp;[15]:</div>
 <div class="inner_cell">
 <div class="input_area">
-<div class=" highlight hl-julia"><pre>run(policy, env, StopAfterEpisode(10))
+<div class=" highlight hl-julia"><pre>run(policy, env, StopAfterNEpisodes(10))
 </pre></div>

 </div>

@@ -13907,7 +13907,7 @@ <h3 id="Q2.b:-What-if-we-want-to-stop-until-arbitrary-condition-meets?"><strong>
 In RL.jl, several common ones are already provided, like:</p>
 <ul>
 <li><code>StopAfterStep</code></li>
-<li><code>StopAfterEpisode</code></li>
+<li><code>StopAfterNEpisodes</code></li>
 <li><code>StopAfterNSeconds</code></li>
 <li>...</li>
 </ul>

@@ -13936,7 +13936,7 @@ <h2 id="Q3:-How-to-collect-experiment-results?"><strong>Q3: How to collect exper
 <div class="prompt input_prompt">In&nbsp;[16]:</div>
 <div class="inner_cell">
 <div class="input_area">
-<div class=" highlight hl-julia"><pre>run(policy, env, StopAfterEpisode(10), TotalRewardPerEpisode())
+<div class=" highlight hl-julia"><pre>run(policy, env, StopAfterNEpisodes(10), TotalRewardPerEpisode())
 </pre></div>

 </div>

@@ -14144,7 +14144,7 @@ <h2 id="The-Actor-Mode">The <em>Actor</em> Mode<a class="anchor-link" href="#The
 <div class=" highlight hl-julia"><pre>run(
     policy,
     RandomWalk1D(),
-    StopAfterEpisode(10),
+    StopAfterNEpisodes(10),
     TotalRewardPerEpisode()
 )
 </pre></div>

@@ -14311,7 +14311,7 @@ <h2 id="The-Training-Mode">The <em>Training</em> Mode<a class="anchor-link" href
 <div class="prompt input_prompt">In&nbsp;[22]:</div>
 <div class="inner_cell">
 <div class="input_area">
-<div class=" highlight hl-julia"><pre>run(agent, env, StopAfterEpisode(10), TotalRewardPerEpisode())
+<div class=" highlight hl-julia"><pre>run(agent, env, StopAfterNEpisodes(10), TotalRewardPerEpisode())
 </pre></div>

 </div>

@@ -14383,7 +14383,7 @@ <h2 id="The-Training-Mode">The <em>Training</em> Mode<a class="anchor-link" href
 <div class="inner_cell">
 <div class="input_area">
 <div class=" highlight hl-julia"><pre>hook = StepsPerEpisode()
-run(agent, env, StopAfterEpisode(10), hook)
+run(agent, env, StopAfterNEpisodes(10), hook)
 plot(hook.steps[1:end-1])
 </pre></div>

@@ -14593,7 +14593,7 @@ <h3 id="Q4:-Why-does-it-need-more-than-3-steps-to-reach-our-goal?">Q4: Why does
         explorer=GreedyExplorer()
     ),
     env,
-    StopAfterEpisode(10),
+    StopAfterNEpisodes(10),
     hook
 )
 plot(hook.steps[1:end-1])

docs/homepage/blog/ospp_report_210370190/index.md
+2 -2

@@ -656,7 +656,7 @@ Besides, I implement the [`EDManager`](https://juliareinforcementlearning.org/do
 function Base.run(
     π::EDManager,
     env::AbstractEnv,
-    stop_condition = StopAfterEpisode(1),
+    stop_condition = StopAfterNEpisodes(1),
     hook::AbstractHook = EmptyHook(),
 )
     @assert NumAgentStyle(env) == MultiAgent(2) "ED algorithm only support 2-players games."

@@ -757,7 +757,7 @@ EDmanager = EDManager(
     )
 )
 # initialize the `stop_condition` and `hook`.
-stop_condition = StopAfterEpisode(100_000, is_show_progress=!haskey(ENV, "CI"))
+stop_condition = StopAfterNEpisodes(100_000, is_show_progress=!haskey(ENV, "CI"))
 hook = KuhnOpenNewEDHook(0, 100, [], [])
 ```

docs/src/How_to_use_hooks.md
+4 -4

@@ -68,7 +68,7 @@ end
 (h::TimeCostPerEpisode)(::PreEpisodeStage, policy, env) = h.t = time_ns()
 (h::TimeCostPerEpisode)(::PostEpisodeStage, policy, env) = push!(h.time_costs, time_ns()-h.t)
 h = TimeCostPerEpisode()
-run(RandomPolicy(), CartPoleEnv(), StopAfterEpisode(10), h)
+run(RandomPolicy(), CartPoleEnv(), StopAfterNEpisodes(10), h)
 h.time_costs
 ```

@@ -97,7 +97,7 @@ policy = RandomPolicy()
 run(
     policy,
     CartPoleEnv(),
-    StopAfterEpisode(100),
+    StopAfterNEpisodes(100),
     DoEveryNEpisode(;n=10) do t, policy, env
         # In real world cases, the policy is usually wrapped in an Agent,
         # we need to extract the inner policy to run it in the *actor* mode.

@@ -107,7 +107,7 @@ run(
         # polluting the original env.

         hook = TotalRewardPerEpisode(;is_display_on_exit=false)
-        run(policy, CartPoleEnv(), StopAfterEpisode(10), hook)
+        run(policy, CartPoleEnv(), StopAfterNEpisodes(10), hook)

         # now you can report the result of the hook.
         println("avg reward at episode $t is: $(mean(hook.rewards))")

@@ -192,7 +192,7 @@ hook = ComposedHook(
         end
     end
 )
-run(RandomPolicy(), CartPoleEnv(), StopAfterEpisode(50), hook)
+run(RandomPolicy(), CartPoleEnv(), StopAfterNEpisodes(50), hook)
 readdir(tf_log_dir)
 ```
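
The hunks above only rename the stop condition; the hook API's stage-callback pattern is unchanged. For orientation, here is a minimal custom-hook sketch in the same style (not part of this commit; `EpisodeCounter` is a hypothetical name, and `RandomPolicy`/`RandomWalk1D` come from the packages touched elsewhere in this diff):

```julia
using ReinforcementLearning

# Hypothetical hook, mirroring the TimeCostPerEpisode pattern shown above:
# it simply counts how many episodes have finished.
mutable struct EpisodeCounter <: AbstractHook
    n::Int
end
EpisodeCounter() = EpisodeCounter(0)

# Stage callback: invoked once after every episode.
(h::EpisodeCounter)(::PostEpisodeStage, policy, env) = h.n += 1

h = EpisodeCounter()
run(RandomPolicy(), RandomWalk1D(), StopAfterNEpisodes(10), h)
h.n  # expected to be 10
```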

docs/src/How_to_write_a_customized_environment.md
+3 -3

@@ -117,7 +117,7 @@ ReinforcementLearning.jl also work. Similar to the test above, let's try the
 [`RandomPolicy`](@ref) first:

 ```@repl customized_env
-run(RandomPolicy(action_space(env)), env, StopAfterEpisode(1_000))
+run(RandomPolicy(action_space(env)), env, StopAfterNEpisodes(1_000))
 ```

 If no error shows up, then it means our environment at least works with

@@ -126,7 +126,7 @@ episode to see the performance of the `RandomPolicy`.

 ```@repl customized_env
 hook = TotalRewardPerEpisode()
-run(RandomPolicy(action_space(env)), env, StopAfterEpisode(1_000), hook)
+run(RandomPolicy(action_space(env)), env, StopAfterNEpisodes(1_000), hook)
 using Plots
 pyplot() #hide
 plot(hook.rewards)

@@ -198,7 +198,7 @@ Nice job! Now we are ready to run the experiment:

 ```@repl customized_env
 h = TotalRewardPerEpisode()
-run(p, wrapped_env, StopAfterEpisode(1_000), h)
+run(p, wrapped_env, StopAfterNEpisodes(1_000), h)
 plot(h.rewards)
 savefig("custom_env_random_policy_reward_wrapped_env.svg"); nothing # hide
 ```

docs/src/non_episodic.md
+2 -2

@@ -9,7 +9,7 @@ Using this means that the value of the terminal state is set to 0 when learning

 Also called _Continuing tasks_ (Sutton & Barto, 2018), non-episodic environment do not have a terminal state and thus may run for ever, or until the `stop_condition` is reached. Sometimes however, one may want to periodically reset the environment to start fresh. A first possibility is to implement `RLBase.is_terminated(::YourEnvironment)` to reset according to an arbitrary condition. However this may not be a good idea because the value of the last state (note that it is not a _terminal_ state) will be bootstrapped to 0 during learning, even though it is not the true value of the state.

-To manage this, we provide the `ResetAfterNSteps(n)` condition as an argument to `run(policy, env, stop_condition, hook, reset_condition = ResetAtTerminal())`. The default `ResetAtTerminal()` assumes an episodic environment, changing that to `ResetAfterNSteps(n)` will no longer check `is_terminated` but will instead call `reset!` every `n` steps. This way, the value of the last state will not be multiplied by 0 during bootstrapping and the correct value can be learned.
+To manage this, we provide the `ResetAfterNSteps(n)` condition as an argument to `run(policy, env, stop_condition, hook, reset_condition = ResetIfEnvTerminated())`. The default `ResetIfEnvTerminated()` assumes an episodic environment, changing that to `ResetAfterNSteps(n)` will no longer check `is_terminated` but will instead call `reset!` every `n` steps. This way, the value of the last state will not be multiplied by 0 during bootstrapping and the correct value can be learned.

 ## Custom reset conditions

@@ -39,7 +39,7 @@ end
 run(agent, env, stop_condition, hook, MyCondition(ResetAfterNSteps(10000)))
 ```

-A last possibility is to use an anonymous function. This approach cannot be used to implement stateful conditions (such as `ResetAfterNSteps`). For example here is alternative way to implement `ResetAtTerminal`:
+A last possibility is to use an anonymous function. This approach cannot be used to implement stateful conditions (such as `ResetAfterNSteps`). For example here is alternative way to implement `ResetIfEnvTerminated`:

 ```julia
 run(agent, env, stop_condition, hook, (p,e) -> is_terminated(e))
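
For readers tracking the rename, a short usage sketch of the reset conditions discussed in the documentation above (not part of this commit; `agent`, `env`, `stop_condition`, and `hook` are assumed to be defined as in the surrounding docs):

```julia
# Episodic default: reset whenever the environment reports termination.
run(agent, env, stop_condition, hook, ResetIfEnvTerminated())

# Continuing task: ignore `is_terminated` and call `reset!` every 10_000 steps instead.
run(agent, env, stop_condition, hook, ResetAfterNSteps(10_000))
```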

docs/src/tutorial.md
+4 -4

@@ -43,7 +43,7 @@ a descriptive pattern.
 run(
     RandomPolicy(),
     RandomWalk1D(),
-    StopAfterEpisode(10),
+    StopAfterNEpisodes(10),
     TotalRewardPerEpisode()
 )
 ```

@@ -58,7 +58,7 @@ policy = TabularPolicy(;table=Dict(zip(1:NS, fill(2, NS))))
 run(
     policy,
     RandomWalk1D(),
-    StopAfterEpisode(10),
+    StopAfterNEpisodes(10),
     TotalRewardPerEpisode()
 )
 ```

@@ -91,7 +91,7 @@ this policy to the `env` to estimate its performance.
 run(
     policy,
     RandomWalk1D(),
-    StopAfterEpisode(10),
+    StopAfterNEpisodes(10),
     TotalRewardPerEpisode()
 )
 ```

@@ -109,7 +109,7 @@ agent = Agent(
     policy = policy,
     trajectory = VectorSARTTrajectory()
 )
-run(agent, env, StopAfterEpisode(10), TotalRewardPerEpisode())
+run(agent, env, StopAfterNEpisodes(10), TotalRewardPerEpisode())
 ```

 Here the [`VectorSARTTrajectory`](@ref) is used to store the **S**tate,

src/ReinforcementLearningCore/src/core/run.jl
+1 -1

@@ -17,7 +17,7 @@ end
 function Base.run(
     policy::AbstractPolicy,
     env::AbstractEnv,
-    stop_condition::AbstractStopCondition=StopAfterEpisode(1),
+    stop_condition::AbstractStopCondition=StopAfterNEpisodes(1),
     hook::AbstractHook=EmptyHook(),
     reset_condition::AbstractResetCondition=ResetIfEnvTerminated()
 )
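
Because the defaults are part of the renamed API, here is a minimal call sketch (not from this commit) of what the signature above implies, assuming a policy and environment from the packages touched elsewhere in this diff:

```julia
using ReinforcementLearning

policy = RandomPolicy()
env = RandomWalk1D()

# Uses the defaults from the method above:
# StopAfterNEpisodes(1), EmptyHook(), ResetIfEnvTerminated().
run(policy, env)

# Equivalent call with the defaults spelled out explicitly.
run(policy, env, StopAfterNEpisodes(1), EmptyHook(), ResetIfEnvTerminated())
```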

src/ReinforcementLearningCore/src/core/stop_conditions.jl
+12 -12

@@ -1,5 +1,5 @@
 export AbstractStopCondition, StopAfterStep,
-    StopAfterEpisode, StopWhenDone, StopSignal, StopAfterNoImprovement, StopAfterNSeconds, ComposedStopCondition
+    StopAfterNEpisodes, StopIfEnvTerminated, StopSignal, StopAfterNoImprovement, StopAfterNSeconds, ComposedStopCondition

 import ProgressMeter

@@ -66,40 +66,40 @@ end
 check!(s::StopAfterStep{Nothing}, args...) = _stop_after_step(s)

 #####
-# StopAfterEpisode
+# StopAfterNEpisodes
 #####

 """
-    StopAfterEpisode(episode; cur = 0, is_show_progress = true)
+    StopAfterNEpisodes(episode; cur = 0, is_show_progress = true)

 Return `true` after being called `episode`. If `is_show_progress` is `true`, the `ProgressMeter` will be used to show progress.
 """
-mutable struct StopAfterEpisode{Tl} <: AbstractStopCondition
+mutable struct StopAfterNEpisodes{Tl} <: AbstractStopCondition
     episode::Int
     cur::Int
     "IGNORE"
     progress::Tl
 end

-function StopAfterEpisode(episode; cur = 0, is_show_progress = true)
+function StopAfterNEpisodes(episode; cur = 0, is_show_progress = true)
     if is_show_progress
         progress = ProgressMeter.Progress(episode, dt = 1)
         ProgressMeter.update!(progress, cur)
     else
         progress = nothing
     end
-    StopAfterEpisode(episode, cur, progress)
+    StopAfterNEpisodes(episode, cur, progress)
 end

-function check!(s::StopAfterEpisode{Nothing}, agent, env)
+function check!(s::StopAfterNEpisodes{Nothing}, agent, env)
     if is_terminated(env)
         s.cur += 1
     end

     s.cur >= s.episode
 end

-function check!(s::StopAfterEpisode, agent, env)
+function check!(s::StopAfterNEpisodes, agent, env)
     if is_terminated(env)
         s.cur += 1
         ProgressMeter.next!(s.progress)

@@ -157,17 +157,17 @@ function check!(s::StopAfterNoImprovement, agent, env)
 end

 #####
-# StopWhenDone
+# StopIfEnvTerminated
 #####

 """
-    StopWhenDone()
+    StopIfEnvTerminated()

 Return `true` if the environment is terminated.
 """
-struct StopWhenDone <: AbstractStopCondition end
+struct StopIfEnvTerminated <: AbstractStopCondition end

-check!(s::StopWhenDone, agent, env) = is_terminated(env)
+check!(s::StopIfEnvTerminated, agent, env) = is_terminated(env)

 #####
 # StopSignal
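
The struct-plus-`check!` pattern in this file is the whole stop-condition contract, so custom conditions follow the same shape. A hedged sketch, written in the same style as the definitions above (not part of this commit; `StopAfterBudget` is a hypothetical name and is essentially a bare-bones `StopAfterStep` without the progress meter):

```julia
# Hypothetical condition: stop once a budget of condition checks is spent.
mutable struct StopAfterBudget <: AbstractStopCondition
    budget::Int
    used::Int
end
StopAfterBudget(budget) = StopAfterBudget(budget, 0)

# In user code this method would extend `ReinforcementLearningCore.check!`,
# just like the `check!` methods in the diff above.
function check!(s::StopAfterBudget, agent, env)
    s.used += 1
    s.used >= s.budget
end
```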

src/ReinforcementLearningCore/test/core/base.jl
+3 -3

@@ -20,7 +20,7 @@ using TimerOutputs
     @test sum(hook[]) + length(hook[]) - 1 == length(agent.trajectory.container)
 end

-@testset "StopAfterEpisode" begin
+@testset "StopAfterNEpisodes" begin
     agent = Agent(
         RandomPolicy(),
         Trajectory(

@@ -30,7 +30,7 @@ using TimerOutputs
         ),
     )
     env = RandomWalk1D()
-    stop_condition = StopAfterEpisode(10)
+    stop_condition = StopAfterNEpisodes(10)
     hook = StepsPerEpisode()
     run(agent, env, stop_condition, hook)

@@ -67,7 +67,7 @@ using TimerOutputs
         ),
     )
     env = RandomWalk1D()
-    stop_condition = StopAfterEpisode(10)
+    stop_condition = StopAfterNEpisodes(10)
     hook = StepsPerEpisode()

     exp = Experiment(policy, env, stop_condition, hook)

src/ReinforcementLearningCore/test/core/hooks.jl
+1 -1

@@ -32,7 +32,7 @@ end

 function test_run!(hook::AbstractHook)
     hook_ = deepcopy(hook)
-    run(RandomPolicy(), RandomWalk1D(), StopAfterEpisode(10), hook_)
+    run(RandomPolicy(), RandomWalk1D(), StopAfterNEpisodes(10), hook_)
     return hook_
 end

src/ReinforcementLearningCore/test/core/stop_conditions.jl
+4 -4

@@ -16,10 +16,10 @@ end
     @test sum([check!(composed_stop) for i in 1:20]) == 18
 end

-@testset "StopAfterEpisode" begin
-    stop_1 = StopAfterEpisode(2)
-    stop_2 = StopAfterEpisode(2; is_show_progress=false)
-    stop_3 = StopAfterEpisode(2; is_show_progress=true)
+@testset "StopAfterNEpisodes" begin
+    stop_1 = StopAfterNEpisodes(2)
+    stop_2 = StopAfterNEpisodes(2; is_show_progress=false)
+    stop_3 = StopAfterNEpisodes(2; is_show_progress=true)

     for stop_condition in (stop_1, stop_2)
         env = RandomWalk1D()

src/ReinforcementLearningCore/test/policies/multi_agent.jl
+3 -3

@@ -72,7 +72,7 @@ end
     multiagent_hook = MultiAgentHook((; :Cross => StepsPerEpisode(), :Nought => StepsPerEpisode()))

     env = TicTacToeEnv()
-    stop_condition = StopWhenDone()
+    stop_condition = StopIfEnvTerminated()
     hook = StepsPerEpisode()

     @test RLBase.reward(env, :Cross) == 0

@@ -130,7 +130,7 @@ end
     ))

     env = RockPaperScissorsEnv()
-    stop_condition = StopWhenDone()
+    stop_condition = StopIfEnvTerminated()
     composed_hook = ComposedHook(
         BatchStepsPerEpisode(10),
         RewardsPerEpisode(),

@@ -183,7 +183,7 @@ end

     let err = nothing
         try
-            x = run(m, e, StopAfterEpisode(10), hooks)
+            x = run(m, e, StopAfterNEpisodes(10), hooks)
         catch err
         end
         @test !(err isa Exception)

src/ReinforcementLearningEnvironments/src/environments/examples/GraphShortestPathEnv.jl
+1 -1

@@ -114,7 +114,7 @@ end

 (h::ShortestPathCount)(::PreEpisodeStage, policy, env) = push!(h.shortest_paths, M[env.goal, env.pos])

-h = run(policy, env, StopAfterEpisode(1_000), ComposedHook(StepsPerEpisode(), ShortestPathCount()))
+h = run(policy, env, StopAfterNEpisodes(1_000), ComposedHook(StepsPerEpisode(), ShortestPathCount()))

 using UnicodePlots

src/ReinforcementLearningEnvironments/test/environments/examples/tic_tac_toe.jl
+1 -1

@@ -22,7 +22,7 @@
     multiagent_hook = MultiAgentHook((; :Cross => StepsPerEpisode(), :Nought => StepsPerEpisode()))

     env = TicTacToeEnv()
-    stop_condition = StopWhenDone()
+    stop_condition = StopIfEnvTerminated()

     RLBase.test_interfaces!(env)
     RLBase.test_runnable!(env)
