
Commit 22a5a54

Fix naming per PR comments

Author: Jeremiah Lewis (committed)
1 parent b2dd626 commit 22a5a54

14 files changed: +48 -48 lines changed


docs/homepage/blog/a_practical_introduction_to_RL.jl/index.html
+7 -7

@@ -13872,7 +13872,7 @@ <h2 id="Q2:-What-if-we-want-to-stop-after-several-episodes?"><strong>Q2: What if
 <div class="prompt input_prompt">In&nbsp;[15]:</div>
 <div class="inner_cell">
 <div class="input_area">
-<div class=" highlight hl-julia"><pre>run(policy, env, StopAfterEpisode(10))
+<div class=" highlight hl-julia"><pre>run(policy, env, StopAfterNEpisodes(10))
 </pre></div>

 </div>

@@ -13907,7 +13907,7 @@ <h3 id="Q2.b:-What-if-we-want-to-stop-until-arbitrary-condition-meets?"><strong>
 In RL.jl, several common ones are already provided, like:</p>
 <ul>
 <li><code>StopAfterStep</code></li>
-<li><code>StopAfterEpisode</code></li>
+<li><code>StopAfterNEpisodes</code></li>
 <li><code>StopAfterNSeconds</code></li>
 <li>...</li>
 </ul>

@@ -13936,7 +13936,7 @@ <h2 id="Q3:-How-to-collect-experiment-results?"><strong>Q3: How to collect exper
 <div class="prompt input_prompt">In&nbsp;[16]:</div>
 <div class="inner_cell">
 <div class="input_area">
-<div class=" highlight hl-julia"><pre>run(policy, env, StopAfterEpisode(10), TotalRewardPerEpisode())
+<div class=" highlight hl-julia"><pre>run(policy, env, StopAfterNEpisodes(10), TotalRewardPerEpisode())
 </pre></div>

 </div>

@@ -14144,7 +14144,7 @@ <h2 id="The-Actor-Mode">The <em>Actor</em> Mode<a class="anchor-link" href="#The
 <div class=" highlight hl-julia"><pre>run(
     policy,
     RandomWalk1D(),
-    StopAfterEpisode(10),
+    StopAfterNEpisodes(10),
     TotalRewardPerEpisode()
 )
 </pre></div>

@@ -14311,7 +14311,7 @@ <h2 id="The-Training-Mode">The <em>Training</em> Mode<a class="anchor-link" href
 <div class="prompt input_prompt">In&nbsp;[22]:</div>
 <div class="inner_cell">
 <div class="input_area">
-<div class=" highlight hl-julia"><pre>run(agent, env, StopAfterEpisode(10), TotalRewardPerEpisode())
+<div class=" highlight hl-julia"><pre>run(agent, env, StopAfterNEpisodes(10), TotalRewardPerEpisode())
 </pre></div>

 </div>

@@ -14383,7 +14383,7 @@ <h2 id="The-Training-Mode">The <em>Training</em> Mode<a class="anchor-link" href
 <div class="inner_cell">
 <div class="input_area">
 <div class=" highlight hl-julia"><pre>hook = StepsPerEpisode()
-run(agent, env, StopAfterEpisode(10), hook)
+run(agent, env, StopAfterNEpisodes(10), hook)
 plot(hook.steps[1:end-1])
 </pre></div>

@@ -14593,7 +14593,7 @@ <h3 id="Q4:-Why-does-it-need-more-than-3-steps-to-reach-our-goal?">Q4: Why does
         explorer=GreedyExplorer()
     ),
     env,
-    StopAfterEpisode(10),
+    StopAfterNEpisodes(10),
     hook
 )
 plot(hook.steps[1:end-1])

docs/homepage/blog/ospp_report_210370190/index.md
+2 -2

@@ -656,7 +656,7 @@ Besides, I implement the [`EDManager`](https://juliareinforcementlearning.org/do
 function Base.run(
     π::EDManager,
     env::AbstractEnv,
-    stop_condition = StopAfterEpisode(1),
+    stop_condition = StopAfterNEpisodes(1),
     hook::AbstractHook = EmptyHook(),
 )
     @assert NumAgentStyle(env) == MultiAgent(2) "ED algorithm only support 2-players games."

@@ -757,7 +757,7 @@ EDmanager = EDManager(
     )
 )
 # initialize the `stop_condition` and `hook`.
-stop_condition = StopAfterEpisode(100_000, is_show_progress=!haskey(ENV, "CI"))
+stop_condition = StopAfterNEpisodes(100_000, is_show_progress=!haskey(ENV, "CI"))
 hook = KuhnOpenNewEDHook(0, 100, [], [])
 ```

docs/src/How_to_use_hooks.md
+4 -4

@@ -68,7 +68,7 @@ end
 (h::TimeCostPerEpisode)(::PreEpisodeStage, policy, env) = h.t = time_ns()
 (h::TimeCostPerEpisode)(::PostEpisodeStage, policy, env) = push!(h.time_costs, time_ns()-h.t)
 h = TimeCostPerEpisode()
-run(RandomPolicy(), CartPoleEnv(), StopAfterEpisode(10), h)
+run(RandomPolicy(), CartPoleEnv(), StopAfterNEpisodes(10), h)
 h.time_costs
 ```

@@ -97,7 +97,7 @@ policy = RandomPolicy()
 run(
     policy,
     CartPoleEnv(),
-    StopAfterEpisode(100),
+    StopAfterNEpisodes(100),
     DoEveryNEpisode(;n=10) do t, policy, env
         # In real world cases, the policy is usually wrapped in an Agent,
         # we need to extract the inner policy to run it in the *actor* mode.

@@ -107,7 +107,7 @@ run(
         # polluting the original env.

         hook = TotalRewardPerEpisode(;is_display_on_exit=false)
-        run(policy, CartPoleEnv(), StopAfterEpisode(10), hook)
+        run(policy, CartPoleEnv(), StopAfterNEpisodes(10), hook)

         # now you can report the result of the hook.
         println("avg reward at episode $t is: $(mean(hook.rewards))")

@@ -192,7 +192,7 @@ hook = ComposedHook(
         end
     end
 )
-run(RandomPolicy(), CartPoleEnv(), StopAfterEpisode(50), hook)
+run(RandomPolicy(), CartPoleEnv(), StopAfterNEpisodes(50), hook)
 readdir(tf_log_dir)
 ```
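
The hunks above only rename the stop condition; the hook API's stage-callback pattern is unchanged. For orientation, here is a minimal custom-hook sketch in the same style (not part of this commit; `EpisodeCounter` is a hypothetical name, and `RandomPolicy`/`RandomWalk1D` come from the packages touched elsewhere in this diff):

```julia
using ReinforcementLearning

# Hypothetical hook, mirroring the TimeCostPerEpisode pattern shown above:
# it simply counts how many episodes have finished.
mutable struct EpisodeCounter <: AbstractHook
    n::Int
end
EpisodeCounter() = EpisodeCounter(0)

# Stage callback: invoked once after every episode.
(h::EpisodeCounter)(::PostEpisodeStage, policy, env) = h.n += 1

h = EpisodeCounter()
run(RandomPolicy(), RandomWalk1D(), StopAfterNEpisodes(10), h)
h.n  # expected to be 10
```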

docs/src/How_to_write_a_customized_environment.md
+3 -3

@@ -117,7 +117,7 @@ ReinforcementLearning.jl also work. Similar to the test above, let's try the
 [`RandomPolicy`](@ref) first:

 ```@repl customized_env
-run(RandomPolicy(action_space(env)), env, StopAfterEpisode(1_000))
+run(RandomPolicy(action_space(env)), env, StopAfterNEpisodes(1_000))
 ```

 If no error shows up, then it means our environment at least works with

@@ -126,7 +126,7 @@ episode to see the performance of the `RandomPolicy`.

 ```@repl customized_env
 hook = TotalRewardPerEpisode()
-run(RandomPolicy(action_space(env)), env, StopAfterEpisode(1_000), hook)
+run(RandomPolicy(action_space(env)), env, StopAfterNEpisodes(1_000), hook)
 using Plots
 pyplot() #hide
 plot(hook.rewards)

@@ -198,7 +198,7 @@ Nice job! Now we are ready to run the experiment:

 ```@repl customized_env
 h = TotalRewardPerEpisode()
-run(p, wrapped_env, StopAfterEpisode(1_000), h)
+run(p, wrapped_env, StopAfterNEpisodes(1_000), h)
 plot(h.rewards)
 savefig("custom_env_random_policy_reward_wrapped_env.svg"); nothing # hide
 ```

docs/src/non_episodic.md
+2 -2

@@ -9,7 +9,7 @@ Using this means that the value of the terminal state is set to 0 when learning

 Also called _Continuing tasks_ (Sutton & Barto, 2018), non-episodic environment do not have a terminal state and thus may run for ever, or until the `stop_condition` is reached. Sometimes however, one may want to periodically reset the environment to start fresh. A first possibility is to implement `RLBase.is_terminated(::YourEnvironment)` to reset according to an arbitrary condition. However this may not be a good idea because the value of the last state (note that it is not a _terminal_ state) will be bootstrapped to 0 during learning, even though it is not the true value of the state.

-To manage this, we provide the `ResetAfterNSteps(n)` condition as an argument to `run(policy, env, stop_condition, hook, reset_condition = ResetAtTerminal())`. The default `ResetAtTerminal()` assumes an episodic environment, changing that to `ResetAfterNSteps(n)` will no longer check `is_terminated` but will instead call `reset!` every `n` steps. This way, the value of the last state will not be multiplied by 0 during bootstrapping and the correct value can be learned.
+To manage this, we provide the `ResetAfterNSteps(n)` condition as an argument to `run(policy, env, stop_condition, hook, reset_condition = ResetIfEnvTerminated())`. The default `ResetIfEnvTerminated()` assumes an episodic environment, changing that to `ResetAfterNSteps(n)` will no longer check `is_terminated` but will instead call `reset!` every `n` steps. This way, the value of the last state will not be multiplied by 0 during bootstrapping and the correct value can be learned.

 ## Custom reset conditions

@@ -39,7 +39,7 @@ end
 run(agent, env, stop_condition, hook, MyCondition(ResetAfterNSteps(10000)))
 ```

-A last possibility is to use an anonymous function. This approach cannot be used to implement stateful conditions (such as `ResetAfterNSteps`). For example here is alternative way to implement `ResetAtTerminal`:
+A last possibility is to use an anonymous function. This approach cannot be used to implement stateful conditions (such as `ResetAfterNSteps`). For example here is alternative way to implement `ResetIfEnvTerminated`:

 ```julia
 run(agent, env, stop_condition, hook, (p,e) -> is_terminated(e))
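
For readers tracking the rename, a short usage sketch of the reset conditions discussed in the documentation above (not part of this commit; `agent`, `env`, `stop_condition`, and `hook` are assumed to be defined as in the surrounding docs):

```julia
# Episodic default: reset whenever the environment reports termination.
run(agent, env, stop_condition, hook, ResetIfEnvTerminated())

# Continuing task: ignore `is_terminated` and call `reset!` every 10_000 steps instead.
run(agent, env, stop_condition, hook, ResetAfterNSteps(10_000))
```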

docs/src/tutorial.md
+4 -4

@@ -43,7 +43,7 @@ a descriptive pattern.
 run(
     RandomPolicy(),
     RandomWalk1D(),
-    StopAfterEpisode(10),
+    StopAfterNEpisodes(10),
     TotalRewardPerEpisode()
 )
 ```

@@ -58,7 +58,7 @@ policy = TabularPolicy(;table=Dict(zip(1:NS, fill(2, NS))))
 run(
     policy,
     RandomWalk1D(),
-    StopAfterEpisode(10),
+    StopAfterNEpisodes(10),
     TotalRewardPerEpisode()
 )
 ```

@@ -91,7 +91,7 @@ this policy to the `env` to estimate its performance.
 run(
     policy,
     RandomWalk1D(),
-    StopAfterEpisode(10),
+    StopAfterNEpisodes(10),
     TotalRewardPerEpisode()
 )
 ```

@@ -109,7 +109,7 @@ agent = Agent(
     policy = policy,
     trajectory = VectorSARTTrajectory()
 )
-run(agent, env, StopAfterEpisode(10), TotalRewardPerEpisode())
+run(agent, env, StopAfterNEpisodes(10), TotalRewardPerEpisode())
 ```

 Here the [`VectorSARTTrajectory`](@ref) is used to store the **S**tate,

src/ReinforcementLearningCore/src/core/run.jl
+1 -1

@@ -17,7 +17,7 @@ end
 function Base.run(
     policy::AbstractPolicy,
     env::AbstractEnv,
-    stop_condition::AbstractStopCondition=StopAfterEpisode(1),
+    stop_condition::AbstractStopCondition=StopAfterNEpisodes(1),
     hook::AbstractHook=EmptyHook(),
     reset_condition::AbstractResetCondition=ResetIfEnvTerminated()
 )
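
Because the defaults are part of the renamed API, here is a minimal call sketch (not from this commit) of what the signature above implies, assuming a policy and environment from the packages touched elsewhere in this diff:

```julia
using ReinforcementLearning

policy = RandomPolicy()
env = RandomWalk1D()

# Uses the defaults from the method above:
# StopAfterNEpisodes(1), EmptyHook(), ResetIfEnvTerminated().
run(policy, env)

# Equivalent call with the defaults spelled out explicitly.
run(policy, env, StopAfterNEpisodes(1), EmptyHook(), ResetIfEnvTerminated())
```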

src/ReinforcementLearningCore/src/core/stop_conditions.jl
+12 -12

@@ -1,5 +1,5 @@
 export AbstractStopCondition, StopAfterStep,
-    StopAfterEpisode, StopWhenDone, StopSignal, StopAfterNoImprovement, StopAfterNSeconds, ComposedStopCondition
+    StopAfterNEpisodes, StopIfEnvTerminated, StopSignal, StopAfterNoImprovement, StopAfterNSeconds, ComposedStopCondition

 import ProgressMeter

@@ -66,40 +66,40 @@ end
 check!(s::StopAfterStep{Nothing}, args...) = _stop_after_step(s)

 #####
-# StopAfterEpisode
+# StopAfterNEpisodes
 #####

 """
-    StopAfterEpisode(episode; cur = 0, is_show_progress = true)
+    StopAfterNEpisodes(episode; cur = 0, is_show_progress = true)

 Return `true` after being called `episode`. If `is_show_progress` is `true`, the `ProgressMeter` will be used to show progress.
 """
-mutable struct StopAfterEpisode{Tl} <: AbstractStopCondition
+mutable struct StopAfterNEpisodes{Tl} <: AbstractStopCondition
     episode::Int
     cur::Int
     "IGNORE"
     progress::Tl
 end

-function StopAfterEpisode(episode; cur = 0, is_show_progress = true)
+function StopAfterNEpisodes(episode; cur = 0, is_show_progress = true)
     if is_show_progress
         progress = ProgressMeter.Progress(episode, dt = 1)
         ProgressMeter.update!(progress, cur)
     else
         progress = nothing
     end
-    StopAfterEpisode(episode, cur, progress)
+    StopAfterNEpisodes(episode, cur, progress)
 end

-function check!(s::StopAfterEpisode{Nothing}, agent, env)
+function check!(s::StopAfterNEpisodes{Nothing}, agent, env)
     if is_terminated(env)
         s.cur += 1
     end

     s.cur >= s.episode
 end

-function check!(s::StopAfterEpisode, agent, env)
+function check!(s::StopAfterNEpisodes, agent, env)
     if is_terminated(env)
         s.cur += 1
         ProgressMeter.next!(s.progress)

@@ -157,17 +157,17 @@ function check!(s::StopAfterNoImprovement, agent, env)
 end

 #####
-# StopWhenDone
+# StopIfEnvTerminated
 #####

 """
-    StopWhenDone()
+    StopIfEnvTerminated()

 Return `true` if the environment is terminated.
 """
-struct StopWhenDone <: AbstractStopCondition end
+struct StopIfEnvTerminated <: AbstractStopCondition end

-check!(s::StopWhenDone, agent, env) = is_terminated(env)
+check!(s::StopIfEnvTerminated, agent, env) = is_terminated(env)

 #####
 # StopSignal
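
The struct-plus-`check!` pattern in this file is the whole stop-condition contract, so custom conditions follow the same shape. A hedged sketch, written in the same style as the definitions above (not part of this commit; `StopAfterBudget` is a hypothetical name and is essentially a bare-bones `StopAfterStep` without the progress meter):

```julia
# Hypothetical condition: stop once a budget of condition checks is spent.
mutable struct StopAfterBudget <: AbstractStopCondition
    budget::Int
    used::Int
end
StopAfterBudget(budget) = StopAfterBudget(budget, 0)

# In user code this method would extend `ReinforcementLearningCore.check!`,
# just like the `check!` methods in the diff above.
function check!(s::StopAfterBudget, agent, env)
    s.used += 1
    s.used >= s.budget
end
```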

src/ReinforcementLearningCore/test/core/base.jl
+3 -3

@@ -20,7 +20,7 @@ using TimerOutputs
     @test sum(hook[]) + length(hook[]) - 1 == length(agent.trajectory.container)
 end

-@testset "StopAfterEpisode" begin
+@testset "StopAfterNEpisodes" begin
     agent = Agent(
         RandomPolicy(),
         Trajectory(

@@ -30,7 +30,7 @@ using TimerOutputs
         ),
     )
     env = RandomWalk1D()
-    stop_condition = StopAfterEpisode(10)
+    stop_condition = StopAfterNEpisodes(10)
     hook = StepsPerEpisode()
     run(agent, env, stop_condition, hook)

@@ -67,7 +67,7 @@ using TimerOutputs
         ),
     )
     env = RandomWalk1D()
-    stop_condition = StopAfterEpisode(10)
+    stop_condition = StopAfterNEpisodes(10)
     hook = StepsPerEpisode()

     exp = Experiment(policy, env, stop_condition, hook)

src/ReinforcementLearningCore/test/core/hooks.jl
+1 -1

@@ -32,7 +32,7 @@ end

 function test_run!(hook::AbstractHook)
     hook_ = deepcopy(hook)
-    run(RandomPolicy(), RandomWalk1D(), StopAfterEpisode(10), hook_)
+    run(RandomPolicy(), RandomWalk1D(), StopAfterNEpisodes(10), hook_)
     return hook_
 end

src/ReinforcementLearningCore/test/core/stop_conditions.jl
+4 -4

@@ -16,10 +16,10 @@ end
     @test sum([check!(composed_stop) for i in 1:20]) == 18
 end

-@testset "StopAfterEpisode" begin
-    stop_1 = StopAfterEpisode(2)
-    stop_2 = StopAfterEpisode(2; is_show_progress=false)
-    stop_3 = StopAfterEpisode(2; is_show_progress=true)
+@testset "StopAfterNEpisodes" begin
+    stop_1 = StopAfterNEpisodes(2)
+    stop_2 = StopAfterNEpisodes(2; is_show_progress=false)
+    stop_3 = StopAfterNEpisodes(2; is_show_progress=true)

     for stop_condition in (stop_1, stop_2)
         env = RandomWalk1D()

src/ReinforcementLearningCore/test/policies/multi_agent.jl
+3 -3

@@ -72,7 +72,7 @@ end
     multiagent_hook = MultiAgentHook((; :Cross => StepsPerEpisode(), :Nought => StepsPerEpisode()))

     env = TicTacToeEnv()
-    stop_condition = StopWhenDone()
+    stop_condition = StopIfEnvTerminated()
     hook = StepsPerEpisode()

     @test RLBase.reward(env, :Cross) == 0

@@ -130,7 +130,7 @@ end
     ))

     env = RockPaperScissorsEnv()
-    stop_condition = StopWhenDone()
+    stop_condition = StopIfEnvTerminated()
     composed_hook = ComposedHook(
         BatchStepsPerEpisode(10),
         RewardsPerEpisode(),

@@ -183,7 +183,7 @@ end

     let err = nothing
         try
-            x = run(m, e, StopAfterEpisode(10), hooks)
+            x = run(m, e, StopAfterNEpisodes(10), hooks)
         catch err
         end
         @test !(err isa Exception)

src/ReinforcementLearningEnvironments/src/environments/examples/GraphShortestPathEnv.jl
+1 -1

@@ -114,7 +114,7 @@ end

 (h::ShortestPathCount)(::PreEpisodeStage, policy, env) = push!(h.shortest_paths, M[env.goal, env.pos])

-h = run(policy, env, StopAfterEpisode(1_000), ComposedHook(StepsPerEpisode(), ShortestPathCount()))
+h = run(policy, env, StopAfterNEpisodes(1_000), ComposedHook(StepsPerEpisode(), ShortestPathCount()))

 using UnicodePlots

src/ReinforcementLearningEnvironments/test/environments/examples/tic_tac_toe.jl
+1 -1

@@ -22,7 +22,7 @@
     multiagent_hook = MultiAgentHook((; :Cross => StepsPerEpisode(), :Nought => StepsPerEpisode()))

     env = TicTacToeEnv()
-    stop_condition = StopWhenDone()
+    stop_condition = StopIfEnvTerminated()

     RLBase.test_interfaces!(env)
     RLBase.test_runnable!(env)
