From aa5575a38e5d3e2fe8722ed8d932d2d3a9d48c25 Mon Sep 17 00:00:00 2001
From: Zachary Sunberg <zachary.sunberg@colorado.edu>
Date: Sat, 16 Jan 2021 14:49:43 -0700
Subject: [PATCH 1/5] added multiplayer functions

---
 src/CommonRLInterface.jl | 33 ++++++++++++-----
 src/multiplayer.jl       | 77 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 101 insertions(+), 9 deletions(-)
 create mode 100644 src/multiplayer.jl

diff --git a/src/CommonRLInterface.jl b/src/CommonRLInterface.jl
index 2502641..4ab0a99 100644
--- a/src/CommonRLInterface.jl
+++ b/src/CommonRLInterface.jl
@@ -33,9 +33,9 @@ This function is a *static property* of the environment; the value it returns sh
 
 ---
 
-    actions(env::AbstractEnv, i::Integer)
+    actions(env::AbstractEnv, player_index)
 
-Return a collection of all the actions available to player i.
+Return a collection of all the actions available to a given player.
 
 This function is a *static property* of the environment; the value it returns should not change based on the state.
 """
@@ -53,18 +53,33 @@ function observe end
 """
     r = act!(env::AbstractEnv, a)
 
-Take action `a` for the current player, advance AbstractEnv `env` forward one step, and return rewards for all players.
+Take action `a` and advance AbstractEnv `env` forward one step, and return rewards for all players.
 
 This is a *required function* that must be provided by every AbstractEnv.
-"""
-function act! end
 
-"""
-    player(env::AbstractEnv) 
+If the environment has a single player, it is acceptable to return a scalar number. If there are multiple players, it should return a container with indexed with the items in the collection returned by `player_indices`.
+
+# Example
+
+## Single Player
+```julia
+function act!(env::MyMDPEnv, a)
+    env.state += a + randn()
+    return env.state^2
+end
+```
+
+## Two Player
 
-Return the index of the player who should play next in the environment.
+```julia
+function act!(env::MyMDPEnv, a)
+    env.positions[player(env)] += a # In this game, each player has a position that is updated by his or her action
+    rewards = in_goal.(env.positions) # Rewards are +1 for being in a goal region, 0 otherwise
+    return rewards # returns a vector of rewards for each player
+end
+```
 """
-function player end
+function act! end
 
 """
     terminated(env::AbstractEnv)
diff --git a/src/multiplayer.jl b/src/multiplayer.jl
new file mode 100644
index 0000000..bce1bd9
--- /dev/null
+++ b/src/multiplayer.jl
@@ -0,0 +1,77 @@
+"""
+    player_indices(env::AbstractEnv)
+
+Return an iterable collection of indices for all players.
+
+Typically, the indices will be integers, but the only requirement is that they be valid indices for the collection returned by `act!`
+
+This function is a *static property* of the environment; the value it returns should not change based on the state.
+
+# Example
+
+```julia
+@provide player_indices(::MyEnv) = 1:2
+```
+"""
+function player_indices end
+
+"""
+    player(env::AbstractEnv) 
+
+Return the index of the player who should play next in the environment.
+
+The index should be one of the items in the collection returned by `player_indices`.
+"""
+function player end
+
+"""
+    all_act!(env::AbstractEnv, actions)
+
+Take `actions` for all players and advance AbstractEnv `env` forward, and return rewards for all players.
+
+Environments that support simultaneous actions by all players should implement this in addition to or instead of `act!`.
+
+The `actions` container should be indexed by the indices returned by `player_indices(env)`.
+"""
+function all_act! end
+
+"""
+    all_observe(env::AbstractEnv)
+
+Return observations from the environment for all players.
+
+Environments that support simultaneous actions by all players should implement this in addition to or instead of `observe`.
+"""
+function all_observe end
+
+"""
+    UtilityStyle(env)
+
+Trait that allows an environment to declare certain properties about the relative utility for the players.
+
+Possible returns are:
+- `ZeroSum()`
+- `ConstantSum()`
+- `GeneralSum()`
+- `IdenticalUtility()`
+
+See the docstrings for each for more details.
+"""
+abstract type UtilityStyle end
+
+"""
+If `UtilityStyle(env) == ZeroSum()` then the sum of the rewards returned by `act!` is always zero.
+"""
+struct ZeroSum <: UtilityStyle end
+"""
+If `UtilityStyle(env) == ConstantSum()` then the sum of the rewards returned by `act!` will always be the same constant.
+"""
+struct ConstantSum <: UtilityStyle end
+"""
+If `UtilityStyle(env) == GeneralSum()`, the sum of rewards over a trajectory can take any form.
+"""
+struct GeneralSum <: UtilityStyle end
+"""
+If `UtilityStyle(env) == IdenticalUtility()`, all entries of the reward returned by `act!` will be identical for all players.
+"""
+struct IdenticalUtility <: UtilityStyle end

From 94a6cabfaa5b4689b9693d0769bf11dfff20dd22 Mon Sep 17 00:00:00 2001
From: Zachary Sunberg <zachary.sunberg@colorado.edu>
Date: Sun, 17 Jan 2021 00:13:29 -0700
Subject: [PATCH 2/5] indices must be integers

---
 src/multiplayer.jl | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/src/multiplayer.jl b/src/multiplayer.jl
index bce1bd9..e6b27db 100644
--- a/src/multiplayer.jl
+++ b/src/multiplayer.jl
@@ -1,9 +1,7 @@
 """
     player_indices(env::AbstractEnv)
 
-Return an iterable collection of indices for all players.
-
-Typically, the indices will be integers, but the only requirement is that they be valid indices for the collection returned by `act!`
+Return an iterable collection of integer indices for all players.
 
 This function is a *static property* of the environment; the value it returns should not change based on the state.
 
@@ -19,19 +17,15 @@ function player_indices end
     player(env::AbstractEnv) 
 
 Return the index of the player who should play next in the environment.
-
-The index should be one of the items in the collection returned by `player_indices`.
 """
 function player end
 
 """
-    all_act!(env::AbstractEnv, actions)
+    all_act!(env::AbstractEnv, actions::AbstractVector)
 
 Take `actions` for all players and advance AbstractEnv `env` forward, and return rewards for all players.
 
 Environments that support simultaneous actions by all players should implement this in addition to or instead of `act!`.
-
-The `actions` container should be indexed by the indices returned by `player_indices(env)`.
 """
 function all_act! end
 

From bc73edcdc8c0642ee43b2ed97fd7482d8ad3895b Mon Sep 17 00:00:00 2001
From: Zachary Sunberg <zachary.sunberg@colorado.edu>
Date: Sun, 17 Jan 2021 01:16:41 -0700
Subject: [PATCH 3/5] started tic tac toe

---
 examples/tictactoe.jl | 56 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 examples/tictactoe.jl

diff --git a/examples/tictactoe.jl b/examples/tictactoe.jl
new file mode 100644
index 0000000..eeb2a59
--- /dev/null
+++ b/examples/tictactoe.jl
@@ -0,0 +1,56 @@
+#XXX not finished yet
+using CommonRLInterface
+
+const RL = CommonRLInterface
+
+mutable struct TicTacToe <: AbstractEnv
+    board::Matrix{Int} # 0 = untaken, 1 = x, 2 = o
+end
+
+TicTacToe() = TicTacToe(1, zeros(Int, 3, 3))
+
+iswinner(b, p) = any(all(b[i,:].==p) for i in 1:3) ||
+                 any(all(b[:,i]) for i in 1:3) ||
+                 all(b[i,i]==p for i in 1:3) ||
+                 all(b[i,4-i] == p for i in 1:3)
+
+other(p) = mod1(p+1,2)
+
+RL.reset!(env::TicTacToe) = fill!(env.board, 0)
+RL.actions(env::TicTacToe, player=0) = vec([(i, j) for i in 1:3, j in 1:3])
+RL.observe(env::TicTacToe) = env.board
+RL.terminated(env::TicTacToe) = any(iswinner(env.board, p) for p in 1:2) || all(env.board .!= 0)
+
+function RL.act!(env::TicTacToe, a)
+    p = player(env)
+    r = [0, 0]
+    if env.board[a] == 0
+        
+    else
+        # if you take an illegal action, you lose
+        rewards[p] = -1
+        rewards[other(p)] = 1
+    end
+    return rewards
+end
+
+@provide RL.player_indices(env::TicTacToe) = 1:2
+@provide function RL.player(env::TicTacToe)
+    if sum(env.board == 3)
+        
+    else
+
+    end
+end
+
+RL.render(env::TicTacToe) = env
+
+function Base.show(::IO, ::MIME"text/plain", env::TicTacToe)
+    chars = [' ', 'x', 'o']
+    for i in 1:3
+        for j in 1:3
+            print(io, '|'*chars[env.board[i,j]])
+        end
+        println(io, '|')
+    end
+end

From f2be3800766ec5edc4f9a599dafc11dedc2f18c0 Mon Sep 17 00:00:00 2001
From: Zachary Sunberg <zachary.sunberg@colorado.edu>
Date: Sat, 23 Jan 2021 17:15:32 -0700
Subject: [PATCH 4/5] finished multiplayer docs

---
 docs/make.jl             |  1 +
 docs/src/index.md        |  4 ++--
 docs/src/multiplayer.md  | 28 ++++++++++++++++++++++++++++
 docs/src/optional.md     |  4 ++--
 examples/tictactoe.jl    | 10 +++++-----
 src/CommonRLInterface.jl | 15 +++++++++++++--
 src/multiplayer.jl       |  8 ++++----
 7 files changed, 55 insertions(+), 15 deletions(-)
 create mode 100644 docs/src/multiplayer.md

diff --git a/docs/make.jl b/docs/make.jl
index 97dddb9..1a33f03 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -14,6 +14,7 @@ makedocs(;
     pages=[
         "Home" => "index.md",
         "required.md",
+        "multiplayer.md",
         "optional.md",
         "wrappers.md",
         "faqs.md"
diff --git a/docs/src/index.md b/docs/src/index.md
index dc80e3c..cddbb7d 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -6,10 +6,10 @@ CurrentModule = CommonRLInterface
 
 A description of the purpose of the CommonRLInterface package can be found in [the README on GitHub](https://github.com/JuliaReinforcementLearning/CommonRLInterface.jl).
 
-An example environment can be found in the [examples directory on GitHub](https://github.com/JuliaReinforcementLearning/CommonRLInterface.jl/tree/master/examples).
+Example environment can be found in the [examples directory on GitHub](https://github.com/JuliaReinforcementLearning/CommonRLInterface.jl/tree/master/examples).
 
 Detailed reference documentation can be found using the links below:
 
 ```@contents
-Pages = ["required.md", "optional.md", "wrappers.md", "faqs.md"]
+Pages = ["required.md", "multiplayer.md", "optional.md", "wrappers.md", "faqs.md"]
 ```
diff --git a/docs/src/multiplayer.md b/docs/src/multiplayer.md
new file mode 100644
index 0000000..341bd87
--- /dev/null
+++ b/docs/src/multiplayer.md
@@ -0,0 +1,28 @@
+# Multiplayer Interface
+
+CommonRLInterface provides a basic interface for multiplayer games.
+
+## Sequential games
+
+Sequential games should implement the optional function [`players`](@ref) to return a range of player ids, and [`player`](@ref) to indicate which player's turn it is. There is no requirement that players play in the order returned by the `players` function. Only the action for the current player should be supplied to [`act!`](@ref), but rewards for all players should be returned. [`observe`](@ref) returns the observation for only the current player.
+
+## Simultaneous Games/Multi-agent (PO)MDPs
+
+Environments in which all players take actions at once should implement the [`all_act!`](@ref) and [`all_observe`](@ref) optional functions which take a collection of actions for all players and return observations for each player, respectively.
+
+## Indicating reward properties
+
+The [`UtilityStyle`](@ref) trait can be used to indicate that the rewards will satisfy certain properties, for example that rewards for all players are identical or that the game is zero-sum.
+
+
+```@docs
+players
+player
+all_act!
+all_observe
+UtilityStyle
+ZeroSum
+ConstantSum
+GeneralSum
+IdenticalUtility
+```
diff --git a/docs/src/optional.md b/docs/src/optional.md
index 84bea96..f2e63b3 100644
--- a/docs/src/optional.md
+++ b/docs/src/optional.md
@@ -27,7 +27,8 @@ The optional interface currently contains the following functions:
 - [`valid_actions`](@ref)
 - [`valid_action_mask`](@ref)
 - [`observations`](@ref)
-- [`player`](@ref)
+
+Additional optional functions for multiplayer environments are documented in the [Multiplayer Interface](@ref) section.
 
 To propose adding a new function to the interface, please file an issue with the "candidate interface function" label.
 
@@ -43,5 +44,4 @@ setstate!
 valid_actions
 valid_action_mask
 observations
-player
 ```
diff --git a/examples/tictactoe.jl b/examples/tictactoe.jl
index eeb2a59..53b4b9a 100644
--- a/examples/tictactoe.jl
+++ b/examples/tictactoe.jl
@@ -34,16 +34,16 @@ function RL.act!(env::TicTacToe, a)
     return rewards
 end
 
-@provide RL.player_indices(env::TicTacToe) = 1:2
+@provide RL.players(env::TicTacToe) = 1:2
 @provide function RL.player(env::TicTacToe)
-    if sum(env.board == 3)
-        
+    if sum(env.board%3==0)
+        return 1
     else
-
+        return 2
     end
 end
 
-RL.render(env::TicTacToe) = env
+@provide RL.render(env::TicTacToe) = env
 
 function Base.show(::IO, ::MIME"text/plain", env::TicTacToe)
     chars = [' ', 'x', 'o']
diff --git a/src/CommonRLInterface.jl b/src/CommonRLInterface.jl
index 4ab0a99..ec19249 100644
--- a/src/CommonRLInterface.jl
+++ b/src/CommonRLInterface.jl
@@ -8,7 +8,6 @@ export
     actions,
     observe,
     act!,
-    player,
     terminated
 
 abstract type AbstractEnv end
@@ -57,7 +56,7 @@ Take action `a` and advance AbstractEnv `env` forward one step, and return rewar
 
 This is a *required function* that must be provided by every AbstractEnv.
 
-If the environment has a single player, it is acceptable to return a scalar number. If there are multiple players, it should return a container with indexed with the items in the collection returned by `player_indices`.
+If the environment has a single player, it is acceptable to return a scalar number. If there are multiple players, it should return a container indexed with the items in the collection returned by `players`.
 
 # Example
 
@@ -186,6 +185,18 @@ export
     valid_action_mask
 include("spaces.jl")
 
+export
+    players,
+    player,
+    all_act!,
+    all_observe,
+    UtilityStyle,
+    ZeroSum,
+    ConstantSum,
+    GeneralSum,
+    IdenticalUtility
+include("multiplayer.jl")
+
 export
     Wrappers
 include("wrappers.jl")
diff --git a/src/multiplayer.jl b/src/multiplayer.jl
index e6b27db..004bb89 100644
--- a/src/multiplayer.jl
+++ b/src/multiplayer.jl
@@ -1,17 +1,17 @@
 """
-    player_indices(env::AbstractEnv)
+    players(env::AbstractEnv)
 
-Return an iterable collection of integer indices for all players.
+Return an ordered iterable collection of integer indices for all players, starting with one.
 
 This function is a *static property* of the environment; the value it returns should not change based on the state.
 
 # Example
 
 ```julia
-@provide player_indices(::MyEnv) = 1:2
+@provide players(::MyEnv) = 1:2
 ```
 """
-function player_indices end
+function players end
 
 """
     player(env::AbstractEnv) 

From 25a4650691796aca454578d9dbab776e9a1e2026 Mon Sep 17 00:00:00 2001
From: Zachary Sunberg <zachary.sunberg@colorado.edu>
Date: Sat, 23 Jan 2021 18:24:31 -0700
Subject: [PATCH 5/5] finished examples and such

---
 README.md                            |  6 +++-
 docs/src/index.md                    |  2 +-
 examples/rock_paper_scissors.jl      | 45 ++++++++++++++++++++++++++++
 examples/tictactoe.jl                | 39 ++++++++++++++++--------
 src/CommonRLInterface.jl             |  6 ++--
 test/examples/gridworld.jl           |  1 -
 test/examples/rock_paper_scissors.jl | 11 +++++++
 test/examples/tictactoe.jl           | 12 ++++++++
 test/runtests.jl                     |  6 ++--
 9 files changed, 106 insertions(+), 22 deletions(-)
 create mode 100644 examples/rock_paper_scissors.jl
 create mode 100644 test/examples/rock_paper_scissors.jl
 create mode 100644 test/examples/tictactoe.jl

diff --git a/README.md b/README.md
index d8d40e7..7bf4778 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ By design, this package is only concerned with environments and *not* with polic
 
 ## Documentation
 
-Detailed documentation can be found here: [![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://JuliaReinforcementLearning.github.io/CommonRLInterface.jl/stable). A brief overview is given below:
+A few simple examples can be found in the examples directory. Detailed documentation can be found here: [![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://JuliaReinforcementLearning.github.io/CommonRLInterface.jl/stable). A brief overview is given below:
 
 ### Required Interface
 
@@ -35,6 +35,10 @@ terminated(env) # returns true or false indicating whether the environment has f
 
 Additional behavior for an environment can be specified with the optional interface outlined in the documentation. The `provided` function can be used to check whether optional behavior is provided by the environment.
 
+### Multiplayer Environments
+
+Optional functions allow implementation of both sequential and simultaneous games, as well as multi-agent (PO)MDPs.
+
 ### Wrappers
 
 A wrapper system described in the documentation allows for easy modification of environments.
diff --git a/docs/src/index.md b/docs/src/index.md
index cddbb7d..f2d714c 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -6,7 +6,7 @@ CurrentModule = CommonRLInterface
 
 A description of the purpose of the CommonRLInterface package can be found in [the README on GitHub](https://github.com/JuliaReinforcementLearning/CommonRLInterface.jl).
 
-Example environment can be found in the [examples directory on GitHub](https://github.com/JuliaReinforcementLearning/CommonRLInterface.jl/tree/master/examples).
+Example environments can be found in the [examples directory on GitHub](https://github.com/JuliaReinforcementLearning/CommonRLInterface.jl/tree/master/examples).
 
 Detailed reference documentation can be found using the links below:
 
diff --git a/examples/rock_paper_scissors.jl b/examples/rock_paper_scissors.jl
new file mode 100644
index 0000000..4da0739
--- /dev/null
+++ b/examples/rock_paper_scissors.jl
@@ -0,0 +1,45 @@
+using CommonRLInterface
+
+const RL = CommonRLInterface
+
+mutable struct RockPaperScissors <: AbstractEnv
+    status::Symbol # either :start, the play of the first player, or :done
+end
+
+RockPaperScissors() = RockPaperScissors(:start)
+
+beats(a, b) = (a==:rock && b==:scissors) || (a==:scissors && b==:paper) || (a==:paper && b==:rock)
+
+# Really all_act!, actions, terminated, players, and reset! are all that's needed to describe the game
+
+@provide function RL.all_act!(env::RockPaperScissors, as)
+    env.status = :done
+    if beats(as[1], as[2]) 
+        return (1, -1)
+    elseif beats(as[2], as[1])
+        return (-1, 1)
+    else
+        return (0, 0)
+    end
+end
+
+RL.actions(env::RockPaperScissors, player=0) = (:rock, :paper, :scissors)
+RL.terminated(env::RockPaperScissors) = env.status == :done
+RL.reset!(env::RockPaperScissors) = env.status = :start
+@provide RL.players(env::RockPaperScissors) = 1:2
+
+# We may also wish to implement the rest of the required interface
+
+RL.observe(env::RockPaperScissors) = [0]
+
+function RL.act!(env::RockPaperScissors, a)
+    if env.status == :start
+        env.status = a
+        return (0, 0)
+    else
+        return all_act!(env, (env.status, a))
+    end
+end
+
+@provide RL.player(env::RockPaperScissors) = env.status == :start ? 1 : 2
+@provide RL.UtilityStyle(env::RockPaperScissors) = ZeroSum()
diff --git a/examples/tictactoe.jl b/examples/tictactoe.jl
index 53b4b9a..01624a2 100644
--- a/examples/tictactoe.jl
+++ b/examples/tictactoe.jl
@@ -7,36 +7,47 @@ mutable struct TicTacToe <: AbstractEnv
     board::Matrix{Int} # 0 = untaken, 1 = x, 2 = o
 end
 
-TicTacToe() = TicTacToe(1, zeros(Int, 3, 3))
+TicTacToe() = TicTacToe(zeros(Int, 3, 3))
 
 iswinner(b, p) = any(all(b[i,:].==p) for i in 1:3) ||
-                 any(all(b[:,i]) for i in 1:3) ||
+                 any(all(b[:,i].==p) for i in 1:3) ||
                  all(b[i,i]==p for i in 1:3) ||
-                 all(b[i,4-i] == p for i in 1:3)
+                 all(b[i,4-i]==p for i in 1:3)
 
 other(p) = mod1(p+1,2)
 
 RL.reset!(env::TicTacToe) = fill!(env.board, 0)
 RL.actions(env::TicTacToe, player=0) = vec([(i, j) for i in 1:3, j in 1:3])
-RL.observe(env::TicTacToe) = env.board
+# symmetrical observations for both players: +1 for your square, -1 for other square
+RL.observe(env::TicTacToe) = zeros(Int, 3, 3) + (env.board.==player(env)) - (env.board.==other(player(env)))
 RL.terminated(env::TicTacToe) = any(iswinner(env.board, p) for p in 1:2) || all(env.board .!= 0)
 
 function RL.act!(env::TicTacToe, a)
     p = player(env)
-    r = [0, 0]
-    if env.board[a] == 0
-        
+    winner = 0
+    if env.board[a...] == 0
+        env.board[a...] = p
+        for pp in players(env)
+            if iswinner(env.board, pp)
+                winner = pp
+            end
+        end
     else
         # if you take an illegal action, you lose
-        rewards[p] = -1
-        rewards[other(p)] = 1
+        winner = other(p)
+    end
+
+    if winner == 1
+        return (1, -1)
+    elseif winner == 2
+        return (-1, 1)
     end
-    return rewards
+    return (0, 0)
 end
 
 @provide RL.players(env::TicTacToe) = 1:2
 @provide function RL.player(env::TicTacToe)
-    if sum(env.board%3==0)
+    if sum(env.board)%3 == 0
         return 1
     else
         return 2
@@ -45,12 +56,14 @@ end
 
 @provide RL.render(env::TicTacToe) = env
 
-function Base.show(::IO, ::MIME"text/plain", env::TicTacToe)
+function Base.show(io::IO, ::MIME"text/plain", env::TicTacToe)
     chars = [' ', 'x', 'o']
     for i in 1:3
         for j in 1:3
-            print(io, '|'*chars[env.board[i,j]])
+            print(io, '|'*chars[env.board[i,j]+1])
         end
         println(io, '|')
     end
 end
+
+@provide RL.UtilityStyle(::TicTacToe) = ZeroSum()
diff --git a/src/CommonRLInterface.jl b/src/CommonRLInterface.jl
index ec19249..5667aaf 100644
--- a/src/CommonRLInterface.jl
+++ b/src/CommonRLInterface.jl
@@ -56,7 +56,7 @@ Take action `a` and advance AbstractEnv `env` forward one step, and return rewar
 
 This is a *required function* that must be provided by every AbstractEnv.
 
-If the environment has a single player, it is acceptable to return a scalar number. If there are multiple players, it should return a container indexed with the items in the collection returned by `players`.
+If the environment has a single player, it is acceptable to return a scalar number. If there are multiple players, it should return a container with all rewards indexed by player number.
 
 # Example
 
@@ -72,9 +72,9 @@ end
 
 ```julia
 function act!(env::MyMDPEnv, a)
-    env.positions[player(env)] += a # In this game, each player has a position that is updated by his or her action
+    env.positions[player(env)] += a   # In this game, each player has a position that is updated by his or her action
     rewards = in_goal.(env.positions) # Rewards are +1 for being in a goal region, 0 otherwise
-    return rewards # returns a vector of rewards for each player
+    return rewards                    # returns a vector of rewards for each player
 end
 ```
 """
diff --git a/test/examples/gridworld.jl b/test/examples/gridworld.jl
index 76be6f0..6a4efbf 100644
--- a/test/examples/gridworld.jl
+++ b/test/examples/gridworld.jl
@@ -3,7 +3,6 @@ module GW
 end
 
 @testset "gridworld" begin
-
     env = GW.GridWorld()
     reset!(env)
     while !terminated(env)
diff --git a/test/examples/rock_paper_scissors.jl b/test/examples/rock_paper_scissors.jl
new file mode 100644
index 0000000..3f1b69d
--- /dev/null
+++ b/test/examples/rock_paper_scissors.jl
@@ -0,0 +1,11 @@
+module RPS 
+    include("../../examples/rock_paper_scissors.jl")
+end
+
+@testset "rock paper scissors" begin
+    g = RPS.RockPaperScissors()
+    reset!(g)
+    while !terminated(g)
+        @test sum(act!(g, rand(actions(g)))) == 0
+    end
+end
diff --git a/test/examples/tictactoe.jl b/test/examples/tictactoe.jl
new file mode 100644
index 0000000..98e41bb
--- /dev/null
+++ b/test/examples/tictactoe.jl
@@ -0,0 +1,12 @@
+module TTT
+    include("../../examples/tictactoe.jl")
+end
+
+@testset "tictactoe" begin
+    g = TTT.TicTacToe()
+    reset!(g)
+    while !terminated(g)
+        @test sum(act!(g, rand(actions(g)))) == 0
+        render(g)
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 3aa81f5..d11d421 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -162,8 +162,8 @@ end
     @test observations(env) == 1:10
 end
 
-if VERSION >= v"1.4" # not sure if this is the actual minimal version, but I know it will work
-    include("examples/gridworld.jl")
-end
+include("examples/gridworld.jl")
+include("examples/tictactoe.jl")
+include("examples/rock_paper_scissors.jl")
 
 include("wrappers.jl")