From a7eedd7fd4c7ff6aa4aba8efe400e5e6c6143bcd Mon Sep 17 00:00:00 2001
From: Mytolo
Date: Tue, 18 Jul 2023 13:06:21 +0200
Subject: [PATCH] adjusted pettingzoo to PettingZooEnv simultaneous
 environment more convenient (#925)

---
 .../src/environments/3rd_party/pettingzoo.jl | 101 +++++++++---------
 .../src/environments/3rd_party/structs.jl    |  20 ++--
 2 files changed, 63 insertions(+), 58 deletions(-)

diff --git a/src/ReinforcementLearningEnvironments/src/environments/3rd_party/pettingzoo.jl b/src/ReinforcementLearningEnvironments/src/environments/3rd_party/pettingzoo.jl
index d466dd2e3..43069c34c 100644
--- a/src/ReinforcementLearningEnvironments/src/environments/3rd_party/pettingzoo.jl
+++ b/src/ReinforcementLearningEnvironments/src/environments/3rd_party/pettingzoo.jl
@@ -1,17 +1,18 @@
 using .PyCall
-
 np = pyimport("numpy")
 
-export PettingzooEnv
+export PettingZooEnv
+
 """
-    PettingzooEnv(;kwargs...)
+    PettingZooEnv(;kwargs...)
 
-`PettingzooEnv` is an interface of the python library pettingzoo for multi agent reinforcement learning environments. It can be used to test multi
+`PettingZooEnv` is an interface to the Python library PettingZoo for multi agent reinforcement learning environments. It can be used to test multi
 agent reinforcement learning algorithms implemented in Julia ReinforcementLearning.
 """
-function PettingzooEnv(name::String; seed=123, args...)
+
+function PettingZooEnv(name::String; seed=123, args...)
     if !PyCall.pyexists("pettingzoo.$name")
         error("Cannot import pettingzoo.$name")
     end
@@ -20,7 +21,7 @@ function PettingzooEnv(name::String; seed=123, args...)
     pyenv.reset(seed=seed)
     obs_space = space_transform(pyenv.observation_space(pyenv.agents[1]))
     act_space = space_transform(pyenv.action_space(pyenv.agents[1]))
-    env = PettingzooEnv{typeof(act_space),typeof(obs_space),typeof(pyenv)}(
+    env = PettingZooEnv{typeof(act_space),typeof(obs_space),typeof(pyenv)}(
         pyenv,
         obs_space,
         act_space,
@@ -33,13 +34,12 @@ end
 
 # basic functions needed for simulation ========================================================================
 
-function RLBase.reset!(env::PettingzooEnv)
+function RLBase.reset!(env::PettingZooEnv)
     pycall!(env.state, env.pyenv.reset, PyObject, env.seed)
-    env.ts = 1
     nothing
 end
 
-function RLBase.is_terminated(env::PettingzooEnv)
+function RLBase.is_terminated(env::PettingZooEnv)
     _, _, t, d, _ = pycall(env.pyenv.last, PyObject)
     t || d
 end
@@ -48,96 +48,96 @@ end
 
 ## State / observation implementations ========================================================================
 
-RLBase.state(env::PettingzooEnv, ::Observation{Any}, players::Tuple) = Dict(p => state(env, p) for p in players)
+RLBase.state(env::PettingZooEnv, ::Observation{Any}, players::Tuple) = Dict(p => state(env, p) for p in players)
 
 # partial observability is the default for pettingzoo
-function RLBase.state(env::PettingzooEnv, ::Observation{Any}, player)
+function RLBase.state(env::PettingZooEnv, ::Observation{Any}, player)
     env.pyenv.observe(player)
 end
 
 ## state space =========================================================================================================================================
 
-RLBase.state_space(env::PettingzooEnv, ::Observation{Any}, players) = Space(Dict(player => state_space(env, player) for player in players))
+RLBase.state_space(env::PettingZooEnv, ::Observation{Any}, players) = Space(Dict(player => state_space(env, player) for player in players))
 
 # partial observability
-RLBase.state_space(env::PettingzooEnv, ::Observation{Any}, player::String) =
-    space_transform(env.pyenv.observation_space(player))
+RLBase.state_space(env::PettingZooEnv, ::Observation{Any}, player::Symbol) = space_transform(env.pyenv.observation_space(String(player)))
 
 # for full observability. Be careful: action_space also has to be adjusted
-# RLBase.state_space(env::PettingzooEnv, ::Observation{Any}, player::String) = space_transform(env.pyenv.state_space)
+# RLBase.state_space(env::PettingZooEnv, ::Observation{Any}, player::String) = space_transform(env.pyenv.state_space)
 
 ## action space implementations ====================================================================================
 
-RLBase.action_space(env::PettingzooEnv, players::Tuple{String}) =
+RLBase.action_space(env::PettingZooEnv, players::Tuple{Symbol}) =
     Space(Dict(p => action_space(env, p) for p in players))
 
-RLBase.action_space(env::PettingzooEnv, player::String) = space_transform(env.pyenv.action_space(player))
+RLBase.action_space(env::PettingZooEnv, player::Symbol) = space_transform(env.pyenv.action_space(String(player)))
 
-RLBase.action_space(env::PettingzooEnv, player::Integer) = space_transform(env.pyenv.action_space(env.pyenv.agents[player]))
+RLBase.action_space(env::PettingZooEnv, player::Integer) = space_transform(env.pyenv.action_space(env.pyenv.agents[player]))
 
-RLBase.action_space(env::PettingzooEnv, player::DefaultPlayer) = env.action_space
+RLBase.action_space(env::PettingZooEnv, player::DefaultPlayer) = env.action_space
 
 ## action functions ========================================================================================================================
 
-function RLBase.act!(env::PettingzooEnv, actions::Dict, players::Tuple)
-    @assert length(actions) == length(players)
-    env.ts += 1
-    for p in players
-        env(actions[p])
+function RLBase.act!(env::PettingZooEnv, actions::Dict{Symbol, Int})
+    @assert length(actions) == length(players(env))
+    for p in players(env)
+        pycall(env.pyenv.step, PyObject, actions[p])
     end
 end
 
-function RLBase.act!(env::PettingzooEnv, actions::Dict, player)
-    @assert length(actions) == length(players(env))
-    for p in players(env)
-        env(actions[p])
+function RLBase.act!(env::PettingZooEnv, actions::Dict{Symbol, Real})
+    @assert length(actions) == length(env.pyenv.agents)
+    for p in players(env)
+        pycall(env.pyenv.step, PyObject, np.array(actions[p]; dtype=np.float32))
     end
 end
 
-function RLBase.act!(env::PettingzooEnv, actions::Dict{String, Int})
-    @assert length(actions) == length(players(env))
+function RLBase.act!(env::PettingZooEnv, actions::Dict{Symbol, Vector})
+    @assert length(actions) == length(env.pyenv.agents)
     for p in env.pyenv.agents
-        pycall(env.pyenv.step, PyObject, actions[p])
+        RLBase.act!(env, actions[Symbol(p)])
     end
 end
 
-function RLBase.act!(env::PettingzooEnv, actions::Dict{String, Real})
-    @assert length(actions) == length(players(env))
-    env.ts += 1
-    for p in env.pyenv.agents
-        pycall(env.pyenv.step, PyObject, np.array(actions[p]; dtype=np.float32))
+function RLBase.act!(env::PettingZooEnv, actions::NamedTuple)
+    @assert length(actions) == length(env.pyenv.agents)
+    for player ∈ players(env)
+        RLBase.act!(env, actions[player])
     end
 end
 
-function RLBase.act!(env::PettingzooEnv, action::Vector)
+# for vectors, pettingzoo needs them to be of a proper numpy dtype
+function RLBase.act!(env::PettingZooEnv, action::Vector)
     pycall(env.pyenv.step, PyObject, np.array(action; dtype=np.float32))
 end
 
-function RLBase.act!(env::PettingzooEnv, action::Integer)
-    env.ts += 1
+function RLBase.act!(env::PettingZooEnv, action)
     pycall(env.pyenv.step, PyObject, action)
 end
 
 # reward of player ======================================================================================================================
 
-function RLBase.reward(env::PettingzooEnv, player::String)
-    env.pyenv.rewards[player]
+function RLBase.reward(env::PettingZooEnv, player::Symbol)
+    env.pyenv.rewards[String(player)]
 end
 
 # Multi agent part =========================================================================================================================================
 
-RLBase.players(env::PettingzooEnv) = env.pyenv.agents
+RLBase.players(env::PettingZooEnv) = Symbol.(env.pyenv.agents)
+
+function RLBase.current_player(env::PettingZooEnv)
+    return Symbol(env.pyenv.agents[env.current_player])
+end
 
-function RLBase.current_player(env::PettingzooEnv, post_action=false)
-    cur_id = env.ts % length(env.pyenv.agents) == 0 ? length(env.pyenv.agents) : env.ts % length(env.pyenv.agents)
-    cur_id = post_action ? (cur_id - 1 == 0 ? length(env.pyenv.agents) : cur_id - 1) : cur_id
-    return env.pyenv.agents[cur_id]
+function RLBase.next_player!(env::PettingZooEnv)
+    env.current_player = env.current_player < length(env.pyenv.agents) ? env.current_player + 1 : 1
 end
 
-function RLBase.NumAgentStyle(env::PettingzooEnv)
+function RLBase.NumAgentStyle(env::PettingZooEnv)
     n = length(env.pyenv.agents)
     if n == 1
         SingleAgent()
@@ -146,9 +146,8 @@ function RLBase.NumAgentStyle(env::PettingzooEnv)
     end
 end
 
-
-RLBase.DynamicStyle(::PettingzooEnv) = SEQUENTIAL
-RLBase.ActionStyle(::PettingzooEnv) = MINIMAL_ACTION_SET
-RLBase.InformationStyle(::PettingzooEnv) = IMPERFECT_INFORMATION
-RLBase.ChanceStyle(::PettingzooEnv) = EXPLICIT_STOCHASTIC
+RLBase.DynamicStyle(::PettingZooEnv) = SIMULTANEOUS
+RLBase.ActionStyle(::PettingZooEnv) = MINIMAL_ACTION_SET
+RLBase.InformationStyle(::PettingZooEnv) = IMPERFECT_INFORMATION
+RLBase.ChanceStyle(::PettingZooEnv) = EXPLICIT_STOCHASTIC
 
diff --git a/src/ReinforcementLearningEnvironments/src/environments/3rd_party/structs.jl b/src/ReinforcementLearningEnvironments/src/environments/3rd_party/structs.jl
index 284c500e4..a6e727dc9 100644
--- a/src/ReinforcementLearningEnvironments/src/environments/3rd_party/structs.jl
+++ b/src/ReinforcementLearningEnvironments/src/environments/3rd_party/structs.jl
@@ -1,11 +1,17 @@
-mutable struct PettingzooEnv{Ta,To,P} <: AbstractEnv
-    pyenv::P
-    observation_space::To
-    action_space::Ta
-    state::P
-    seed::Union{Int, Nothing}
-    ts::Int
+# Parametrization:
+# Ta : type of the action_space
+# To : type of the observation_space
+# P  : type of the wrapped environment, most commonly PyObject
+
+mutable struct PettingZooEnv{Ta,To,P} <: AbstractEnv
+    pyenv::P
+    observation_space::To
+    action_space::Ta
+    state::P
+    seed::Union{Int, Nothing}
+    current_player::Int
 end
+
-export PettingzooEnv
+export PettingZooEnv
 
 struct GymEnv{T,Ta,To,P} <: AbstractEnv
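
Usage sketch (reviewer note, not part of the patch): after this change the wrapper is driven through the Symbol-keyed multi-agent API. Below is a minimal, hedged example of the intended flow. The environment name "mpe.simple_spread_v3" is only an illustrative assumption (any importable pettingzoo module path), and calling `rand` on the transformed per-player action space assumes the resulting Julia space supports sampling; neither is defined by this patch itself.

    using ReinforcementLearningEnvironments
    using PyCall   # PettingZooEnv is only defined once PyCall is loaded

    # "mpe.simple_spread_v3" is an assumed example name for illustration
    env = PettingZooEnv("mpe.simple_spread_v3"; seed = 42)

    reset!(env)
    while !is_terminated(env)
        # players(env) now returns Symbols, matching the NamedTuple act! method
        acts = NamedTuple(p => rand(action_space(env, p)) for p in players(env))
        act!(env, acts)
    end

    # per-player rewards are also looked up with Symbol players
    total = sum(reward(env, p) for p in players(env))

This exercises the new `act!(env, actions::NamedTuple)` path, which steps the underlying environment once per agent in the order given by `players(env)`, consistent with the SIMULTANEOUS DynamicStyle declared above.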