Skip to content

Commit

Permalink
adjusted pettingzoo to PettingZooEnv simultaneous environment more co…
Browse files Browse the repository at this point in the history
…nvenient (#925)
  • Loading branch information
Mytolo committed Jul 19, 2023
1 parent 309fbf5 commit a7eedd7
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 58 deletions.
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
using .PyCall


np = pyimport("numpy")

export PettingzooEnv
export PettingZooEnv


"""
PettingzooEnv(;kwargs...)
PettingZooEnv(;kwargs...)
`PettingzooEnv` is an interface of the python library pettingzoo for multi agent reinforcement learning environments. It can be used to test multi
`PettingZooEnv` is an interface to the python library PettingZoo for multi-agent reinforcement learning environments. It can be used to test
multi-agent reinforcement learning algorithms implemented in Julia ReinforcementLearning.
"""
function PettingzooEnv(name::String; seed=123, args...)

function PettingZooEnv(name::String; seed=123, args...)
if !PyCall.pyexists("pettingzoo.$name")
error("Cannot import pettingzoo.$name")
end
Expand All @@ -20,7 +21,7 @@ function PettingzooEnv(name::String; seed=123, args...)
pyenv.reset(seed=seed)
obs_space = space_transform(pyenv.observation_space(pyenv.agents[1]))
act_space = space_transform(pyenv.action_space(pyenv.agents[1]))
env = PettingzooEnv{typeof(act_space),typeof(obs_space),typeof(pyenv)}(
env = PettingZooEnv{typeof(act_space),typeof(obs_space),typeof(pyenv)}(
pyenv,
obs_space,
act_space,
Expand All @@ -33,13 +34,12 @@ end

# basic function needed for simulation ========================================================================

function RLBase.reset!(env::PettingzooEnv)
function RLBase.reset!(env::PettingZooEnv)
pycall!(env.state, env.pyenv.reset, PyObject, env.seed)
env.ts = 1
nothing
end

function RLBase.is_terminated(env::PettingzooEnv)
function RLBase.is_terminated(env::PettingZooEnv)
_, _, t, d, _ = pycall(env.pyenv.last, PyObject)
t || d
end
Expand All @@ -48,96 +48,96 @@ end

## State / observation implementations ========================================================================

RLBase.state(env::PettingzooEnv, ::Observation{Any}, players::Tuple) = Dict(p => state(env, p) for p in players)
# Observations for several players at once, keyed by player.
RLBase.state(env::PettingZooEnv, ::Observation{Any}, players::Tuple) = Dict(p => state(env, p) for p in players)


# partial observability is default for pettingzoo
function RLBase.state(env::PettingzooEnv, ::Observation{Any}, player)
# Partial observability (pettingzoo default): a player sees only its own
# observation, fetched from the python side via `observe`.
RLBase.state(env::PettingZooEnv, ::Observation{Any}, player) = env.pyenv.observe(player)


## state space =========================================================================================================================================

RLBase.state_space(env::PettingzooEnv, ::Observation{Any}, players) = Space(Dict(player => state_space(env, player) for player in players))
# Joint state space: one per-player observation space, keyed by player.
RLBase.state_space(env::PettingZooEnv, ::Observation{Any}, players) = Space(Dict(player => state_space(env, player) for player in players))

# partial observability
RLBase.state_space(env::PettingzooEnv, ::Observation{Any}, player::String) = space_transform(env.pyenv.observation_space(player))
# Per-player observation space; pettingzoo identifies agents by string name,
# so the Symbol player id is converted before the python call.
RLBase.state_space(env::PettingZooEnv, ::Observation{Any}, player::Symbol) = space_transform(env.pyenv.observation_space(String(player)))

# for full observability. Be careful: action_space has also to be adjusted
# RLBase.state_space(env::PettingzooEnv, ::Observation{Any}, player::String) = space_transform(env.pyenv.state_space)
# RLBase.state_space(env::PettingZooEnv, ::Observation{Any}, player::String) = space_transform(env.pyenv.state_space)


## action space implementations ====================================================================================

RLBase.action_space(env::PettingzooEnv, players::Tuple{String}) =
# Joint action space for a tuple of players: one sub-space per player.
RLBase.action_space(env::PettingZooEnv, players::Tuple{Symbol}) =
Space(Dict(p => action_space(env, p) for p in players))

RLBase.action_space(env::PettingzooEnv, player::String) = space_transform(env.pyenv.action_space(player))
# Per-player action space; the Symbol id is converted to the python-side string name.
RLBase.action_space(env::PettingZooEnv, player::Symbol) = space_transform(env.pyenv.action_space(String(player)))

RLBase.action_space(env::PettingzooEnv, player::Integer) = space_transform(env.pyenv.action_space(env.pyenv.agents[player]))
# Action space by positional agent index (1-based index into pyenv.agents).
RLBase.action_space(env::PettingZooEnv, player::Integer) = space_transform(env.pyenv.action_space(env.pyenv.agents[player]))

RLBase.action_space(env::PettingzooEnv, player::DefaultPlayer) = env.action_space
# DefaultPlayer fallback: the action space cached at construction time.
RLBase.action_space(env::PettingZooEnv, player::DefaultPlayer) = env.action_space

## action functions ========================================================================================================================

function RLBase.act!(env::PettingzooEnv, actions::Dict, players::Tuple)
@assert length(actions) == length(players)
env.ts += 1
for p in players
env(actions[p])
"""
    RLBase.act!(env::PettingZooEnv, actions::Dict{Symbol, Int})

Apply one discrete action per agent. `actions` is keyed by the agent's
`Symbol` name; each action is forwarded to the python environment's `step`
in pettingzoo agent order.
"""
function RLBase.act!(env::PettingZooEnv, actions::Dict{Symbol, Int})
    @assert length(actions) == length(players(env))
    for p in env.pyenv.agents
        # pyenv.agents yields python-side string names, but `actions` is keyed
        # by Symbol — convert before lookup (plain `actions[p]` raises KeyError).
        pycall(env.pyenv.step, PyObject, actions[Symbol(p)])
    end
end

function RLBase.act!(env::PettingzooEnv, actions::Dict, player)
@assert length(actions) == length(players(env))
for p in players(env)
env(actions[p])
"""
    RLBase.act!(env::PettingZooEnv, actions::Dict{Symbol, Real})

Apply one scalar continuous action per agent. Each value is converted to a
float32 numpy array, as required by pettingzoo's continuous-action spaces.
"""
function RLBase.act!(env::PettingZooEnv, actions::Dict{Symbol, Real})
    @assert length(actions) == length(env.pyenv.agents)
    for p in env.pyenv.agents
        # agents are python-side strings; the dict is keyed by Symbol — convert
        # before lookup (plain `actions[p]` raises KeyError).
        pycall(env.pyenv.step, PyObject, np.array(actions[Symbol(p)]; dtype=np.float32))
    end
end

function RLBase.act!(env::PettingzooEnv, actions::Dict{String, Int})
@assert length(actions) == length(players(env))
"""
    RLBase.act!(env::PettingZooEnv, actions::Dict{Symbol, Vector})

Apply one action vector per agent, forwarded to the python environment's
`step` in pettingzoo agent order.
"""
function RLBase.act!(env::PettingZooEnv, actions::Dict{Symbol, Vector})
    @assert length(actions) == length(env.pyenv.agents)
    for p in env.pyenv.agents
        # agents are python-side strings; the dict is keyed by Symbol — convert
        # before lookup (plain `actions[p]` raises KeyError).
        pycall(env.pyenv.step, PyObject, actions[Symbol(p)])
    end
end

function RLBase.act!(env::PettingzooEnv, actions::Dict{String, Real})
@assert length(actions) == length(players(env))
env.ts += 1
for p in env.pyenv.agents
pycall(env.pyenv.step, PyObject, np.array(actions[p]; dtype=np.float32))
"""
    RLBase.act!(env::PettingZooEnv, actions::NamedTuple)

Apply one action per agent from a NamedTuple keyed by player name,
dispatching each entry to the single-action `act!` methods.
"""
function RLBase.act!(env::PettingZooEnv, actions::NamedTuple)
    @assert length(actions) == length(env.pyenv.agents)
    # `in` was missing in the original (`for player players(env)`), which is a
    # syntax error — this method could never have been loaded.
    for player in players(env)
        RLBase.act!(env, actions[player])
    end
end

function RLBase.act!(env::PettingzooEnv, action::Vector)
# for vectors, pettingzoo need them to be in proper numpy type
# Vector actions must reach pettingzoo as float32 numpy arrays.
function RLBase.act!(env::PettingZooEnv, action::Vector)
    numpy_action = np.array(action; dtype=np.float32)
    pycall(env.pyenv.step, PyObject, numpy_action)
end

function RLBase.act!(env::PettingzooEnv, action::Integer)
env.ts += 1
# Fallback: forward any other action object to the python `step` unchanged.
RLBase.act!(env::PettingZooEnv, action) = pycall(env.pyenv.step, PyObject, action)

# reward of player ======================================================================================================================
function RLBase.reward(env::PettingzooEnv, player::String)
env.pyenv.rewards[player]
# Look up the player's reward in the python-side rewards dict (string keys).
RLBase.reward(env::PettingZooEnv, player::Symbol) = env.pyenv.rewards[String(player)]


# Multi agent part =========================================================================================================================================


RLBase.players(env::PettingzooEnv) = env.pyenv.agents
# All agent names of the wrapped environment, as Symbols.
RLBase.players(env::PettingZooEnv) = map(Symbol, env.pyenv.agents)

# The agent whose turn it is, as a Symbol (current_player is a 1-based
# index into pyenv.agents).
RLBase.current_player(env::PettingZooEnv) = Symbol(env.pyenv.agents[env.current_player])

function RLBase.current_player(env::PettingzooEnv, post_action=false)
cur_id = env.ts % length(env.pyenv.agents) == 0 ? length(env.pyenv.agents) : env.ts % length(env.pyenv.agents)
cur_id = post_action ? (cur_id - 1 == 0 ? length(env.pyenv.agents) : cur_id - 1) : cur_id
return env.pyenv.agents[cur_id]
# Advance the current-player index, wrapping back to the first agent
# after the last one.
function RLBase.next_player!(env::PettingZooEnv)
    env.current_player = mod1(env.current_player + 1, length(env.pyenv.agents))
end

function RLBase.NumAgentStyle(env::PettingzooEnv)
function RLBase.NumAgentStyle(env::PettingZooEnv)
n = length(env.pyenv.agents)
if n == 1
SingleAgent()
Expand All @@ -146,9 +146,8 @@ function RLBase.NumAgentStyle(env::PettingzooEnv)
end
end


RLBase.DynamicStyle(::PettingzooEnv) = SEQUENTIAL
RLBase.ActionStyle(::PettingzooEnv) = MINIMAL_ACTION_SET
RLBase.InformationStyle(::PettingzooEnv) = IMPERFECT_INFORMATION
RLBase.ChanceStyle(::PettingzooEnv) = EXPLICIT_STOCHASTIC
# RLBase environment trait declarations:
RLBase.DynamicStyle(::PettingZooEnv) = SIMULTANEOUS           # all agents act each step
RLBase.ActionStyle(::PettingZooEnv) = MINIMAL_ACTION_SET
RLBase.InformationStyle(::PettingZooEnv) = IMPERFECT_INFORMATION
RLBase.ChanceStyle(::PettingZooEnv) = EXPLICIT_STOCHASTIC

Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
mutable struct PettingzooEnv{Ta,To,P} <: AbstractEnv
pyenv::P
observation_space::To
action_space::Ta
state::P
seed::Union{Int, Nothing}
ts::Int
# Parametrization:
# Ta : Type of action_space
# To : Type of observation_space
# P : Type of environment most common: PyObject

mutable struct PettingZooEnv{Ta,To,P} <: AbstractEnv
pyenv::P                  # the wrapped python pettingzoo environment object
observation_space::To     # observation space cached at construction
action_space::Ta          # action space cached at construction
state::P                  # python-side state object, refreshed on reset!
seed::Union{Int, Nothing} # seed forwarded to pyenv.reset (nothing = unseeded)
current_player::Int       # 1-based index into pyenv.agents
end

export PettingzooEnv

struct GymEnv{T,Ta,To,P} <: AbstractEnv
Expand Down

0 comments on commit a7eedd7

Please sign in to comment.