diff --git a/Project.toml b/Project.toml index 9db6a36..461dfa6 100644 --- a/Project.toml +++ b/Project.toml @@ -3,11 +3,11 @@ uuid = "d842c3ba-07a1-494f-bbec-f5741b0a3e98" authors = ["Zachary Sunberg and contributors"] version = "0.1.0" -[deps] -MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" +# [deps] +# MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" [compat] -MacroTools = "0.5" +# MacroTools = "0.5" julia = "1.4" [extras] diff --git a/README.md b/README.md index 1721cdc..beee0c4 100644 --- a/README.md +++ b/README.md @@ -6,10 +6,10 @@ [![Coverage](https://codecov.io/gh/JuliaReinforcementLearning/CommonRLInterface.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/JuliaReinforcementLearning/CommonRLInterface.jl) This package is designed for two reasons: -1. to provide compatibility between different reinforcement learning (RL) environment interfaces - for example, an algorithm that uses `YourRLInterface` should be able to use an environment from `MyRLInterface` *without* depending on `MyRLInterface` as long as they both support `CommonEnv`. +1. to provide compatibility between different reinforcement learning (RL) environment interfaces - for example, an algorithm that uses `YourRLInterface` should be able to use an environment from `MyRLInterface` *without* depending on `MyRLInterface` as long as they both support `CommonRLInterface`. 2. to provide a very basic interface for users to write their own RL environments and algorithms. -To accomplish this, there is a single abstract environment type, `CommonEnv`, a small required interface, and a larger optional interface. +To accomplish this, there is a single abstract environment type, `AbstractEnv`, a small required interface, and a larger optional interface that will be added soon. ## Required Interface @@ -22,51 +22,34 @@ actions(env) # returns the set of all possible actions for the environment ## Optional Interface -A number of other functions are available to provide additional functionality. 
- -For example, if an algorithm needs to create an independent copy of the environment at the current state, it can use `clone(env)`. The algorithm can check if `clone(env)` is available for an environment with -```julia -provided(clone, env) -``` -and give an appropriate error message if it is not. - -A particular `CommonEnv` can opt-in to providing the `clone` function with the `@provide` macro, for example -```julia -@provide CommonRLInterface.clone(env::MyCommonEnv) = deepcopy(env) -``` +In the near future, a number of optional interface functions will be added. Please file an issue if you would like to see a particular interface function. ## Additional info -(This will eventually go in the Documenter.jl-generated docs) - -### What does it mean for an RL Framework to "support" CommonEnv? +### What does it mean for an RL Framework to "support" CommonRLInterface? -Suppose you have an abstract environment type in your package called `YourEnv`. Support for CommonEnv means: +Suppose you have an abstract environment type in your package called `YourEnv`. Support for `AbstractEnv` means: -1. You provide a constructor method +1. You provide `convert` methods ```julia - YourEnv(env::CommonEnv) # might require extra args and keyword args in some cases + convert(::Type{YourEnv}, ::AbstractEnv) + convert(::Type{AbstractEnv}, ::YourEnv) ``` + If there are additional options in the conversion, you are encouraged to create and document constructors with additional arguments. -2. You provide an implementation of the interface functions in `YourEnv` only using functions from CommonRLInterface - -3. You provide `CommonEnv` constructor method - ```julia - CommonEnv(env::YourEnv) # might require extra args and keyword args - ``` - which returns a `YourCommonEnv <: CommonEnv` +2. You provide an implementation of the interface functions from your framework only using functions from CommonRLInterface 4. 
You implement at minimum - `CommonRL.reset!(::YourCommonEnv)` - `CommonRL.step!(::YourCommonEnv, a)` - `CommonRL.actions(::YourCommonEnv)` - and as many optional functions as you'd like to support. + and as many optional functions as you'd like to support, where `YourCommonEnv` is the concrete type returned by `convert(::Type{AbstractEnv}, ::YourEnv)`. ### What does an environment implementation look like? A 1-D LQR problem with discrete actions might look like this: ```julia -mutable struct LQREnv <: CommonEnv +mutable struct LQREnv <: AbstractEnv s::Float64 end @@ -75,20 +58,21 @@ function CommonRLInterface.reset!(m::LQREnv) end function CommonRLInterface.step!(m::LQREnv, a) - r = -s^2 - a^2 + r = -m.s^2 - a^2 sp = m.s = m.s + a + randn() return sp, r, false, NamedTuple() end CommonRLInterface.actions(m::LQREnv) = (-1.0, 0.0, 1.0) -@provide CommonRLInterface.clone(m::LQREnv) = LQREnv(m.s) +# from version 0.2 on, you can implement optional functions like this: +# @provide CommonRLInterface.clone(m::LQREnv) = LQREnv(m.s) ``` ### What does a simulation with a random policy look like? ```julia -env = YourEnv(0.0) +env = YourEnv() done = false o = reset!(env) acts = actions(env) @@ -100,9 +84,9 @@ end @show rsum ``` -### What does it mean for an algorithm to "support" CommonEnv? +### What does it mean for an algorithm to "support" CommonRLInterface? -You should have a method of your solver or algorithm that accepts a `CommonEnv`, perhaps handling it by converting it to your framework first, e.g. +You should have a method of your solver or algorithm that accepts an `AbstractEnv`, perhaps handling it by converting it to your framework first, e.g. 
``` -solve(env::CommonEnv) = solve(YourEnv(env)) +solve(env::AbstractEnv) = solve(convert(YourEnv, env)) ``` diff --git a/src/CommonRLInterface.jl b/src/CommonRLInterface.jl index a92fd9d..7ae3237 100644 --- a/src/CommonRLInterface.jl +++ b/src/CommonRLInterface.jl @@ -1,14 +1,14 @@ module CommonRLInterface -using MacroTools +# using MacroTools export - CommonEnv, + AbstractEnv, step!, reset!, actions -abstract type CommonEnv end +abstract type AbstractEnv end """ o, r, done, info = step!(env, a) @@ -35,6 +35,8 @@ TODO: document concept of a set. """ function actions end +# Below to be enabled in v0.2 +#= export provided, @provide @@ -99,5 +101,6 @@ Create a clone of CommonEnv `env` at the current state. Two clones are assumed to be completely independent of each other - no action applied to one will affect the other. """ function clone end +=# end diff --git a/test/runtests.jl b/test/runtests.jl index 41139a8..6394e03 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,40 @@ using CommonRLInterface using Test +@testset "from README" begin + mutable struct LQREnv <: AbstractEnv + s::Float64 + end + + function CommonRLInterface.reset!(m::LQREnv) + m.s = 0.0 + end + + function CommonRLInterface.step!(m::LQREnv, a) + r = -m.s^2 - a^2 + sp = m.s = m.s + a + randn() + return sp, r, false, NamedTuple() + end + + CommonRLInterface.actions(m::LQREnv) = (-1.0, 0.0, 1.0) + + env = LQREnv(0.0) + done = false + o = reset!(env) + acts = actions(env) + rsum = 0.0 + step = 1 + while !done && step <= 10 + o, r, done, info = step!(env, rand(acts)) + rsum += r # accumulate each step's reward into the running total + step += 1 + end + @show rsum +end + + +# tests to be enabled in v0.2 +#= function f end # h needs to be out here for the inference to work for some reason @@ -55,3 +89,4 @@ struct MyCommonEnv <: CommonEnv end @test clone(MyCommonEnv()) == MyCommonEnv() end +=#