Version 0.2.1 (#65)
* feat(project): setup dev for v0.2.1

* feat(agents): DefaultWriter is now initialized in the base class. New method for generating unique IDs for objects (metadata_utils.get_unique_id()). The Agent base class now has a unique_id property.

* feat(network): message reporting how many bytes are being sent and received

* feat(agent,manager,writers): DefaultWriter now optionally wraps a tensorboard SummaryWriter. AgentManager has a new option to enable tensorboard logging (enable_tensorboard) and automatically handles the log_dir of each SummaryWriter (inside DefaultWriter); see the sketch after this list.

* fix(dqn): tests

* fix(agent): set_writer() fixed

* minor improvements

* feat(network): Tensorboard files can now be sent from server to client (as zip files)

* fix(network): bug

* fix(manager): Fix bug in AgentHandler, which was not passing eval_env in the kwargs used to initialize/load the agent instance.

* feat(writer, manager): Option to limit the data stored in memory by DefaultWriter; AgentManager can now receive extra parameters for DefaultWriter.

* fix(torch dqn): set_writer bug

* feat(wrapper, gym_make): Add option to wrap observation_space and action_space using rlberry.spaces. Seeding of gym Dict spaces is reportedly too slow on some machines, and rlberry.spaces should solve these seeding issues; see the sketch after this list.

* feat(process_env, Agent): Replaced assertion with a warning when an Agent is not reseeded.

* feat(agent, manager): Agent now accepts an output directory as an argument. This directory is set automatically by AgentManager.

* fix(manager): minor bug

* fix(agent): out dir bug

* feat(manager): It is now possible to pass init_kwargs to each instance separately.

* feat(gridworld): remove default terminal_states

* fix(gridworld): fix get_layout_img method

* feat(manager): new method get_agent_instances() that returns the trained agent instances

* feat(manager/evaluation): In plot_writer_data, better handling of the kwargs passed to sns.lineplot

* feat(plot_writer_data): Option to set xtag (the tag used for the x-axis)

* update version
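The manager-side changes above can be exercised together as in the following sketch. It is not part of the commit: the agent, environment and parameter choices mirror the bundled examples, and the xtag value 'global_step' is an assumed tag name.

```python
import numpy as np
from rlberry.envs.benchmarks.ball_exploration import PBall2D
from rlberry.agents.torch import REINFORCEAgent
from rlberry.manager import AgentManager, plot_writer_data

# Environment spec as (constructor, kwargs), as in the repository examples.
train_env = (PBall2D, dict())

manager = AgentManager(
    REINFORCEAgent,
    train_env,
    fit_budget=100,
    init_kwargs=dict(gamma=0.99, horizon=50, learning_rate=0.0003),
    eval_kwargs=dict(eval_horizon=50, n_simulations=10),
    n_fit=2,
    seed=123,
    enable_tensorboard=True,                 # new: one SummaryWriter per instance, log_dirs handled by the manager
    default_writer_kwargs=dict(maxlen=500),  # new: extra DefaultWriter params, e.g. limit data kept in memory
)
manager.fit()

# New: retrieve the trained agent instances directly.
agent = manager.get_agent_instances()[0]
print(agent.unique_id, agent.output_dir)

# plot_writer_data now forwards extra kwargs to sns.lineplot and accepts xtag
# (the tag used for the x-axis); 'global_step' is an assumed tag name here.
plot_writer_data([manager],
                 tag='episode_rewards',
                 xtag='global_step',
                 preprocess_func=np.cumsum,
                 title='cumulative rewards',
                 show=False)
```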
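The new space-wrapping option for gym_make would be used roughly as below. The commit message does not spell out the keyword name, so wrap_spaces is an assumption; only the intent (rlberry.spaces versions of observation_space/action_space, avoiding slow gym Dict-space seeding) comes from the changelog.

```python
from rlberry.envs import gym_make

# `wrap_spaces` is an assumed keyword name for the new option described above.
env = gym_make('CartPole-v0', wrap_spaces=True)

# observation_space / action_space should now be rlberry.spaces objects,
# whose seeding does not go through gym's (possibly slow) Dict-space seeding.
print(type(env.observation_space), type(env.action_space))
env.reseed(42)  # rlberry-style reseeding of the env and its spaces (assumed available)
```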
omardrwch committed Nov 19, 2021
1 parent f697260 commit 1701a48
Showing 46 changed files with 731 additions and 241 deletions.
3 changes: 1 addition & 2 deletions .gitignore
@@ -16,8 +16,7 @@ cov_html/*
dev/*
results/*
temp/*
-client_data/*
-remote_data/*
+rlberry_data/*

# Byte-compiled / optimized / DLL files
__pycache__/
4 changes: 2 additions & 2 deletions CITATION.cff
@@ -16,7 +16,7 @@ authors:

title: "rlberry - A Reinforcement Learning Library for Research and Education"
abbreviation: rlberry
-version: 0.2
-doi: 10.5281/zenodo.5544540
+version: 0.2.1
+doi: 10.5281/zenodo.5223307
date-released: 2021-10-01
url: "https://github.com/rlberry-py/rlberry"
4 changes: 2 additions & 2 deletions README.md
@@ -25,9 +25,9 @@
<a href="https://codecov.io/gh/rlberry-py/rlberry">
<img alt="codecov" src="https://codecov.io/gh/rlberry-py/rlberry/branch/main/graph/badge.svg?token=TIFP7RUD75">
</a>
-<a href="https://img.shields.io/pypi/pyversions/rlberry">
+<!-- <a href="https://img.shields.io/pypi/pyversions/rlberry">
<img alt="PyPI - Python Version" src="https://img.shields.io/pypi/pyversions/rlberry">
-</a>
+</a> -->
</p>

<p align="center">
2 changes: 1 addition & 1 deletion examples/demo_adaptiveql.py
@@ -57,7 +57,7 @@
preprocess_func=np.cumsum, title='Cumulative Rewards')

for stats in multimanagers.managers:
-agent = stats.agent_handlers[0]
+agent = stats.get_agent_instances()[0]
try:
agent.Qtree.plot(0, 25)
except AttributeError:
24 changes: 20 additions & 4 deletions examples/demo_agent_manager.py
@@ -57,15 +57,21 @@
init_kwargs=params,
eval_kwargs=eval_kwargs,
n_fit=4,
seed=123)
seed=123,
enable_tensorboard=True,
default_writer_kwargs=dict(
maxlen=N_EPISODES - 10,
log_interval=5.0,
))
rskernel_stats = AgentManager(
RSKernelUCBVIAgent,
train_env,
fit_budget=N_EPISODES,
init_kwargs=params_kernel,
eval_kwargs=eval_kwargs,
n_fit=4,
seed=123)
seed=123,
enable_tensorboard=True)
a2c_stats = AgentManager(
A2CAgent,
train_env,
@@ -81,16 +87,26 @@
for st in agent_manager_list:
st.fit()

# Fit RSUCBVI for 50 more episodes
rsucbvi_stats.fit(budget=50)

# learning curves
plot_writer_data(agent_manager_list,
tag='episode_rewards',
preprocess_func=np.cumsum,
title='cumulative rewards',
show=False)

plot_writer_data(agent_manager_list,
tag='episode_rewards',
title='episode rewards',
show=False)

# compare final policies
output = evaluate_agents(agent_manager_list)

print(output)

for st in agent_manager_list:
st.clear_output_dir()
# uncomment to delete output directories
# for st in agent_manager_list:
# st.clear_output_dir()
4 changes: 2 additions & 2 deletions examples/demo_dqn.py
@@ -12,8 +12,8 @@
agent.set_writer(SummaryWriter())

print(f"Running DQN on {env}")
print(f"Visualize with tensorboard by \
running:\n$tensorboard --logdir {Path(agent.writer.log_dir).parent}")
print("Visualize with tensorboard by "
f"running:\n$tensorboard --logdir {Path(agent.writer.log_dir).parent}")

agent.fit(budget=50)

5 changes: 5 additions & 0 deletions examples/demo_experiment/run.py
@@ -26,4 +26,9 @@
del multimanagers

data = load_experiment_results('results', 'params_experiment')

print(data)

# Fit one of the managers for a few more episodes
# If tensorboard is enabled, you should see more episodes run for 'rsucbvi_alternative'
data['manager']['rsucbvi_alternative'].fit(50)
40 changes: 20 additions & 20 deletions examples/demo_hyperparam_optim.py
@@ -1,5 +1,5 @@
from rlberry.envs.benchmarks.ball_exploration import PBall2D
from rlberry.agents.torch.ppo import PPOAgent
from rlberry.agents.torch import REINFORCEAgent
from rlberry.manager import AgentManager

if __name__ == '__main__':
@@ -11,54 +11,54 @@
# -----------------------------
# Parameters
# -----------------------------
N_EPISODES = 100
N_EPISODES = 10
GAMMA = 0.99
HORIZON = 50
BONUS_SCALE_FACTOR = 0.1
MIN_DIST = 0.1

params_ppo = {"gamma": GAMMA,
"horizon": HORIZON,
"learning_rate": 0.0003}
params = {"gamma": GAMMA,
"horizon": HORIZON,
"learning_rate": 0.0003}

eval_kwargs = dict(eval_horizon=HORIZON, n_simulations=20)

# -------------------------------
# Run AgentManager and save results
# --------------------------------
ppo_stats = AgentManager(
PPOAgent, train_env, fit_budget=N_EPISODES,
init_kwargs=params_ppo,
manager = AgentManager(
REINFORCEAgent, train_env, fit_budget=N_EPISODES,
init_kwargs=params,
eval_kwargs=eval_kwargs,
n_fit=4,
output_dir='dev/')
n_fit=4)

# hyperparam optim with multiple threads
ppo_stats.optimize_hyperparams(
manager.optimize_hyperparams(
n_trials=5, timeout=None,
n_fit=2,
sampler_method='optuna_default',
optuna_parallelization='thread')

initial_n_trials = len(ppo_stats.optuna_study.trials)
initial_n_trials = len(manager.optuna_study.trials)

# save
ppo_stats_fname = ppo_stats.save()
del ppo_stats
manager_fname = manager.save()
del manager

# load
ppo_stats = AgentManager.load(ppo_stats_fname)
manager = AgentManager.load(manager_fname)

# continue previous optimization, now with 120s of timeout and multiprocessing
ppo_stats.optimize_hyperparams(
manager.optimize_hyperparams(
n_trials=512, timeout=120,
n_fit=2,
n_fit=8,
continue_previous=True,
optuna_parallelization='process')
optuna_parallelization='process',
n_optuna_workers=4)

print("number of initial trials = ", initial_n_trials)
print("number of trials after continuing= ", len(ppo_stats.optuna_study.trials))
print("number of trials after continuing= ", len(manager.optuna_study.trials))

print("----")
print("fitting agents after choosing hyperparams...")
ppo_stats.fit() # fit the 4 agents
manager.fit() # fit the 4 agents
19 changes: 12 additions & 7 deletions examples/demo_network/run_remote_manager.py
@@ -14,7 +14,7 @@
port = int(input("Select server port: "))
client = BerryClient(port=port)

FIT_BUDGET = 1000
FIT_BUDGET = 500

local_manager = AgentManager(
agent_class=REINFORCEAgent,
@@ -35,10 +35,11 @@
fit_budget=FIT_BUDGET,
init_kwargs=dict(gamma=0.99),
eval_kwargs=dict(eval_horizon=200, n_simulations=20),
n_fit=2,
n_fit=3,
seed=10,
agent_name='REINFORCE(remote)',
parallelization='process'
parallelization='process',
enable_tensorboard=True,
)

remote_manager.set_writer(
@@ -48,7 +49,7 @@
)

# Optimize hyperparams of remote agent
best_params = remote_manager.optimize_hyperparams(timeout=120, optuna_parallelization='process')
best_params = remote_manager.optimize_hyperparams(timeout=60, optuna_parallelization='process')
print(f'best params = {best_params}')

# Test save/load
@@ -62,13 +63,17 @@
mmanagers.append(remote_manager)
mmanagers.run()

# Fit remotely for a few more episodes
remote_manager.fit(budget=100)

# plot
plot_writer_data(mmanagers.managers, tag='episode_rewards', show=False)
evaluate_agents(mmanagers.managers, n_simulations=10, show=True)

# Test some methods
print([manager.eval_agents() for manager in mmanagers.managers])

for manager in mmanagers.managers:
manager.clear_handlers()
manager.clear_output_dir()
# # uncomment to clear output files
# for manager in mmanagers.managers:
# manager.clear_handlers()
# manager.clear_output_dir()
4 changes: 0 additions & 4 deletions rlberry/agents/adaptiveql/adaptiveql.py
@@ -2,7 +2,6 @@
import gym.spaces as spaces
import numpy as np
from rlberry.agents import AgentWithSimplePolicy
from rlberry.utils.writers import DefaultWriter
from rlberry.agents.adaptiveql.tree import MDPTreePartition

logger = logging.getLogger(__name__)
@@ -85,9 +84,6 @@ def reset(self):
# info
self.episode = 0

# default writer
self.writer = DefaultWriter(self.name, metadata=self._metadata)

def policy(self, observation):
action, _ = self.Qtree.get_argmax_and_node(observation, 0)
return action
53 changes: 45 additions & 8 deletions rlberry/agents/agent.py
@@ -5,11 +5,14 @@
import numpy as np
from inspect import signature
from pathlib import Path
from rlberry import metadata_utils
from rlberry import types
from rlberry.seeding.seeder import Seeder
from rlberry.seeding import safe_reseed
from rlberry.envs.utils import process_env
from typing import Any, Optional, Mapping
from rlberry.utils.writers import DefaultWriter
from typing import Optional


logger = logging.getLogger(__name__)

@@ -27,8 +30,10 @@ class Agent(ABC):
If true, makes a deep copy of the environment.
seeder : rlberry.seeding.Seeder, int, or None
Object for random number generation.
_metadata : dict
Extra information (e.g. about which is the process id where the agent is running).
_execution_metadata : ExecutionMetadata (optional)
Extra information about agent execution (e.g. the process id in which the agent is running).
_default_writer_kwargs : dict (optional)
Parameters to initialize DefaultWriter (attribute self.writer).
.. note::
Classes that implement this interface should send ``**kwargs`` to :code:`Agent.__init__()`
@@ -45,6 +50,11 @@ class Agent(ABC):
Writer object (e.g. tensorboard SummaryWriter).
seeder : rlberry.seeding.Seeder, int, or None
Object for random number generation.
output_dir : str or Path
Directory that the agent can use to store data.
unique_id : str
Unique identifier for the agent instance. Can be used, for example,
to create files/directories for the agent to log data safely.
"""

name = ""
@@ -54,22 +64,47 @@ def __init__(self,
eval_env: Optional[types.Env] = None,
copy_env: bool = True,
seeder: Optional[types.Seed] = None,
_metadata: Optional[Mapping[str, Any]] = None,
output_dir: Optional[str] = None,
_execution_metadata: Optional[metadata_utils.ExecutionMetadata] = None,
_default_writer_kwargs: Optional[dict] = None,
**kwargs):
# Check if wrong parameters have been sent to an agent.
assert kwargs == {}, \
'Unknown parameters sent to agent:' + str(kwargs.keys())

self.seeder = Seeder(seeder)
self.env = process_env(env, self.seeder, copy_env=copy_env)
self.writer = None

# evaluation environment
eval_env = eval_env or env
self.eval_env = process_env(eval_env, self.seeder, copy_env=True)

# metadata
self._metadata = _metadata or dict()
self._execution_metadata = _execution_metadata or metadata_utils.ExecutionMetadata()
self._unique_id = metadata_utils.get_unique_id(self)
if self.name:
self._unique_id = self.name + '_' + self._unique_id

# create writer
_default_writer_kwargs = _default_writer_kwargs or dict(
name=self.name, execution_metadata=self._execution_metadata)
self._writer = DefaultWriter(**_default_writer_kwargs)

# output directory for the agent instance
self._output_dir = output_dir or f"output_{self._unique_id}"
self._output_dir = Path(self._output_dir)

@property
def writer(self):
return self._writer

@property
def unique_id(self):
return self._unique_id

@property
def output_dir(self):
return self._output_dir

@abstractmethod
def fit(self, budget: int, **kwargs):
@@ -87,6 +122,8 @@ def fit(self, budget: int, **kwargs):
optimization (by allowing early stopping), but it is not strictly required
elsewhere in the library.
If the agent does not require a budget, set it to -1.
Parameters
----------
budget: int
@@ -110,9 +147,9 @@ def eval(self, **kwargs):
pass

def set_writer(self, writer):
self.writer = writer
self._writer = writer

if self.writer:
if self._writer:
init_args = signature(self.__init__).parameters
kwargs = [f"| {key} | {getattr(self, key, None)} |" for key in init_args]
writer.add_text(
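For reference, here is a minimal, illustrative sketch of an agent subclass using the new base-class attributes (writer, unique_id, output_dir). It is not code from the commit: it follows the old-style gym reset/step API used in these examples, and it assumes DefaultWriter exposes an add_scalar-style interface mirroring the SummaryWriter it can wrap.

```python
from rlberry.agents import AgentWithSimplePolicy


class RandomAgent(AgentWithSimplePolicy):
    """Toy agent, used only to illustrate the new Agent attributes."""
    name = 'RandomAgent'

    def __init__(self, env, **kwargs):
        # **kwargs forwards seeder, output_dir, _execution_metadata,
        # _default_writer_kwargs, ... to the base class, which now builds
        # a DefaultWriter and a unique_id automatically.
        AgentWithSimplePolicy.__init__(self, env, **kwargs)

    def fit(self, budget: int, **kwargs):
        for episode in range(budget):
            observation = self.env.reset()
            done, episode_reward = False, 0.0
            while not done:
                action = self.policy(observation)
                observation, reward, done, _ = self.env.step(action)
                episode_reward += reward
            # self.writer is the DefaultWriter created in Agent.__init__;
            # add_scalar is assumed to mirror the SummaryWriter interface.
            self.writer.add_scalar('episode_rewards', episode_reward, episode)

    def policy(self, observation):
        return self.env.action_space.sample()


# Hypothetical usage: output_dir is the new constructor argument; when the
# agent is created by AgentManager, the manager sets it automatically.
# agent = RandomAgent(env, output_dir='dev/random_agent')
# agent.fit(budget=10)
# print(agent.unique_id, agent.output_dir)
```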