Skip to content

Commit

Permalink
debugging partial_to_dict usage
Browse files Browse the repository at this point in the history
  • Loading branch information
yannbouteiller committed Jan 25, 2022
1 parent 7237773 commit ab5c069
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 21 deletions.
21 changes: 12 additions & 9 deletions readme/tuto_library.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ The full script for this tutorial is available [here](https://github.com/trackma
## Tools

### partial() method
We use this method a lot in `tmrl`, it enables partially instantiating a class.
We use this method a lot in `tmrl`; it enables partially initializing a class by fixing some of its kwargs in advance.
Import this method in your script:

```python
Expand All @@ -63,7 +63,7 @@ my_partially_instantiated_class = partial(my_class,
And the partially instantiated class can then be fully instantiated as:

```python
my_object = my_partially_instantiated_class(missing_args_and_kwargs)
my_object = my_partially_instantiated_class(missing_kwargs)
```

### Constants
Expand Down Expand Up @@ -235,8 +235,8 @@ import tmrl.config.config_constants as cfg # constants from the config.json fil
class RolloutWorker:
def __init__(
self,
env_cls, # class of the Gym environment
actor_module_cls, # class of a module containing the policy
env_cls=None, # class of the Gym environment
actor_module_cls=None, # class of a module containing the policy
sample_compressor: callable = None, # compressor for sending samples over the Internet
device="cpu", # device on which the policy is running
server_ip=None, # ip of the central server
Expand Down Expand Up @@ -753,8 +753,8 @@ Thus, we will use the action buffer length as an additional argument to our cust

```python
def __init__(self,
device,
nb_steps,
device=None,
nb_steps=None,
obs_preprocessor: callable = None,
sample_preprocessor: callable = None,
memory_size=1000000,
Expand Down Expand Up @@ -1010,9 +1010,9 @@ class MyTrainingAgent(TrainingAgent):
model_nograd = cached_property(lambda self: no_grad(copy_shared(self.model)))

def __init__(self,
observation_space,
action_space,
device,
observation_space=None,
action_space=None,
device=None,
model_cls=MyActorCriticModule, # an actor-critic module, encapsulating our ActorModule
gamma=0.99, # discount factor
polyak=0.995, # exponential averaging factor for the target critic
Expand Down Expand Up @@ -1244,6 +1244,9 @@ my_trainer.run_with_wandb(entity=my_wandb_entity,
key=my_wandb_key)
```

_(**WARNING**: when using `run_with_wandb`, make sure all the partially instantiated classes that are part of the `Trainer` take keyword arguments only (no positional arguments); otherwise you will get an error complaining about invalid keywords.
When a default value does not make sense, simply set it to `None`, as done in, e.g., `MyMemoryDataloading`.)_

But as for the `RolloutWorker`, this would block the code here until all `epochs` are complete, which in itself would require the `RolloutWorker` to also be running.

In fact, the `RolloutWorker`, `Trainer` and `Server` are best run in separate terminals (see TrackMania) because currently they are all quite verbose.
Expand Down
12 changes: 6 additions & 6 deletions tmrl/custom/custom_memories.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ def replace_hist_before_done(hist, done_idx_in_hist):

class MemoryTMNF(MemoryDataloading):
def __init__(self,
memory_size,
batch_size,
memory_size=None,
batch_size=None,
dataset_path="",
imgs_obs=4,
act_buf_len=1,
Expand Down Expand Up @@ -219,8 +219,8 @@ def append_buffer(self, buffer):

class TrajMemoryTMNF(TrajMemoryDataloading):
def __init__(self,
memory_size,
batch_size,
memory_size=None,
batch_size=None,
dataset_path="",
imgs_obs=4,
act_buf_len=1,
Expand Down Expand Up @@ -358,8 +358,8 @@ def append_buffer(self, buffer):

class MemoryTM2020(MemoryDataloading): # TODO: reset transitions
def __init__(self,
memory_size,
batch_size,
memory_size=None,
batch_size=None,
dataset_path="",
imgs_obs=4,
act_buf_len=1,
Expand Down
12 changes: 6 additions & 6 deletions tmrl/tuto/tuto.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,9 +304,9 @@ def my_observation_preprocessor(obs):

class MyMemoryDataloading(MemoryDataloading):
def __init__(self,
act_buf_len,
device,
nb_steps,
act_buf_len=None,
device=None,
nb_steps=None,
obs_preprocessor: callable = None,
sample_preprocessor: callable = None,
memory_size=1000000,
Expand Down Expand Up @@ -457,9 +457,9 @@ class MyTrainingAgent(TrainingAgent):
model_nograd = cached_property(lambda self: no_grad(copy_shared(self.model)))

def __init__(self,
observation_space,
action_space,
device,
observation_space=None,
action_space=None,
device=None,
model_cls=MyActorCriticModule, # an actor-critic module, encapsulating our ActorModule
gamma=0.99, # discount factor
polyak=0.995, # exponential averaging factor for the target critic
Expand Down

0 comments on commit ab5c069

Please sign in to comment.