# minimalConfig.yaml
training:
  # Training batch size, if applicable.
  train_batch_size: 30000
  # Arguments passed into the policy model. See models/catalog.py for a full list of the available model options.
  # TODO: Provide ModelConfig objects instead of dicts.
  model:
    fcnet_hiddens: [256, 256]
    fcnet_activation: swish
  # sgd_minibatch_size – Total SGD batch size across all devices for SGD. This defines the minibatch size within each epoch.
  sgd_minibatch_size: 5000
  # num_sgd_iter – Number of SGD iterations in each outer loop (i.e., number of epochs to execute per train batch).
  num_sgd_iter: 6
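
  # Illustrative note (not read by the config loader): with the values above, each train
  # batch of 30000 samples splits into 30000 / 5000 = 6 SGD minibatches per epoch, and
  # num_sgd_iter = 6 epochs are run, i.e. 36 minibatch updates per training iteration.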
environment:
  # Algorithm's environment specifier.
  env: cassie-v0
  # If True, RLlib will learn entirely inside a normalized action space (0.0 centered with small stddev; only affecting Box components).
  # We will unsquash actions (and clip, just in case) to the bounds of the env’s action space before sending actions back to the env.
  normalize_actions: false
  # If True, RLlib will clip actions according to the env’s bounds before sending them back to the env.
  # TODO: (sven) This option should be deprecated and always be False.
  clip_actions: true
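
  # Commented-out alternative (sketch): learn in the normalized action space and let RLlib
  # unsquash actions to the env’s Box bounds, instead of hard-clipping raw actions.
  # normalize_actions: true
  # clip_actions: false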
framework:
  # tf: TensorFlow (static-graph); tf2: TensorFlow 2.x (eager or traced, if eager_tracing=True); torch: PyTorch.
  framework: tf2
  # Enable tracing in eager mode. This greatly improves performance (speedup ~2x),
  # but makes it slightly harder to debug since Python code won’t be evaluated after the initial eager pass.
  # Only possible if framework=tf2.
  eager_tracing: false
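
  # Commented-out alternative (sketch): once the model code is debugged, enable tracing for
  # the ~2x speedup described above. Only valid together with framework: tf2.
  # eager_tracing: true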
rollouts:
  # How to build per-Sampler (RolloutWorker) batches, which are then usually concat’d to form the train batch.
  # Note that “steps” below can mean different things (either env- or agent-steps) and depends on the count_steps_by setting,
  # adjustable via AlgorithmConfig.multi_agent(count_steps_by=..):
  # 1) “truncate_episodes”: Each call to sample() will return a batch of at most rollout_fragment_length * num_envs_per_worker in size.
  #    The batch will be exactly rollout_fragment_length * num_envs in size if postprocessing does not change batch sizes.
  #    Episodes may be truncated in order to meet this size requirement.
  #    This mode guarantees evenly sized batches, but increases variance as the future return must now be estimated at truncation boundaries.
  # 2) “complete_episodes”: Each call to sample() will return a batch of at least rollout_fragment_length * num_envs_per_worker in size.
  #    Episodes will not be truncated, but multiple episodes may be packed within one batch to meet the (minimum) batch size.
  #    Note that when num_envs_per_worker > 1, episode steps will be buffered until the episode completes,
  #    and hence batches may contain significant amounts of off-policy data.
  batch_mode: truncate_episodes
  # Element-wise observation filter, either “NoFilter” or “MeanStdFilter”.
  observation_filter: NoFilter
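
  # Commented-out alternative (sketch): collect whole episodes per sample() call and normalize
  # observations with a running mean/std filter instead of passing them through unfiltered.
  # batch_mode: complete_episodes
  # observation_filter: MeanStdFilter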
evaluation:
  # Evaluate once every evaluation_interval training iterations.
  # The evaluation stats will be reported under the "evaluation" metric key.
  # Note that for Ape-X, metrics are already only reported for the lowest-epsilon (least random) workers.
  # Set to None (or 0) for no evaluation.
  evaluation_interval: 2
  # Duration for which to run evaluation each evaluation_interval.
  # The unit for the duration can be set via evaluation_duration_unit to either "episodes" (default) or "timesteps".
  # If using multiple evaluation workers (evaluation_num_workers > 1), the load to run will be split amongst these.
  # If the value is "auto":
  #   - For evaluation_parallel_to_training=True: Will run as many episodes/timesteps as fit into the (parallel) training step.
  #   - For evaluation_parallel_to_training=False: Error.
  evaluation_duration: 10
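
  # Commented-out alternative (sketch; these keys are not set in this file): measure the
  # evaluation duration in timesteps instead of episodes and run evaluation in parallel
  # with training, as described above.
  # evaluation_duration_unit: timesteps
  # evaluation_parallel_to_training: true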
checkpointing:
  # Whether to include (tf or torch) native model files in the individual Policy or Algorithm checkpoints.
  # These files can be used to restore the NN models without requiring RLlib.
  # These files are generated using the tf- or torch- built-in saving utility methods on the actual models.
  export_native_model_files: true
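
  # Illustrative note (assumption): the exported native files can then be restored with the
  # framework’s own loaders (e.g., tf.keras.models.load_model for tf/tf2, torch.load for torch)
  # without importing RLlib.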
debugging:
  # This argument, in conjunction with worker_index, sets the random seed of each worker,
  # so that identically configured trials will have identical results. This makes experiments reproducible.
  seed: 1234
callbacks:
  # Callbacks class, whose methods will be run during various phases of training and environment sample collection.
  # See the DefaultCallbacks class and examples/custom_metrics_and_callbacks.py for more usage information.
  callbacks_class: null
resources:
  # Number of GPUs to allocate to the algorithm process.
  # Note that not all algorithms can take advantage of GPUs.
  # Support for multi-GPU is currently only available for tf-[PPO/IMPALA/DQN/PG].
  # This can be fractional (e.g., 0.3 GPUs).
  num_gpus: 1
  # Number of CPUs to allocate per worker.
  num_cpus_per_worker: 1
  # Number of GPUs to allocate per worker.
  num_gpus_per_worker: 0.05
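
  # Illustrative note (not read by the config loader): with num_gpus_per_worker: 0.05, up to
  # 1 / 0.05 = 20 rollout workers can share a single GPU, in addition to the full GPU
  # reserved for the algorithm process by num_gpus: 1.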