# minimalConfig.yaml
training:
  # Training batch size, if applicable.
  train_batch_size: 30000
  # Arguments passed into the policy model. See models/catalog.py for a full list of the available model options.
  # TODO: Provide ModelConfig objects instead of dicts.
  model:
    fcnet_hiddens: [256, 256]
    fcnet_activation: swish
  # sgd_minibatch_size – Total SGD batch size across all devices for SGD. This defines the minibatch size within each epoch.
  sgd_minibatch_size: 5000
  # num_sgd_iter – Number of SGD iterations in each outer loop (i.e., number of epochs to execute per train batch).
  num_sgd_iter: 6
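
  # Illustrative note (not read by the config loader): with the values above, each train
  # batch of 30000 samples splits into 30000 / 5000 = 6 SGD minibatches per epoch, and
  # num_sgd_iter = 6 epochs are run, i.e. 36 minibatch updates per training iteration.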
environment:
  # Algorithm's environment specifier.
  env: cassie-v0
  # If True, RLlib will learn entirely inside a normalized action space (0.0 centered with small stddev; only affecting Box components).
  # We will unsquash actions (and clip, just in case) to the bounds of the env’s action space before sending actions back to the env.
  normalize_actions: false
  # If True, RLlib will clip actions according to the env’s bounds before sending them back to the env.
  # TODO: (sven) This option should be deprecated and always be False.
  clip_actions: true
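
  # Commented-out alternative (sketch): learn in the normalized action space and let RLlib
  # unsquash actions to the env’s Box bounds, instead of hard-clipping raw actions.
  # normalize_actions: true
  # clip_actions: false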
framework:
  # tf: TensorFlow (static-graph); tf2: TensorFlow 2.x (eager or traced, if eager_tracing=True); torch: PyTorch.
  framework: tf2
  # Enable tracing in eager mode. This greatly improves performance (speedup ~2x),
  # but makes it slightly harder to debug since Python code won’t be evaluated after the initial eager pass.
  # Only possible if framework=tf2.
  eager_tracing: false
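
  # Commented-out alternative (sketch): once the model code is debugged, enable tracing for
  # the ~2x speedup described above. Only valid together with framework: tf2.
  # eager_tracing: true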
rollouts:
  # How to build per-Sampler (RolloutWorker) batches, which are then usually concat’d to form the train batch.
  # Note that “steps” below can mean different things (either env- or agent-steps) and depends on the count_steps_by setting,
  # adjustable via AlgorithmConfig.multi_agent(count_steps_by=..):
  # 1) “truncate_episodes”: Each call to sample() will return a batch of at most rollout_fragment_length * num_envs_per_worker in size.
  #    The batch will be exactly rollout_fragment_length * num_envs in size if postprocessing does not change batch sizes.
  #    Episodes may be truncated in order to meet this size requirement.
  #    This mode guarantees evenly sized batches, but increases variance as the future return must now be estimated at truncation boundaries.
  # 2) “complete_episodes”: Each call to sample() will return a batch of at least rollout_fragment_length * num_envs_per_worker in size.
  #    Episodes will not be truncated, but multiple episodes may be packed within one batch to meet the (minimum) batch size.
  #    Note that when num_envs_per_worker > 1, episode steps will be buffered until the episode completes,
  #    and hence batches may contain significant amounts of off-policy data.
  batch_mode: truncate_episodes
  # Element-wise observation filter, either “NoFilter” or “MeanStdFilter”.
  observation_filter: NoFilter
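
  # Commented-out alternative (sketch): collect whole episodes per sample() call and normalize
  # observations with a running mean/std filter instead of passing them through unfiltered.
  # batch_mode: complete_episodes
  # observation_filter: MeanStdFilter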
evaluation:
  # Evaluate once every evaluation_interval training iterations.
  # The evaluation stats will be reported under the "evaluation" metric key.
  # Note that for Ape-X, metrics are already only reported for the lowest-epsilon (least random) workers.
  # Set to None (or 0) for no evaluation.
  evaluation_interval: 2
  # Duration for which to run evaluation each evaluation_interval.
  # The unit for the duration can be set via evaluation_duration_unit to either "episodes" (default) or "timesteps".
  # If using multiple evaluation workers (evaluation_num_workers > 1), the load to run will be split amongst these.
  # If the value is "auto":
  #   - For evaluation_parallel_to_training=True: Will run as many episodes/timesteps as fit into the (parallel) training step.
  #   - For evaluation_parallel_to_training=False: Error.
  evaluation_duration: 10
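
  # Commented-out alternative (sketch; these keys are not set in this file): measure the
  # evaluation duration in timesteps instead of episodes and run evaluation in parallel
  # with training, as described above.
  # evaluation_duration_unit: timesteps
  # evaluation_parallel_to_training: true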
checkpointing:
  # Whether to include (tf or torch) native model files in the individual Policy or Algorithm checkpoints.
  # These files can be used to restore the NN models without requiring RLlib.
  # These files are generated using the tf- or torch- built-in saving utility methods on the actual models.
  export_native_model_files: true
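
  # Illustrative note (assumption): the exported native files can then be restored with the
  # framework’s own loaders (e.g., tf.keras.models.load_model for tf/tf2, torch.load for torch)
  # without importing RLlib.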
debugging:
  # This argument, in conjunction with worker_index, sets the random seed of each worker,
  # so that identically configured trials will have identical results. This makes experiments reproducible.
  seed: 1234
callbacks:
  # Callbacks class, whose methods will be run during various phases of training and environment sample collection.
  # See the DefaultCallbacks class and examples/custom_metrics_and_callbacks.py for more usage information.
  callbacks_class: null
resources:
  # Number of GPUs to allocate to the algorithm process.
  # Note that not all algorithms can take advantage of GPUs.
  # Support for multi-GPU is currently only available for tf-[PPO/IMPALA/DQN/PG].
  # This can be fractional (e.g., 0.3 GPUs).
  num_gpus: 1
  # Number of CPUs to allocate per worker.
  num_cpus_per_worker: 1
  # Number of GPUs to allocate per worker.
  num_gpus_per_worker: 0.05
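
  # Illustrative note (not read by the config loader): with num_gpus_per_worker: 0.05, up to
  # 1 / 0.05 = 20 rollout workers can share a single GPU, in addition to the full GPU
  # reserved for the algorithm process by num_gpus: 1.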