# default.yaml
# Parameters for training
# Optuna settings (commented out)
# defaults:
# - override hydra/sweeper: optuna
# - override hydra/sweeper/sampler: tpe
# hydra:
# sweeper:
# n_trials: 100
# direction: maximize
# storage: null
# study_name: RL
# n_jobs: 1
# sampler:
# _target_: optuna.samplers.TPESampler
# seed: 123
# params:
# train.dqn_params.net_arch: choice([128, 128], [256, 256], [512, 512])
# train.dqn_params.learning_rate: choice(1e-3, 1e-4, 1e-5)
# train.dqn_params.buffer_size: range(1e4, 1e5, step=1e4)
# train.dqn_params.batch_size: choice(32, 64, 128)
# train.dqn_params.train_freq: choice(2, 4, 8)
# train.dqn_params.target_update_interval: range(1e3, 1e4, step=1e3)
# train.dqn_params.learning_starts: range(1e3, 1e4, step=1e3)
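# To run the Optuna search above, uncomment the defaults/hydra blocks and launch
# in multirun mode, e.g. `python <train_script>.py --multirun` (hypothetical
# script name; requires the hydra-optuna-sweeper plugin). Each trial then
# samples one combination of the listed dqn_params and maximizes the objective
# returned by the task function, per `direction: maximize`.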
hydra:
sweeper:
params:
# train.rl_algorithm: choice("DQN", "DQN-PRB")
train.seed: range(10000, 10010, step=1)
# train.navigation_scenarios: choice(
# ["config/scenario/four_pillar/random.yaml"],
# ["config/scenario/sixteen_pillar/random.yaml"],
# ["config/scenario/no_pillar/random.yaml"]
# )
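# With the active sweeper params above, a `--multirun` launch expands
# train.seed over range(10000, 10010) into 10 jobs, one per seed (standard
# Hydra sweep semantics; a plain single-run launch ignores this block).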
train:
# common settings
log_root_dir: "log"
log_save_dir: "train"
model_dir: "model"
model_name: "best_model.zip"
env_id: "NavigationStackEnv-v0"
rl_algorithm: "DQN-PRB" # DQN, DQN-HER, DQN-PRB
rl_config: "config/rl_params/default.yaml"
navigation_scenarios: ["config/scenario/sixteen_pillar/random.yaml"]
num_processes: 20 # number of processes for evaluation
seed: 10001
num_test_episodes: 100
# random maps
is_generate_random_map: False
random_map_num: 100
map_config: "config/map_creator/train_map.yaml"
# training parameters
train_mode: "new" # new or continue
total_timesteps: 1e5
tensor_board_path: "tensorboard"
model_save_freq: 10000
tensorboard_check_freq: 100
verbose: 1 # 0: no output, 1: info, 2: debug
# DQN parameters
dqn_params:
net_arch: [128, 128] # network architecture
learning_schedule: "constant" # "linear" or "constant"
learning_rate: 1e-4
buffer_size: 1e5
learning_starts: 4e3
batch_size: 128
train_freq: 2
target_update_interval: 3e3
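    # Rough reading of these values, assuming Stable-Baselines3 DQN semantics
    # (this repo's exact interpretation may differ): no learning for the first
    # 4e3 steps while the 1e5-transition buffer warms up, then one gradient
    # step on a 128-sample minibatch every 2 environment steps, with the
    # target network synced every 3e3 steps. "linear" would decay the learning
    # rate from 1e-4 toward 0 over training; "constant" keeps it fixed.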
# HER parameters
her_params:
n_sampled_goal: 4
goal_selection_strategy: "future" # future or final
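    # HER (Andrychowicz et al., 2017): each stored transition is relabeled
    # with n_sampled_goal=4 substitute goals; "future" draws them from states
    # visited later in the same episode, "final" uses only the episode's last
    # state.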
# Prioritized Replay Buffer parameters
prb_params:
prioritized_error: "q_error" # q_error or td_error
alpha: 0.6
initial_beta: 0.4
beta_increment: 0.00001
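    # Prioritized replay sketch (Schaul et al., 2016 formulation; assumed to
    # match this buffer): transition i is sampled with probability
    # p_i^alpha / sum_k p_k^alpha, and the sampling bias is corrected with
    # importance weights w_i = (N * P(i))^(-beta). With initial_beta=0.4 and
    # beta_increment=1e-5 per sample, beta anneals to 1.0 after
    # (1.0 - 0.4) / 1e-5 = 6e4 sampling steps.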
navigation:
common:
max_episode_steps: 500
    global_planner_calculation_interval: 10 # [step] interval between global path recalculations
    global_planner: "Dijkstra" # "Dijkstra", "RRT", "RRT-star", or "PRM"
local_planner: "DWA" # "DWA" or "MPC"
control_interval_time: 0.1 # [s]
robot_radius: 1.0 # [m]
    inflation_radius: 0.1 # [m] inflation radius added to the robot radius for the global and local planners
baseline_params:
    time_triggered_replan_threshold: 10 # [step] must be >= global_planner_calculation_interval
    stuck_replan_time_threshold: 3 # [s] replan if the robot has been stuck for longer than this time
    distance_replan_threshold: 1.0 # [m] replan every time the robot has moved this distance
    patience_dist_threshold: 3.0 # [m] within this distance of the goal, replan based on stuck time instead
    patience_time_threshold: 5 # [s] when close to the goal, replan once the stuck time exceeds this value
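    # Taken together (an interpretation read from the parameter names, not a
    # guarantee): the baseline replans on whichever trigger fires first, i.e.
    # every 10 steps, after 3 s of being stuck, or every 1.0 m traveled; within
    # 3.0 m of the goal it instead waits until the stuck time exceeds 5 s.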
handmade_detection:
# Stuck
stuck_check_interval: 1.0 # [s]
stuck_radius: 0.5 # [m]
    # Oscillation
oscillation_check_interval: 1.0 # [s]
    oscillation_detect_time: 4.0 # [s] if the robot keeps oscillating for longer than this, it is judged to be oscillating
    oscillation_goal_dist_threshold: 0.5 # [m] oscillation is not detected when the robot is within this distance of the goal
    flipping_angle_threshold: 0.02 # [rad/s] angular velocity magnitude above which a direction flip is counted
    flipping_count_num: 10 # length of the deque that counts recent flips
    flipping_num_threshold: 3 # if the flip count exceeds this number, the robot is judged to be oscillating
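    # Reading the parameters above (assumed semantics): every 1.0 s the checker
    # looks for sign flips of the angular velocity above 0.02 rad/s within the
    # last 10 recorded values; 3 or more flips sustained for 4.0 s, while the
    # robot is farther than 0.5 m from the goal, flag oscillation.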
dwa_config:
# DWA parameters
vel_x_limit:
max: 2.0 # [m/s]
min: 0.0 # [m/s]
vel_y_limit:
max: 0.1 # [m/s]
min: -0.1 # [m/s]
vel_theta_limit:
max: 2.0 # [rad/s]
min: -2.0 # [rad/s]
vel_x_samples: 5
vel_y_samples: 3
vel_theta_samples: 8
# weights
path_distance_bias: 1.0
path_angle_bias: 0.0
sub_goal_distance_bias: 1.0
sub_goal_angle_bias: 0.0001
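    # Candidate trajectories per control step = vel_x_samples * vel_y_samples
    # * vel_theta_samples = 5 * 3 * 8 = 120 (assuming an exhaustive grid over
    # the velocity limits). Each rollout is scored by a weighted sum of the
    # four bias terms above; with these weights, the path and sub-goal
    # distance terms dominate and the angle terms barely contribute.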
dijkstra_config:
# Dijkstra parameters
connectedness: 8 # 4, 8, 16, or 32
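    # connectedness sets the grid neighborhood expanded per node: 4 allows
    # axis-aligned moves only, 8 adds diagonals, and 16/32 presumably add
    # longer "knight-like" jumps for smoother paths at higher expansion cost
    # (inferred from the allowed values; 4- and 8-connectivity are the
    # standard grid cases).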