# default.yaml
# Parameters for training
# Optuna settings (commented out)
# defaults:
# - override hydra/sweeper: optuna
# - override hydra/sweeper/sampler: tpe
# hydra:
# sweeper:
# n_trials: 100
# direction: maximize
# storage: null
# study_name: RL
# n_jobs: 1
# sampler:
# _target_: optuna.samplers.TPESampler
# seed: 123
# params:
# train.dqn_params.net_arch: choice([128, 128], [256, 256], [512, 512])
# train.dqn_params.learning_rate: choice(1e-3, 1e-4, 1e-5)
# train.dqn_params.buffer_size: range(1e4, 1e5, step=1e4)
# train.dqn_params.batch_size: choice(32, 64, 128)
# train.dqn_params.train_freq: choice(2, 4, 8)
# train.dqn_params.target_update_interval: range(1e3, 1e4, step=1e3)
# train.dqn_params.learning_starts: range(1e3, 1e4, step=1e3)
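# To run the Optuna search above, uncomment the defaults/hydra blocks and launch
# in multirun mode, e.g. `python <train_script>.py --multirun` (hypothetical
# script name; requires the hydra-optuna-sweeper plugin). Each trial then
# samples one combination of the listed dqn_params and maximizes the objective
# returned by the task function, per `direction: maximize`.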
hydra:
sweeper:
params:
# train.rl_algorithm: choice("DQN", "DQN-PRB")
train.seed: range(10000, 10010, step=1)
# train.navigation_scenarios: choice(
# ["config/scenario/four_pillar/random.yaml"],
# ["config/scenario/sixteen_pillar/random.yaml"],
# ["config/scenario/no_pillar/random.yaml"]
# )
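# With the active sweeper params above, a `--multirun` launch expands
# train.seed over range(10000, 10010) into 10 jobs, one per seed (standard
# Hydra sweep semantics; a plain single-run launch ignores this block).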
train:
# common settings
log_root_dir: "log"
log_save_dir: "train"
model_dir: "model"
model_name: "best_model.zip"
env_id: "NavigationStackEnv-v0"
rl_algorithm: "DQN-PRB" # DQN, DQN-HER, DQN-PRB
rl_config: "config/rl_params/default.yaml"
navigation_scenarios: ["config/scenario/sixteen_pillar/random.yaml"]
num_processes: 20 # number of processes for evaluation
seed: 10001
num_test_episodes: 100
# random maps
is_generate_random_map: False
random_map_num: 100
map_config: "config/map_creator/train_map.yaml"
# training parameters
train_mode: "new" # new or continue
total_timesteps: 1e5
tensor_board_path: "tensorboard"
model_save_freq: 10000
tensorboard_check_freq: 100
verbose: 1 # 0: no output, 1: info, 2: debug
# DQN parameters
dqn_params:
net_arch: [128, 128] # network architecture
learning_schedule: "constant" # "linear" or "constant"
learning_rate: 1e-4
buffer_size: 1e5
learning_starts: 4e3
batch_size: 128
train_freq: 2
target_update_interval: 3e3
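    # Rough reading of these values, assuming Stable-Baselines3 DQN semantics
    # (this repo's exact interpretation may differ): no learning for the first
    # 4e3 steps while the 1e5-transition buffer warms up, then one gradient
    # step on a 128-sample minibatch every 2 environment steps, with the
    # target network synced every 3e3 steps. "linear" would decay the learning
    # rate from 1e-4 toward 0 over training; "constant" keeps it fixed.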
# HER parameters
her_params:
n_sampled_goal: 4
goal_selection_strategy: "future" # future or final
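    # HER (Andrychowicz et al., 2017): each stored transition is relabeled
    # with n_sampled_goal=4 substitute goals; "future" draws them from states
    # visited later in the same episode, "final" uses only the episode's last
    # state.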
# Prioritized Replay Buffer parameters
prb_params:
prioritized_error: "q_error" # q_error or td_error
alpha: 0.6
initial_beta: 0.4
beta_increment: 0.00001
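    # Prioritized replay sketch (Schaul et al., 2016 formulation; assumed to
    # match this buffer): transition i is sampled with probability
    # p_i^alpha / sum_k p_k^alpha, and the sampling bias is corrected with
    # importance weights w_i = (N * P(i))^(-beta). With initial_beta=0.4 and
    # beta_increment=1e-5 per sample, beta anneals to 1.0 after
    # (1.0 - 0.4) / 1e-5 = 6e4 sampling steps.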
navigation:
common:
max_episode_steps: 500
    global_planner_calculation_interval: 10 # [step] interval between global path recalculations
    global_planner: "Dijkstra" # "Dijkstra", "RRT", "RRT-star", or "PRM"
local_planner: "DWA" # "DWA" or "MPC"
control_interval_time: 0.1 # [s]
robot_radius: 1.0 # [m]
    inflation_radius: 0.1 # [m] inflation radius added to the robot radius for the global and local planners
baseline_params:
    time_triggered_replan_threshold: 10 # [step] must be >= global_planner_calculation_interval
    stuck_replan_time_threshold: 3 # [s] replan if the robot has been stuck for longer than this time
    distance_replan_threshold: 1.0 # [m] replan every time the robot has moved this distance
    patience_dist_threshold: 3.0 # [m] within this distance of the goal, replan based on stuck time instead
    patience_time_threshold: 5 # [s] when close to the goal, replan once the stuck time exceeds this value
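    # Taken together (an interpretation read from the parameter names, not a
    # guarantee): the baseline replans on whichever trigger fires first, i.e.
    # every 10 steps, after 3 s of being stuck, or every 1.0 m traveled; within
    # 3.0 m of the goal it instead waits until the stuck time exceeds 5 s.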
handmade_detection:
# Stuck
stuck_check_interval: 1.0 # [s]
stuck_radius: 0.5 # [m]
    # Oscillation
oscillation_check_interval: 1.0 # [s]
    oscillation_detect_time: 4.0 # [s] if the robot keeps oscillating for longer than this, it is judged to be oscillating
    oscillation_goal_dist_threshold: 0.5 # [m] oscillation is not detected when the robot is within this distance of the goal
    flipping_angle_threshold: 0.02 # [rad/s] angular velocity magnitude above which a direction flip is counted
    flipping_count_num: 10 # length of the deque that counts recent flips
    flipping_num_threshold: 3 # if the flip count exceeds this number, the robot is judged to be oscillating
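    # Reading the parameters above (assumed semantics): every 1.0 s the checker
    # looks for sign flips of the angular velocity above 0.02 rad/s within the
    # last 10 recorded values; 3 or more flips sustained for 4.0 s, while the
    # robot is farther than 0.5 m from the goal, flag oscillation.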
dwa_config:
# DWA parameters
vel_x_limit:
max: 2.0 # [m/s]
min: 0.0 # [m/s]
vel_y_limit:
max: 0.1 # [m/s]
min: -0.1 # [m/s]
vel_theta_limit:
max: 2.0 # [rad/s]
min: -2.0 # [rad/s]
vel_x_samples: 5
vel_y_samples: 3
vel_theta_samples: 8
# weights
path_distance_bias: 1.0
path_angle_bias: 0.0
sub_goal_distance_bias: 1.0
sub_goal_angle_bias: 0.0001
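    # Candidate trajectories per control step = vel_x_samples * vel_y_samples
    # * vel_theta_samples = 5 * 3 * 8 = 120 (assuming an exhaustive grid over
    # the velocity limits). Each rollout is scored by a weighted sum of the
    # four bias terms above; with these weights, the path and sub-goal
    # distance terms dominate and the angle terms barely contribute.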
dijkstra_config:
# Dijkstra parameters
connectedness: 8 # 4, 8, 16, or 32
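    # connectedness sets the grid neighborhood expanded per node: 4 allows
    # axis-aligned moves only, 8 adds diagonals, and 16/32 presumably add
    # longer "knight-like" jumps for smoother paths at higher expansion cost
    # (inferred from the allowed values; 4- and 8-connectivity are the
    # standard grid cases).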