This repository has been archived by the owner on Aug 15, 2024. It is now read-only.

Commit 6a61dd7: adjust parameters
yasuohayashibara committed Mar 7, 2024
1 parent 554141e commit 6a61dd7
Showing 5 changed files with 25 additions and 28 deletions.
2 changes: 1 addition & 1 deletion controllers/mat_train/config.py
@@ -265,7 +265,7 @@ def get_config():
parser.add_argument("--use_linear_lr_decay", action='store_true',
default=False, help='use a linear schedule on the learning rate')
# save parameters
parser.add_argument("--save_interval", type=int, default=100, help="time duration between contiunous twice models saving.")
parser.add_argument("--save_interval", type=int, default=1, help="time duration between contiunous twice models saving.")

# log parameters
parser.add_argument("--log_interval", type=int, default=1, help="time duration between contiunous twice log printing.")
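For context, save_interval counts episodes between checkpoints, so dropping the default from 100 to 1 writes a model file after every training episode. A hypothetical sketch of how such a flag is typically consumed in a MAPPO/MAT-style runner loop (the actual call site is outside this diff, and the helper below is made up):

```python
# Hypothetical sketch, not code from this repository: how save_interval
# typically gates checkpointing in a MAPPO/MAT-style training loop.
def checkpoint_episodes(episodes: int, save_interval: int) -> list:
    """Episode indices at which a checkpoint would be written."""
    return [ep for ep in range(episodes) if ep % save_interval == 0]

# Old default (100) vs. new default (1), assuming e.g. a 2000-episode run:
print(len(checkpoint_episodes(2000, 100)))  # 20 checkpoints
print(len(checkpoint_episodes(2000, 1)))    # 2000 checkpoints (one per episode)
```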
4 changes: 2 additions & 2 deletions controllers/mat_train/mat_train.py
@@ -25,7 +25,7 @@ def init_env():
#env = FootballEnv(env_args=env_args)

env_args = {"scenario": all_args.scenario_name,
"episode_length": all_args.episode_length}
"episode_length": 300}
env = SoccerEnv(env_args=env_args)
else:
print("Can not support the " +
@@ -174,5 +174,5 @@ def main(args):

if __name__ == "__main__":
#main(sys.argv[1:])
arg_list = ['--seed', '1', '--env_name', 'soccer', '--algorithm_name', 'mat_dec', '--experiment_name', 'single', '--scenario_name', 'self-play', '--num_agents', '3', '--lr', '5e-4', '--entropy_coef', '0.0', '--max_grad_norm', '0.5', '--n_training_threads', '16', '--n_rollout_threads', '1', '--num_mini_batch', '1', '--episode_length', '100', '--num_env_steps', '10000000', '--ppo_epoch', '30', '--clip_param', '0.05', '--use_value_active_masks', '--use_policy_active_masks', '--model_dir', './transformer_2440.pt']
arg_list = ['--seed', '1', '--env_name', 'soccer', '--algorithm_name', 'mat_dec', '--experiment_name', 'single', '--scenario_name', 'self-play', '--num_agents', '3', '--lr', '5e-4', '--entropy_coef', '0.01', '--max_grad_norm', '0.5', '--n_training_threads', '16', '--n_rollout_threads', '1', '--num_mini_batch', '1', '--episode_length', '5000', '--num_env_steps', '10000000', '--ppo_epoch', '10', '--clip_param', '0.05', '--use_value_active_masks', '--use_policy_active_masks', '--model_dir', './transformer_1600.pt']
main(arg_list)
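Two things stand out in this file: init_env now pins the environment's episode_length to 300 instead of reading it from all_args, and the hard-coded arg_list switches to entropy_coef 0.01, ppo_epoch 10, episode_length 5000 and a different restore checkpoint. Assuming the usual MAPPO/MAT convention that the runner performs num_env_steps // episode_length // n_rollout_threads episodes (that formula is not shown in this diff), the new arg_list implies roughly the following schedule:

```python
# Back-of-the-envelope schedule implied by the new arg_list (illustrative only;
# the episodes formula is the standard MAPPO/MAT one and is assumed here).
num_env_steps = 10_000_000
episode_length = 5_000      # --episode_length (was 100)
n_rollout_threads = 1

episodes = num_env_steps // episode_length // n_rollout_threads
print(episodes)  # 2000 policy-update episodes over the full run
```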
2 changes: 1 addition & 1 deletion controllers/mat_train/runner/base_runner.py
@@ -104,7 +104,7 @@ def __init__(self, config):
device=self.device)

if self.model_dir is not None:
self.c_restore(self.model_dir)
self.c_restore("./transformer_0.pt")

# algorithm
self.c_trainer = TrainAlgo(self.all_args, self.c_policy, self.num_agents, device=self.device)
26 changes: 13 additions & 13 deletions controllers/mat_train/runner/soccer_runner.py
@@ -36,7 +36,7 @@ def run(self):
self.trainer.policy.lr_decay(episode, episodes)

for step in range(self.episode_length):
print(step, endl=" ", flush=True)
print(step, end='\r', flush=True)
# Sample actions
values, actions, action_log_probs, rnn_states, rnn_states_critic = self.collect(step)
c_values, c_actions, c_action_log_probs, c_rnn_states, c_rnn_states_critic = self.c_collect(step)
@@ -80,10 +80,10 @@ def run(self):
self.save(save_episode)

# copy team update network
if (episode % self.self_play_interval == 0 and episode > self.save_interval):
print("copy team update network")
self.c_restore(str(self.save_dir) + "/transformer_" + str(save_episode) + ".pt")
self.c_buffer.after_update()
#if (episode % self.self_play_interval == 0 and episode > self.save_interval):
# print("copy team update network")
# self.c_restore(str(self.save_dir) + "/transformer_" + str(save_episode) + ".pt")
#self.c_buffer.after_update()

# log information
if episode % self.log_interval == 0:
@@ -99,24 +99,24 @@
int(total_num_steps / (end - start))))

self.log_train(train_infos, total_num_steps)
print("rewards: {}".format(sum_episode_rewards[0]/num))

num = self.episode_length * self.log_interval
print("rewards: {}".format(sum_episode_rewards[0]/num))
self.writter.add_scalars("train_episode_rewards", {"total_rewards": sum_episode_rewards[0]/num}, total_num_steps)
sum_episode_rewards = [0 for _ in range(self.n_rollout_threads)]
self.writter.add_scalars("train_episode_rewards", {"ball_distance_reward": sum_ball_distance_reward[0]/num}, total_num_steps)
self.writter.add_scalars("train_episode_rewards", {"ball_distance_reward": sum_ball_distance_reward[0]/self.num_agents/num}, total_num_steps)
sum_ball_distance_reward = [0 for _ in range(self.n_rollout_threads)]
self.writter.add_scalars("train_episode_rewards", {"goal_reward": sum_goal_reward[0]/num}, total_num_steps)
self.writter.add_scalars("train_episode_rewards", {"goal_reward": sum_goal_reward[0]/self.num_agents/num}, total_num_steps)
sum_goal_reward = [0 for _ in range(self.n_rollout_threads)]
self.writter.add_scalars("train_episode_rewards", {"ball_vel_reward": sum_ball_vel_reward[0]/num}, total_num_steps)
self.writter.add_scalars("train_episode_rewards", {"ball_vel_reward": sum_ball_vel_reward[0]/self.num_agents/num}, total_num_steps)
sum_ball_vel_reward = [0 for _ in range(self.n_rollout_threads)]
self.writter.add_scalars("train_episode_rewards", {"out_of_field_reward": sum_out_of_field_reward[0]/num}, total_num_steps)
self.writter.add_scalars("train_episode_rewards", {"out_of_field_reward": sum_out_of_field_reward[0]/self.num_agents/num}, total_num_steps)
sum_out_of_field_reward = [0 for _ in range(self.n_rollout_threads)]
self.writter.add_scalars("train_episode_rewards", {"collision_reward": sum_collision_reward[0]/num}, total_num_steps)
self.writter.add_scalars("train_episode_rewards", {"collision_reward": sum_collision_reward[0]/self.num_agents/num}, total_num_steps)
sum_collision_reward = [0 for _ in range(self.n_rollout_threads)]
self.writter.add_scalars("train_episode_rewards", {"ball_position_reward": sum_ball_position_reward[0]/num}, total_num_steps)
self.writter.add_scalars("train_episode_rewards", {"ball_position_reward": sum_ball_position_reward[0]/self.num_agents/num}, total_num_steps)
sum_ball_position_reward = [0 for _ in range(self.n_rollout_threads)]
self.writter.add_scalars("train_episode_rewards", {"ball_tracking_reward": sum_ball_tracking_reward[0]/num}, total_num_steps)
self.writter.add_scalars("train_episode_rewards", {"ball_tracking_reward": sum_ball_tracking_reward[0]/self.num_agents/num}, total_num_steps)
sum_ball_tracking_reward = [0 for _ in range(self.n_rollout_threads)]

# eval
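Besides commenting out the self-play network copy and fixing the step printout (end='\r' instead of the invalid endl keyword), the logging hunk divides every per-term sum by self.num_agents, so those TensorBoard curves now show a per-agent, per-step average; the total_rewards curve keeps its old normalization. A small numerical illustration of the change, with made-up sums:

```python
# Illustrative only (made-up numbers): normalization of the per-term reward logs.
# Before: logged = sum_term / num          (num = episode_length * log_interval)
# After:  logged = sum_term / num_agents / num
num_agents = 3
episode_length = 5_000    # from the arg_list in mat_train.py
log_interval = 1          # config.py default
num = episode_length * log_interval

sum_goal_reward = 15_000.0                    # hypothetical accumulated team total
print(sum_goal_reward / num)                  # 3.0 -> old curve value
print(sum_goal_reward / num_agents / num)     # 1.0 -> new per-agent value
```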
19 changes: 8 additions & 11 deletions controllers/mat_train/soccer/soccer.py
@@ -131,7 +131,6 @@ def observe(self, agent):
obs[3] = -obs[3]
obs[4] = -obs[4]
obs[5] = -obs[5]
#obs[4] = normalize_angle_rad(obs[4]+math.pi)
return obs

def state(self):
@@ -142,8 +141,6 @@
for i in range(len(self.agent_list)):
player.append(self.agent_list[i].pos)
state = [ball_x, ball_y, 0, player[0][0], player[0][1], player[0][2], player[1][0], player[1][1], player[1][2], player[2][0], player[2][1], player[2][2], player[3][0], player[3][1], player[3][2], player[4][0], player[4][1], player[4][2], player[5][0], player[5][1], player[5][2]]
#state = [ball_x, ball_y, 0, player[0][0], player[0][1], player[0][2], player[1][0], player[1][1], player[1][2], player[2][0], player[2][1], player[2][2], player[3][0], player[3][1], player[3][2]]
#state = [ball_x, ball_y, 0, player[0][0], player[0][1], player[0][2], player[1][0], player[1][1], player[1][2], player[2][0], player[2][1], player[2][2]]
return state

def step(self, action):
@@ -159,8 +156,6 @@
truncate = False
goal = False

#print("frames: "+str(self.frames))

i = self.agent_name_mapping[self.agent_selection]
if self.agent_list[i].is_fall:
while True:
@@ -187,19 +182,21 @@
agent.send(message)

if self._agent_selector.is_last():
# rewards
ball_distance_reward = 1.0
rew_ball_distance = dict(zip(self.agents, [0.0 for _ in self.agents]))
goal_reward = 1000.0
velocity_reward = 10.0
out_of_field_reward = -100.0
collision_reward = -1.0
ball_position_reward = 2.0
ball_tracking_reward = 0.1

rew_ball_distance = dict(zip(self.agents, [0.0 for _ in self.agents]))
rew_goal = dict(zip(self.agents, [0.0 for _ in self.agents]))
velocity_reward = 100.0
rew_ball_vel = dict(zip(self.agents, [0.0 for _ in self.agents]))
out_of_field_reward = -100.0
rew_out_of_field = dict(zip(self.agents, [0.0 for _ in self.agents]))
collision_reward = -1.0
rew_collision = dict(zip(self.agents, [0.0 for _ in self.agents]))
ball_position_reward = 10.0
rew_ball_position = dict(zip(self.agents, [0.0 for _ in self.agents]))
ball_tracking_reward = 0.1
rew_ball_tracking = dict(zip(self.agents, [0.0 for _ in self.agents]))

self.frames += 1
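Each reward term in the hunk above is paired with a per-agent accumulator built as dict(zip(self.agents, [0.0 for _ in self.agents])). A minimal, self-contained illustration of that initializer (the agent names are made up):

```python
# Illustrative only: what dict(zip(agents, [0.0 for _ in agents])) produces.
agents = ["player_1", "player_2", "player_3"]   # hypothetical agent IDs

rew_ball_distance = dict(zip(agents, [0.0 for _ in agents]))
print(rew_ball_distance)  # {'player_1': 0.0, 'player_2': 0.0, 'player_3': 0.0}

# An equivalent, arguably clearer spelling is a dict comprehension:
rew_ball_distance = {agent: 0.0 for agent in agents}
```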
