From 179933ff14f567d0ae7a5cbdaad1cd5f665780e7 Mon Sep 17 00:00:00 2001 From: Matheus Albuquerque Date: Fri, 19 Mar 2021 16:58:39 -0300 Subject: [PATCH 1/3] Rebase vss_atk_vs_gk with master --- agents/deepvss/deepvss_atk_vs_gk.py | 248 ++++++++++++ envs/rc_gym/__init__.py | 4 + envs/rc_gym/vss/env_atk_vs_gk/__init__.py | 1 + .../vss/env_atk_vs_gk/goalkeeper/models.py | 81 ++++ .../rc_gym/vss/env_atk_vs_gk/vss_atk_vs_gk.py | 382 ++++++++++++++++++ 5 files changed, 716 insertions(+) create mode 100644 agents/deepvss/deepvss_atk_vs_gk.py create mode 100644 envs/rc_gym/vss/env_atk_vs_gk/__init__.py create mode 100644 envs/rc_gym/vss/env_atk_vs_gk/goalkeeper/models.py create mode 100644 envs/rc_gym/vss/env_atk_vs_gk/vss_atk_vs_gk.py diff --git a/agents/deepvss/deepvss_atk_vs_gk.py b/agents/deepvss/deepvss_atk_vs_gk.py new file mode 100644 index 00000000..0680ddee --- /dev/null +++ b/agents/deepvss/deepvss_atk_vs_gk.py @@ -0,0 +1,248 @@ +#!/usr/bin/env python3 +import argparse +import ctypes +import math +import os +import sys +import threading +import time +# VSS customization: +import traceback +from importlib.machinery import SourceFileLoader + +import gym +import torch +import torch.multiprocessing as mp +from tensorboardX import SummaryWriter + +import rc_gym +import parameters_dqn +import numpy as np + +# Global variables +writer = None +collected_samples = None + + +def dict_to_str(dct): + dict_str = "" + for key, value in dct.items(): + dict_str += "{}: {}\n".format(key, value) + + return dict_str + + +class NormalizedWrapper(gym.Wrapper): + """ + :param env: (gym.Env) Gym environment that will be wrapped + """ + + def __init__(self, env): + # Call the parent constructor, so we can access self.env later + super(NormalizedWrapper, self).__init__(env) + + assert isinstance(self.env.action_space, + gym.spaces.Box), "This wrapper only works with continuous action space (spaces.Box)" + assert isinstance(self.env.observation_space, + gym.spaces.Box), "This wrapper only works with continuous observation space (spaces.Box)" + + # We modify the wrapper action space, so all actions will lie in [-1, 1] + self.action_space = gym.spaces.Box( + low=-1, high=1, shape=self.env.action_space.shape, dtype=np.float32) + self.observation_space = gym.spaces.Box( + low=-1, high=1, shape=self.env.observation_space.shape, dtype=np.float32) + + def rescale_action(self, scaled_action): + """ + Rescale the action from [-1, 1] to [low, high] + (no need for symmetric action space) + :param scaled_action: (np.ndarray) + :return: (np.ndarray) + """ + return self.env.action_space.low + ( + 0.5 * (scaled_action + 1.0) * (self.env.action_space.high - self.env.action_space.low)) + + def scale_observation(self, observation): + """ + Scale the observation to bounds [-1, 1] + """ + return (2 * ((observation - self.env.observation_space.low) / + (self.env.observation_space.high - self.env.observation_space.low))) - 1 + + def reset(self): + """ + Reset the environment + """ + # Reset the counter + return self.scale_observation(self.env.reset()) + + def step(self, action): + """ + :param action: ([float] or int) Action taken by the agent + :return: (np.ndarray, float, bool, dict) observation, reward, is the episode over?, additional informations + """ + # Rescale action from [-1, 1] to original [low, high] interval + rescaled_action = self.rescale_action(action) + obs, reward, done, info = self.env.step(rescaled_action) + return self.scale_observation(obs), reward, done, info + + +if __name__ == "__main__": + env = None + 
play_threads = [] + train_process = None + mp.set_start_method('spawn') + collected_samples = mp.Value(ctypes.c_ulonglong, 0) + finish_event = mp.Event() + exp_queue = None + + try: + parser = argparse.ArgumentParser() + parser.add_argument("--cuda", default=False, + action="store_true", help="Enable cuda") + parser.add_argument("--name", default='Lambada', + help="Experiment name") + parser.add_argument("--resume", default=[], action='append', + nargs='?', help="Pre-trained model to be loaded") + parser.add_argument("--exp", default=[], action='append', + nargs='?', help="Load experience buffer") + parser.add_argument("--test", default=False, + action='store_true', help="Test mode") + parser.add_argument("--collected", default=-1, type=int, + help="The starting collected samples: defaults to zero or the value stored with the pretrained model") + parser.add_argument("--processed", default=-1, type=int, + help="The starting processed samples: defaults to zero or the value stored with the pretrained model") + parser.add_argument("--real", default=False, + action="store_true", help='set env to vss_real_soccer') + parser.add_argument("--params", default=None, + help="Path to a python parameters file") + + args = parser.parse_args() + + if args.real: + args.simulator = 'real' + + model_params = parameters_dqn.MODEL_HYPERPARAMS + + if args.params is not None: + imported_parameters = SourceFileLoader( + "module.name", args.params).load_module() + model_params = imported_parameters.MODEL_HYPERPARAMS + + run_name = str(args.name) + + if args.test: + model_params['epsilon_high_start'] = model_params['epsilon_high_final'] = model_params['epsilon_low'] = 0.02 + + sys.stdout.flush() + + env = gym.make('VSSAtkVsGk-v0') + state_shape, action_shape = env.observation_space, env.action_space + model_params['state_shape'] = state_shape + model_params['action_shape'] = action_shape + + print(f' === Running: <{run_name}> experiment ===') + print(f'Model params:\n{dict_to_str(model_params)}\n') + + if model_params['agent'] == 'DDPG': + from agents.agentDDPG import train, play, create_actor_model, load_actor_model + elif model_params['agent'] == 'SAC': + from agents.agentSAC import train, play, create_actor_model, load_actor_model + + device = torch.device("cuda" if args.cuda else "cpu") + net = create_actor_model(model_params, state_shape, + action_shape, device) + + checkpoint = {} + + if len(args.resume) > 0: # load checkpoint + args.resume = args.resume[0] + load = True + if args.resume is None: + print("Looking for default pth file") + args.resume = "model/" + run_name + ".pth" + load = os.path.isfile(args.resume) + if not load: + print('File not found:"%s" (nothing to resume)' % + args.resume) + + if load: + print("=> loading checkpoint '%s'" % args.resume) + checkpoint = torch.load(args.resume, map_location=device) + net = load_actor_model(net, checkpoint) + + if args.test: + checkpoint['collected_samples'] = checkpoint['processed_samples'] = 0 + + if args.processed >= 0: # override values in checkpoint + checkpoint['processed_samples'] = args.processed + + if args.collected >= 0: # override values in checkpoint + checkpoint['collected_samples'] = args.collected + + if 'collected_samples' in checkpoint: + collected_samples.value = checkpoint['collected_samples'] + + print("=> collected samples: %d, processed_samples: %d" % + (collected_samples.value, checkpoint['processed_samples'])) + + if len(args.exp) > 0: # load experience buffer + checkpoint['exp'] = args.exp[0] + + writer_path = 
model_params['data_path'] + \ + '/logs/' + run_name + writer = SummaryWriter(log_dir=writer_path+"/agents", comment="-agent") + + queue_size = model_params['batch_size'] + exp_queue = mp.Queue(maxsize=queue_size) + + torch.set_num_threads(20) + print("Threads available: %d" % torch.get_num_threads()) + + th_a = threading.Thread(target=play, args=( + model_params, net, device, exp_queue, env, args.test, writer, collected_samples, finish_event)) + play_threads.append(th_a) + th_a.start() + + if args.test: + while True: + time.sleep(0.01) + + else: # crate train process: + model_params['run_name'] = run_name + model_params['writer_path'] = writer_path + model_params['action_format'] = '2f' + model_params['state_format'] = f"{state_shape.shape[0]}f" + net.share_memory() + train_process = mp.Process(target=train, args=( + model_params, net, device, exp_queue, finish_event, checkpoint)) + train_process.start() + train_process.join() + print("Train process joined.") + + except KeyboardInterrupt: + print("...Finishing...") + finish_event.set() + + except Exception: + print("!!! Exception caught on main !!!") + print(traceback.format_exc()) + + finally: + + finish_event.set() + if exp_queue: + while exp_queue.qsize() > 0: + exp_queue.get() + + print('queue is empty') + + if train_process is not None: + train_process.join() + + print("Waiting for threads to finish...") + play_threads = [t.join(1) + for t in play_threads if t is not None and t.isAlive()] + + env.close() + sys.exit(1) diff --git a/envs/rc_gym/__init__.py b/envs/rc_gym/__init__.py index bbe8f197..714e3ce4 100644 --- a/envs/rc_gym/__init__.py +++ b/envs/rc_gym/__init__.py @@ -19,3 +19,7 @@ register(id='VSS3v3FIRA-v0', entry_point='rc_gym.vss.env_3v3:VSS3v3FIRAEnv' ) + +register(id='VSSAtkVsGk-v0', + entry_point='rc_gym.vss.env_atk_vs_gk:VSSAtkVsGkEnv' + ) diff --git a/envs/rc_gym/vss/env_atk_vs_gk/__init__.py b/envs/rc_gym/vss/env_atk_vs_gk/__init__.py new file mode 100644 index 00000000..72f12e34 --- /dev/null +++ b/envs/rc_gym/vss/env_atk_vs_gk/__init__.py @@ -0,0 +1 @@ +from rc_gym.vss.env_atk_vs_gk.vss_atk_vs_gk import VSSAtkVsGkEnv diff --git a/envs/rc_gym/vss/env_atk_vs_gk/goalkeeper/models.py b/envs/rc_gym/vss/env_atk_vs_gk/goalkeeper/models.py new file mode 100644 index 00000000..e7581c8e --- /dev/null +++ b/envs/rc_gym/vss/env_atk_vs_gk/goalkeeper/models.py @@ -0,0 +1,81 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.distributions import Normal + +LOG_SIG_MAX = 2 +LOG_SIG_MIN = -20 +epsilon = 1e-6 + + +def weights_init_(m): + if isinstance(m, nn.Linear): + nn.init.xavier_uniform_(m.weight, gain=1) + nn.init.constant_(m.bias, 0) + + +class DDPGActor(nn.Module): + def __init__(self, obs_size, act_size): + super(DDPGActor, self).__init__() + self.net = nn.Sequential( + nn.Linear(obs_size, 400), + nn.ReLU(), + nn.Linear(400, 300), + nn.ReLU(), + nn.Linear(300, act_size), + nn.Tanh() + ) + + def forward(self, x): + return self.net(x) + + def get_action(self, state): + x = torch.FloatTensor(state) + action = self.forward(x) + action = action.cpu().detach().numpy() + return action + + +class GaussianPolicy(nn.Module): + def __init__(self, num_inputs, num_actions, hidden_dim=256, action_space=None): + super(GaussianPolicy, self).__init__() + + self.linear1 = nn.Linear(num_inputs, hidden_dim) + self.linear2 = nn.Linear(hidden_dim, hidden_dim) + + self.mean_linear = nn.Linear(hidden_dim, num_actions) + self.log_std_linear = nn.Linear(hidden_dim, num_actions) + + self.apply(weights_init_) + + # action 
rescaling + if action_space is None: + self.action_scale = torch.tensor(1.) + self.action_bias = torch.tensor(0.) + else: + self.action_scale = torch.FloatTensor( + (action_space.high - action_space.low) / 2.) + self.action_bias = torch.FloatTensor( + (action_space.high + action_space.low) / 2.) + + def _forward(self, state): + x = F.relu(self.linear1(state)) + x = F.relu(self.linear2(x)) + mean = self.mean_linear(x) + log_std = self.log_std_linear(x) + log_std = torch.clamp(log_std, min=LOG_SIG_MIN, max=LOG_SIG_MAX) + return mean, log_std + + def forward(self, state): + mean, log_std = self._forward(state) + std = log_std.exp() + normal = Normal(mean, std) + x_t = normal.rsample() # for reparameterization trick (mean + std * N(0,1)) + y_t = torch.tanh(x_t) + action = y_t * self.action_scale + self.action_bias + return action + + def to(self, device): + self.action_scale = self.action_scale.to(device) + self.action_bias = self.action_bias.to(device) + return super(GaussianPolicy, self).to(device) diff --git a/envs/rc_gym/vss/env_atk_vs_gk/vss_atk_vs_gk.py b/envs/rc_gym/vss/env_atk_vs_gk/vss_atk_vs_gk.py new file mode 100644 index 00000000..bd1ea361 --- /dev/null +++ b/envs/rc_gym/vss/env_atk_vs_gk/vss_atk_vs_gk.py @@ -0,0 +1,382 @@ +import math +import random +import torch +import os +from rc_gym.Utils.Utils import OrnsteinUhlenbeckAction +from typing import Dict + +import gym +import numpy as np +from rc_gym.Entities import Frame, Robot +from rc_gym.vss.vss_gym_base import VSSBaseEnv +from rc_gym.vss.env_atk_vs_gk.goalkeeper.models import DDPGActor + + +class VSSAtkVsGkEnv(VSSBaseEnv): + """This environment controls a single robot in a VSS soccer League 3v3 match + + + Description: + Observation: + Type: Box(40) + Normalized Bounds to [-1.25, 1.25] + Num Observation normalized + 0 Ball X + 1 Ball Y + 2 Ball Vx + 3 Ball Vy + 4 + (7 * i) id i Blue Robot X + 5 + (7 * i) id i Blue Robot Y + 6 + (7 * i) id i Blue Robot sin(theta) + 7 + (7 * i) id i Blue Robot cos(theta) + 8 + (7 * i) id i Blue Robot Vx + 9 + (7 * i) id i Blue Robot Vy + 10 + (7 * i) id i Blue Robot v_theta + 25 + (5 * i) id i Yellow Robot X + 26 + (5 * i) id i Yellow Robot Y + 27 + (5 * i) id i Yellow Robot Vx + 28 + (5 * i) id i Yellow Robot Vy + 29 + (5 * i) id i Yellow Robot v_theta + Actions: + Type: Box(2, ) + Num Action + 0 id 0 Blue Left Wheel Speed (%) + 1 id 0 Blue Right Wheel Speed (%) + Reward: + Sum of Rewards: + Goal + Ball Potential Gradient + Move to Ball + Energy Penalty + Starting State: + Randomized Robots and Ball initial Position + Episode Termination: + 5 minutes match time + """ + + def __init__(self): + super().__init__(field_type=0, n_robots_blue=3, n_robots_yellow=3, + time_step=0.032) + + self.action_space = gym.spaces.Box(low=-1, high=1, + shape=(2, ), dtype=np.float32) + self.observation_space = gym.spaces.Box(low=-self.NORM_BOUNDS, + high=self.NORM_BOUNDS, + shape=(40, ), dtype=np.float32) + + # Initialize Class Atributes + self.previous_ball_potential = None + self.actions: Dict = None + self.reward_shaping_total = None + self.v_wheel_deadzone = 0.05 + self.goalkeeper = None + + self.ou_actions = [] + for i in range(self.n_robots_blue + self.n_robots_yellow): + self.ou_actions.append( + OrnsteinUhlenbeckAction(self.action_space, dt=self.time_step) + ) + + self.load_gk() + print('Environment initialized') + + def reset(self): + self.actions = None + self.reward_shaping_total = None + self.previous_ball_potential = None + for ou in self.ou_actions: + ou.reset() + + return super().reset() + + def 
step(self, action): + observation, reward, done, _ = super().step(action) + return observation, reward, done, self.reward_shaping_total + + def load_gk(self): + device = torch.device('cuda') + gk_path = os.path.dirname(os.path.realpath( + __file__)) + '/goalkeeper/gk_model.pth' + self.goalkeeper = DDPGActor(40, 2) + print(gk_path) + gk_checkpoint = torch.load(gk_path, map_location=device) + self.goalkeeper.load_state_dict(gk_checkpoint['state_dict_act']) + self.goalkeeper.eval() + + def _gk_obs(self): + observation = [] + observation.append(self.norm_pos(-self.frame.ball.x)) + observation.append(self.norm_pos(self.frame.ball.y)) + observation.append(self.norm_v(-self.frame.ball.v_x)) + observation.append(self.norm_v(self.frame.ball.v_y)) + + # we reflect the side that the attacker is attacking, + # so that he will attack towards the goal where the goalkeeper is + for i in range(self.n_robots_yellow): + observation.append(self.norm_pos(-self.frame.robots_yellow[i].x)) + observation.append(self.norm_pos(self.frame.robots_yellow[i].y)) + observation.append( + np.sin(np.deg2rad(self.frame.robots_yellow[i].theta)) + ) + observation.append( + -np.cos(np.deg2rad(self.frame.robots_yellow[i].theta)) + ) + observation.append(self.norm_v(-self.frame.robots_yellow[i].v_x)) + observation.append(self.norm_v(self.frame.robots_yellow[i].v_y)) + observation.append(self.norm_w(-self.frame.robots_yellow[i].v_theta)) + + for i in range(self.n_robots_blue): + observation.append(self.norm_pos(-self.frame.robots_blue[i].x)) + observation.append(self.norm_pos(self.frame.robots_blue[i].y)) + observation.append(self.norm_v(-self.frame.robots_blue[i].v_x)) + observation.append(self.norm_v(self.frame.robots_blue[i].v_y)) + observation.append(self.norm_w(-self.frame.robots_blue[i].v_theta)) + + return np.array(observation) + + def _frame_to_observations(self): + + observation = [] + + observation.append(self.norm_pos(self.frame.ball.x)) + observation.append(self.norm_pos(self.frame.ball.y)) + observation.append(self.norm_v(self.frame.ball.v_x)) + observation.append(self.norm_v(self.frame.ball.v_y)) + + for i in range(self.n_robots_blue): + observation.append(self.norm_pos(self.frame.robots_blue[i].x)) + observation.append(self.norm_pos(self.frame.robots_blue[i].y)) + observation.append( + np.sin(np.deg2rad(self.frame.robots_blue[i].theta)) + ) + observation.append( + np.cos(np.deg2rad(self.frame.robots_blue[i].theta)) + ) + observation.append(self.norm_v(self.frame.robots_blue[i].v_x)) + observation.append(self.norm_v(self.frame.robots_blue[i].v_y)) + observation.append(self.norm_w(self.frame.robots_blue[i].v_theta)) + + for i in range(self.n_robots_yellow): + observation.append(self.norm_pos(self.frame.robots_yellow[i].x)) + observation.append(self.norm_pos(self.frame.robots_yellow[i].y)) + observation.append(self.norm_v(self.frame.robots_yellow[i].v_x)) + observation.append(self.norm_v(self.frame.robots_yellow[i].v_y)) + observation.append( + self.norm_w(self.frame.robots_yellow[i].v_theta) + ) + + return np.array(observation) + + def _get_commands(self, actions): + commands = [] + self.actions = {} + + self.actions[0] = actions + v_wheel1, v_wheel2 = self._actions_to_v_wheels(actions) + commands.append(Robot(yellow=False, id=0, v_wheel1=v_wheel1, + v_wheel2=v_wheel2)) + + # Send random commands to the other robots + for i in range(1, self.n_robots_blue): + actions = self.ou_actions[i].sample() + self.actions[i] = actions + v_wheel1, v_wheel2 = self._actions_to_v_wheels(actions) + commands.append(Robot(yellow=False, 
id=i, v_wheel1=v_wheel1, + v_wheel2=v_wheel2)) + + gk_action = self.goalkeeper.get_action(self._gk_obs()) + # we invert the speed on the wheels because of the goalkeeper's reflection on the Y axis. + commands.append(Robot(yellow=True, id=0, v_wheel1=gk_action[1], + v_wheel2=gk_action[0])) + for i in range(1, self.n_robots_yellow): + actions = self.ou_actions[self.n_robots_blue+i].sample() + v_wheel1, v_wheel2 = self._actions_to_v_wheels(actions) + commands.append(Robot(yellow=True, id=i, v_wheel1=v_wheel1, + v_wheel2=v_wheel2)) + + return commands + + def __ball_grad(self): + '''Calculate ball potential gradient + Difference of potential of the ball in time_step seconds. + ''' + # Calculate ball potential + length_cm = self.field_params['field_length'] * 100 + half_lenght = (self.field_params['field_length'] / 2.0)\ + + self.field_params['goal_depth'] + + # distance to defence + dx_d = (half_lenght + self.frame.ball.x) * 100 + # distance to attack + dx_a = (half_lenght - self.frame.ball.x) * 100 + dy = (self.frame.ball.y) * 100 + + dist_1 = -math.sqrt(dx_a ** 2 + 2 * dy ** 2) + dist_2 = math.sqrt(dx_d ** 2 + 2 * dy ** 2) + ball_potential = ((dist_1 + dist_2) / length_cm - 1) / 2 + + grad_ball_potential = 0 + # Calculate ball potential gradient + # = actual_potential - previous_potential + if self.previous_ball_potential is not None: + diff = ball_potential - self.previous_ball_potential + grad_ball_potential = np.clip(diff * 3 / self.time_step, + -5.0, 5.0) + + self.previous_ball_potential = ball_potential + + return grad_ball_potential + + def __move_reward(self): + '''Calculate Move to ball reward + + Cosine between the robot vel vector and the vector robot -> ball. + This indicates rather the robot is moving towards the ball or not. + ''' + + ball = np.array([self.frame.ball.x, self.frame.ball.y]) + robot = np.array([self.frame.robots_blue[0].x, + self.frame.robots_blue[0].y]) + robot_vel = np.array([self.frame.robots_blue[0].v_x, + self.frame.robots_blue[0].v_y]) + robot_ball = ball - robot + robot_ball = robot_ball/np.linalg.norm(robot_ball) + + move_reward = np.dot(robot_ball, robot_vel) + + move_reward = np.clip(move_reward / 0.4, -5.0, 5.0) + return move_reward + + def __energy_penalty(self): + '''Calculates the energy penalty''' + + en_penalty_1 = abs(self.sent_commands[0].v_wheel1) + en_penalty_2 = abs(self.sent_commands[0].v_wheel2) + energy_penalty = - (en_penalty_1 + en_penalty_2) + energy_penalty /= self.rsim.robot_wheel_radius + return energy_penalty + + def _calculate_reward_and_done(self): + reward = 0 + goal = False + w_move = 0.2 + w_ball_grad = 0.8 + w_energy = 2e-4 + if self.reward_shaping_total is None: + self.reward_shaping_total = {'goal_score': 0, 'move': 0, + 'ball_grad': 0, 'energy': 0, + 'goals_blue': 0, 'goals_yellow': 0} + + # Check if goal ocurred + if self.frame.ball.x > (self.field_params['field_length'] / 2): + self.reward_shaping_total['goal_score'] += 1 + self.reward_shaping_total['goals_blue'] += 1 + reward = 10 + goal = True + elif self.frame.ball.x < -(self.field_params['field_length'] / 2): + self.reward_shaping_total['goal_score'] -= 1 + self.reward_shaping_total['goals_yellow'] += 1 + reward = -10 + goal = True + else: + + if self.last_frame is not None: + # Calculate ball potential + grad_ball_potential = self.__ball_grad() + # Calculate Move ball + move_reward = self.__move_reward() + # Calculate Energy penalty + energy_penalty = self.__energy_penalty() + + reward = w_move * move_reward + \ + w_ball_grad * grad_ball_potential + \ + w_energy * 
energy_penalty + + self.reward_shaping_total['move'] += w_move * move_reward + self.reward_shaping_total['ball_grad'] += w_ball_grad \ + * grad_ball_potential + self.reward_shaping_total['energy'] += w_energy \ + * energy_penalty + + if goal: + initial_pos_frame: Frame = self._get_initial_positions_frame() + self.rsim.reset(initial_pos_frame) + self.frame = self.rsim.get_frame() + self.previous_ball_potential = None + self.last_frame = None + + done = self.steps * self.time_step >= 300 + + return reward, done + + def _get_initial_positions_frame(self): + '''Returns the position of each robot and ball for the inicial frame''' + field_half_length = self.field_params['field_length'] / 2 + field_half_width = self.field_params['field_width'] / 2 + + def x(): return random.uniform(-field_half_length + 0.1, + field_half_length - 0.1) + + def y(): return random.uniform(-field_half_width + 0.1, + field_half_width - 0.1) + + def theta(): return random.uniform(-180, 180) + + pos_frame: Frame = Frame() + + pos_frame.ball.x = x() + pos_frame.ball.y = y() + pos_frame.ball.v_x = 0. + pos_frame.ball.v_y = 0. + + agents = [] + for i in range(self.n_robots_blue): + pos_frame.robots_blue[i] = Robot(x=x(), y=y(), theta=theta()) + agents.append(pos_frame.robots_blue[i]) + + for i in range(self.n_robots_yellow): + pos_frame.robots_yellow[i] = Robot(x=x(), y=y(), theta=theta()) + agents.append(pos_frame.robots_yellow[i]) + + def same_position_ref(obj, ref, radius): + if obj.x >= ref.x - radius and obj.x <= ref.x + radius and \ + obj.y >= ref.y - radius and obj.y <= ref.y + radius: + return True + return False + + radius_ball = 0.04 + radius_robot = 0.07 + + for i in range(len(agents)): + while same_position_ref(agents[i], pos_frame.ball, radius_ball): + agents[i] = Robot(x=x(), y=y(), theta=theta()) + for j in range(i): + while same_position_ref(agents[i], agents[j], radius_robot): + agents[i] = Robot(x=x(), y=y(), theta=theta()) + + for i in range(self.n_robots_blue): + pos_frame.robots_blue[i] = agents[i] + + for i in range(self.n_robots_yellow): + pos_frame.robots_yellow[i] = agents[i+self.n_robots_blue] + + return pos_frame + + def _actions_to_v_wheels(self, actions): + left_wheel_speed = actions[0] * self.rsim.linear_speed_range + right_wheel_speed = actions[1] * self.rsim.linear_speed_range + + left_wheel_speed, right_wheel_speed = np.clip( + (left_wheel_speed, right_wheel_speed), + -self.rsim.linear_speed_range, + self.rsim.linear_speed_range + ) + + # Deadzone + if -self.v_wheel_deadzone < left_wheel_speed < self.v_wheel_deadzone: + left_wheel_speed = 0 + + if -self.v_wheel_deadzone < right_wheel_speed < self.v_wheel_deadzone: + right_wheel_speed = 0 + + return left_wheel_speed, right_wheel_speed From d287e51e5533faf916ed86f9f32dd351517f2f14 Mon Sep 17 00:00:00 2001 From: Matheus Albuquerque Date: Mon, 15 Mar 2021 12:56:39 -0300 Subject: [PATCH 2/3] Gk initilizes in the center of the goal --- envs/rc_gym/vss/env_atk_vs_gk/vss_atk_vs_gk.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/envs/rc_gym/vss/env_atk_vs_gk/vss_atk_vs_gk.py b/envs/rc_gym/vss/env_atk_vs_gk/vss_atk_vs_gk.py index bd1ea361..e822ea08 100644 --- a/envs/rc_gym/vss/env_atk_vs_gk/vss_atk_vs_gk.py +++ b/envs/rc_gym/vss/env_atk_vs_gk/vss_atk_vs_gk.py @@ -310,7 +310,10 @@ def _calculate_reward_and_done(self): return reward, done def _get_initial_positions_frame(self): - '''Returns the position of each robot and ball for the inicial frame''' + """ + Goalie starts at the center of the goal, Attaker and ball 
randomly. + Other robots also starts at random positions. + """ field_half_length = self.field_params['field_length'] / 2 field_half_width = self.field_params['field_width'] / 2 @@ -333,8 +336,11 @@ def theta(): return random.uniform(-180, 180) for i in range(self.n_robots_blue): pos_frame.robots_blue[i] = Robot(x=x(), y=y(), theta=theta()) agents.append(pos_frame.robots_blue[i]) - - for i in range(self.n_robots_yellow): + + pos_frame.robots_yellow[0] = Robot(x= field_half_length - 0.05, + y=0., + theta=0) + for i in range(1, self.n_robots_yellow): pos_frame.robots_yellow[i] = Robot(x=x(), y=y(), theta=theta()) agents.append(pos_frame.robots_yellow[i]) From 9dbd3ef019d471434ba89dfd04daf33bc6527632 Mon Sep 17 00:00:00 2001 From: Matheus Albuquerque Date: Thu, 18 Mar 2021 13:20:17 -0300 Subject: [PATCH 3/3] The trained gk can come from envs or rl-agents --- envs/rc_gym/vss/env_atk_vs_gk/vss_atk_vs_gk.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/envs/rc_gym/vss/env_atk_vs_gk/vss_atk_vs_gk.py b/envs/rc_gym/vss/env_atk_vs_gk/vss_atk_vs_gk.py index e822ea08..87a70a3f 100644 --- a/envs/rc_gym/vss/env_atk_vs_gk/vss_atk_vs_gk.py +++ b/envs/rc_gym/vss/env_atk_vs_gk/vss_atk_vs_gk.py @@ -100,7 +100,11 @@ def load_gk(self): self.goalkeeper = DDPGActor(40, 2) print(gk_path) gk_checkpoint = torch.load(gk_path, map_location=device) - self.goalkeeper.load_state_dict(gk_checkpoint['state_dict_act']) + # print("-------> type: ", type(gk_checkpoint)) + if 'state_dict_act' in gk_checkpoint: + self.goalkeeper.load_state_dict(gk_checkpoint['state_dict_act']) + else: + self.goalkeeper.load_state_dict(gk_checkpoint) self.goalkeeper.eval() def _gk_obs(self): @@ -340,6 +344,7 @@ def theta(): return random.uniform(-180, 180) pos_frame.robots_yellow[0] = Robot(x= field_half_length - 0.05, y=0., theta=0) + agents.append(pos_frame.robots_yellow[0]) for i in range(1, self.n_robots_yellow): pos_frame.robots_yellow[i] = Robot(x=x(), y=y(), theta=theta()) agents.append(pos_frame.robots_yellow[i])
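
Review notes on the series, with small runnable sketches; constants marked as assumed are placeholders rather than values taken from the patches.

Usage sketch for the new 'VSSAtkVsGk-v0' registration: this assumes the rc_gym package under envs/ is importable and that the goalkeeper checkpoint gk_model.pth expected by load_gk() is in place; the random-action rollout is only illustrative.

import gym
import rc_gym  # noqa: F401 -- importing rc_gym runs the register() calls, including VSSAtkVsGk-v0

env = gym.make('VSSAtkVsGk-v0')
obs = env.reset()
for _ in range(200):                               # short rollout; a full episode lasts 300 s of sim time
    action = env.action_space.sample()             # two wheel commands in [-1, 1]
    obs, reward, done, shaping = env.step(action)  # the overridden step() returns reward_shaping_total as info
    if done:
        obs = env.reset()
env.close()
print(shaping)  # {'goal_score': ..., 'move': ..., 'ball_grad': ..., 'energy': ..., 'goals_blue': ..., 'goals_yellow': ...}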
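
On NormalizedWrapper in deepvss_atk_vs_gk.py: the agent only ever sees unit boxes, and the wrapper converts at the boundary, rescaling actions from [-1, 1] back to the env's [low, high] and squashing observations the other way. A minimal sketch of the two affine maps (the bounds below are made up; the real ones come from the wrapped env's spaces):

import numpy as np

low = np.array([-1.2, -0.5], dtype=np.float32)    # stand-in bounds for illustration
high = np.array([1.2, 0.5], dtype=np.float32)

def rescale_action(scaled, low, high):
    # [-1, 1] -> [low, high], matching NormalizedWrapper.rescale_action
    return low + 0.5 * (scaled + 1.0) * (high - low)

def scale_observation(obs, low, high):
    # [low, high] -> [-1, 1], matching NormalizedWrapper.scale_observation
    return 2.0 * (obs - low) / (high - low) - 1.0

a = np.array([0.5, -1.0], dtype=np.float32)
assert np.allclose(scale_observation(rescale_action(a, low, high), low, high), a, atol=1e-6)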
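
On GaussianPolicy.forward in goalkeeper/models.py: rsample() draws mean + std * N(0, 1), so the sample stays differentiable with respect to the network outputs (the reparameterization trick), and tanh squashes it before the action_scale/action_bias affine map. A standalone sketch of just that sampling step, without the network:

import torch
from torch.distributions import Normal

LOG_SIG_MIN, LOG_SIG_MAX = -20, 2                    # same clamp bounds as in models.py

mean = torch.zeros(2, requires_grad=True)
log_std = torch.zeros(2, requires_grad=True)
std = log_std.clamp(LOG_SIG_MIN, LOG_SIG_MAX).exp()
x_t = Normal(mean, std).rsample()                    # reparameterized: mean + std * eps, eps ~ N(0, 1)
action = torch.tanh(x_t)                             # squashed to (-1, 1); the policy then rescales with action_scale/bias
action.sum().backward()
print(mean.grad is not None, log_std.grad is not None)   # True True: gradients flow through the sample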
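
On _gk_obs() and the wheel swap in _get_commands(): the pretrained goalkeeper was trained defending one fixed side, so the attacker's world is reflected across the Y axis before it is fed to the goalkeeper network, and the goalkeeper's left/right wheel outputs are swapped when they are sent back. A small sketch of that reflection, assuming angles in degrees as in the env:

def mirror_pose(x, y, theta_deg, v_x, v_y, v_theta):
    # Reflect a pose across the Y axis, as _gk_obs does: theta -> 180 - theta,
    # so sin(theta) is preserved and cos(theta) changes sign, which is why the
    # observation appends sin(theta) unchanged and -cos(theta).
    return -x, y, 180.0 - theta_deg, -v_x, v_y, -v_theta

def mirror_wheels(v_left, v_right):
    # Mirroring swaps handedness, so the wheel commands are exchanged
    # (v_wheel1=gk_action[1], v_wheel2=gk_action[0] in _get_commands).
    return v_right, v_left

print(mirror_pose(0.6, -0.2, 30.0, 0.1, 0.0, 1.5))   # -> (-0.6, -0.2, 150.0, -0.1, 0.0, -1.5)
print(mirror_wheels(0.4, -0.4))                      # -> (-0.4, 0.4)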
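
On __ball_grad(): the shaping term is the clipped time derivative of a potential built from the ball's distances to the defended and attacked goals, so pushing the ball towards the attacked (+x) goal yields a positive reward. A worked sketch using the patch's formula; the field size is assumed, while time_step = 0.032 s comes from the env's __init__:

import math
import numpy as np

FIELD_LENGTH, GOAL_DEPTH, TIME_STEP = 1.5, 0.1, 0.032   # field size assumed; time step from the patch

def ball_potential(ball_x, ball_y):
    # Same expression as __ball_grad: negative distance to the attacked goal plus
    # positive distance to the defended goal, normalized to roughly [-1, 0].
    length_cm = FIELD_LENGTH * 100
    half_length = FIELD_LENGTH / 2.0 + GOAL_DEPTH
    dx_d = (half_length + ball_x) * 100      # distance term to the defended goal
    dx_a = (half_length - ball_x) * 100      # distance term to the attacked goal
    dy = ball_y * 100
    dist_1 = -math.sqrt(dx_a ** 2 + 2 * dy ** 2)
    dist_2 = math.sqrt(dx_d ** 2 + 2 * dy ** 2)
    return ((dist_1 + dist_2) / length_cm - 1) / 2

def grad_ball_potential(previous, current):
    # Clipped change in potential per step, scaled as in the patch.
    return float(np.clip((current - previous) * 3 / TIME_STEP, -5.0, 5.0))

p0 = ball_potential(0.00, 0.0)
p1 = ball_potential(0.02, 0.0)               # ball moved 2 cm towards the attacked goal
print(p0, p1, grad_ball_potential(p0, p1))   # the gradient is positive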
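
On _calculate_reward_and_done(): when no goal is scored, the step reward is the weighted sum 0.2 * move + 0.8 * ball_grad + 2e-4 * energy, where the move term projects the attacker's velocity onto the robot-to-ball direction and the energy term penalizes total wheel speed. A sketch of that non-goal branch; the wheel radius below is assumed, the env reads it from self.rsim.robot_wheel_radius:

import numpy as np

W_MOVE, W_BALL_GRAD, W_ENERGY = 0.2, 0.8, 2e-4          # weights from _calculate_reward_and_done

def move_reward(ball_xy, robot_xy, robot_vel_xy):
    # Velocity component towards the ball, scaled and clipped as in __move_reward.
    direction = np.asarray(ball_xy, dtype=float) - np.asarray(robot_xy, dtype=float)
    direction /= np.linalg.norm(direction)
    return float(np.clip(np.dot(direction, robot_vel_xy) / 0.4, -5.0, 5.0))

def energy_penalty(v_wheel1, v_wheel2, wheel_radius=0.026):
    # Negative sum of wheel speed magnitudes over the wheel radius, as in __energy_penalty.
    return -(abs(v_wheel1) + abs(v_wheel2)) / wheel_radius   # 0.026 m is an assumed radius

move = move_reward([0.5, 0.0], [0.0, 0.0], [0.3, 0.0])    # moving straight at the ball
ball_grad = 1.25                                          # e.g. the value from the sketch above
energy = energy_penalty(0.6, 0.6)
reward = W_MOVE * move + W_BALL_GRAD * ball_grad + W_ENERGY * energy
print(move, energy, reward)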
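
On _actions_to_v_wheels(): actions in [-1, 1] are scaled by the simulator's linear speed range, clipped, and snapped to zero inside a ±0.05 deadzone so commands too small to move the robot are discarded. A sketch with an assumed speed range:

import numpy as np

V_WHEEL_DEADZONE = 0.05        # from the env
LINEAR_SPEED_RANGE = 1.15      # assumed; the env uses self.rsim.linear_speed_range

def actions_to_v_wheels(actions):
    left, right = np.clip(np.asarray(actions, dtype=float) * LINEAR_SPEED_RANGE,
                          -LINEAR_SPEED_RANGE, LINEAR_SPEED_RANGE)
    left = 0.0 if abs(left) < V_WHEEL_DEADZONE else float(left)
    right = 0.0 if abs(right) < V_WHEEL_DEADZONE else float(right)
    return left, right

print(actions_to_v_wheels([0.02, -0.8]))   # -> (0.0, ~-0.92): the tiny left command falls in the deadzone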
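
On _get_initial_positions_frame() after patches 2 and 3: the yellow goalkeeper is pinned just inside its goal (x = field_half_length - 0.05, y = 0) and everything else is drawn uniformly, then re-drawn until nothing sits within a 0.04 m box around the ball or a 0.07 m box around another robot. A compact sketch of that rejection sampling with assumed field dimensions; note that in the patch the pinned goalkeeper is also appended to the agents list, so the overlap loop can still re-draw it to a random spot if the ball or a blue robot happens to spawn on top of it.

import random

FIELD_HALF_LENGTH, FIELD_HALF_WIDTH = 0.75, 0.65   # assumed half field size in meters
RADIUS_BALL, RADIUS_ROBOT = 0.04, 0.07             # overlap radii from the patch
N_ROBOTS = 6                                       # 3 blue + 3 yellow

def sample_xy():
    return (random.uniform(-FIELD_HALF_LENGTH + 0.1, FIELD_HALF_LENGTH - 0.1),
            random.uniform(-FIELD_HALF_WIDTH + 0.1, FIELD_HALF_WIDTH - 0.1))

def too_close(a, b, radius):
    # Same axis-aligned box test as same_position_ref in the patch.
    return abs(a[0] - b[0]) <= radius and abs(a[1] - b[1]) <= radius

ball = sample_xy()
positions = [(FIELD_HALF_LENGTH - 0.05, 0.0)]      # goalkeeper pinned at the goal mouth (patch 2)
while len(positions) < N_ROBOTS:
    candidate = sample_xy()
    if too_close(candidate, ball, RADIUS_BALL):
        continue
    if any(too_close(candidate, p, RADIUS_ROBOT) for p in positions):
        continue
    positions.append(candidate)
print(ball, positions)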