Commit

tested version
zhaoyi11 committed Oct 26, 2020
1 parent 8a79a17 commit 4ec1198
Showing 3 changed files with 50 additions and 36 deletions.
agent.py (60 changes: 37 additions & 23 deletions)
@@ -142,28 +142,34 @@ def __init__(self, args):
        for p in self.target_value_model2.parameters():
            p.requires_grad = False

        # set up the params to update
        self.world_param = list(self.transition_model.parameters())\
            + list(self.observation_model.parameters())\
            + list(self.reward_model.parameters())\
            + list(self.encoder.parameters())
        if args.pcont:
            self.world_param += list(self.pcont_model.parameters())

        # set up optimizers
        self.world_optimizer = optim.Adam(self.world_param, lr=args.world_lr)
        self.actor_optimizer = optim.Adam(self.actor_model.parameters(), lr=args.actor_lr)
        self.value_optimizer = optim.Adam(list(self.value_model.parameters()) + list(self.value_model2.parameters()), lr=args.value_lr)

        # set up the free nats
        self.free_nats = torch.full((1, ), args.free_nats, dtype=torch.float32, device=args.device)  # allowed deviation in KL divergence

        # TODO: change it to the new replay buffer, in buffer.py
        self.D = ExperienceReplay(args.experience_size, args.symbolic, args.observation_size, args.action_size, args.bit_depth, args.device)

+        if self.args.auto_temp:
+            # set up for learning the alpha term (temperature of the entropy term)
+            self.log_temp = torch.zeros(1, requires_grad=True, device=args.device)
+            self.target_entropy = -np.prod(args.action_size if not args.fix_speed else self.args.action_size - 1).item()  # heuristic value from the SAC paper
+            self.temp_optimizer = optim.Adam([self.log_temp], lr=args.value_lr)  # use the same value_lr
+
        # TODO: print out the params used in Dreamer
        # var_counts = tuple(count_vars(module) for module in [self., self.ac.q1, self.ac.q2])
        # print('\nNumber of parameters: \t pi: %d, \t q1: %d, \t q2: %d\n' % var_counts)

    # def process_im(self, image, image_size=None, rgb=None):
    #     # Resize, put channel first, convert it to a tensor, centre it to [-0.5, 0.5] and add a batch dimension.
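As background (this note and the sketch below are not part of the commit): the added block follows the standard SAC recipe for automatic entropy tuning. A single learnable scalar log_temp parameterises the temperature in log space so that exp(log_temp) stays positive, and the target entropy uses the -|A| heuristic from the SAC paper, reduced by one dimension when fix_speed removes throttle from the learned action. A minimal standalone sketch with assumed values (the learning rate and action size here are illustrative, not the repository's configuration):

import numpy as np
import torch
from torch import optim

action_size = 2          # assumed: [steering, throttle]
fix_speed = True         # throttle fixed, so only steering is learned

# SAC heuristic: target entropy = -|A|, here reduced to the learned dimensions
target_entropy = -np.prod(action_size if not fix_speed else action_size - 1).item()

# learn log(alpha) so the temperature is always positive
log_temp = torch.zeros(1, requires_grad=True)
temp_optimizer = optim.Adam([log_temp], lr=8e-5)  # assumed learning rate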
@@ -369,7 +375,15 @@ def update_parameters(self, gradient_steps):

        # latent imagination
        imag_beliefs, imag_states, imag_ac_logps = self._latent_imagination(beliefs, posterior_states, with_logprob=self.args.with_logprob)
        # print("imag_ac_logps", imag_ac_logps)

+        # update temp
+        if self.args.auto_temp:
+            temp_loss = -(self.log_temp * (imag_ac_logps[0] + self.target_entropy).detach()).mean()
+            self.temp_optimizer.zero_grad()
+            temp_loss.backward()
+            self.temp_optimizer.step()
+            self.args.temp = self.log_temp.exp()
+
        # update actor
        actor_loss = self._compute_loss_actor(imag_beliefs, imag_states, imag_ac_logps=imag_ac_logps)
dreamer.py (4 changes: 2 additions & 2 deletions)
@@ -83,8 +83,8 @@
parser.add_argument('--host', type=str, default='127.0.0.1', help='host ip')
# for sac
parser.add_argument('--with_logprob', action='store_true')
-parser.add_argument('--use_automatic_entropy_tuning', action='store_true', help="Use the entropy regularization")
-parser.add_argument('--temp', type=float, default=0.03)  # temp for entropy
+parser.add_argument('--auto_temp', action='store_true', help="Use the entropy regularization")
+parser.add_argument('--temp', type=float, default=0.003)  # temp for entropy

parser.add_argument('--action_size', default=2)
parser.add_argument('--observation_size', default=(1, 40, 40))
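Usage note (inferred from this diff only): --auto_temp replaces --use_automatic_entropy_tuning and, like the old flag, is a store_true option, so automatic tuning stays off unless the flag is passed explicitly; --temp now defaults to 0.003 and acts as the fixed entropy temperature, but is overwritten each update by log_temp.exp() when auto_temp is enabled (see the agent.py hunk above). A small illustrative parser:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--with_logprob', action='store_true')
parser.add_argument('--auto_temp', action='store_true', help="Use the entropy regularization")
parser.add_argument('--temp', type=float, default=0.003)  # temp for entropy

args = parser.parse_args(['--with_logprob', '--auto_temp'])
print(args.auto_temp)  # True only because the flag was passed; default is False
print(args.temp)       # 0.003 unless automatic temperature tuning later overwrites it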
dreamer_agent.py (22 changes: 11 additions & 11 deletions)
@@ -21,7 +21,7 @@

parser = argparse.ArgumentParser()

-parser.add_argument("--car_name", help="Name of the car on MQTT-server", default="ari_dreamer")
+parser.add_argument("--car_name", help="Name of the car on MQTT-server", default="dreamer")
parser.add_argument("--episode_steps", help="Number of steps per episode", default=1000, type=int)
parser.add_argument("--episodes", help="Number of episodes per run", default=100, type=int)
parser.add_argument("--encoder_update", help="Type of encoder to be used", default="aesac")
@@ -40,8 +40,8 @@
SAVE_MODEL = args.save_model

# DONKEY_NAME = args.car_name
-TRAINING_TIMEOUT = 300
-BLOCK_SIZE = 200
+TRAINING_TIMEOUT = 400
+BLOCK_SIZE = 300


class AttrDict(dict):
@@ -52,16 +52,16 @@ class AttrDict(dict):
def define_config():
    config = AttrDict()
    # parameters for dreamer
-    config.car_name = "ari_dreamer"
+    config.car_name = "dreamer"
    config.episodes_steps = 1000
    config.episodes = 1000

    config.belief_size = 200
    config.state_size = 30
    config.hidden_size = 300
    config.embedding_size = 1024
-    config.observation_size = (1, 40, 40)  # TODO: change this later
-    config.action_size = 2  # TODO: change this later
+    config.observation_size = (1, 40, 40)
+    config.action_size = 2
    config.device = "cuda" if torch.cuda.is_available() else "cpu"
    config.testing_device = "cpu"
    config.symbolic = False
@@ -92,23 +92,23 @@ def define_config():
    config.angle_min = -1
    config.angle_max = 1
    # the max steering_diff is not limited yet
-    config.max_steering_diff = 0.25
+    config.max_steering_diff = 0.25  # not used
    config.step_length = 0.1

    # add prefill episodes
-    config.prefill_episodes = 5
+    config.random_episodes = 6
    config.gradient_steps = 100
    config.skip_initial_steps = 20
-    config.block_size = 200
+    config.block_size = 300

    config.max_episodes_steps = config.episodes_steps + config.skip_initial_steps

    # set up for experiments
-    config.pcont = False  # whether to use a learned pcont
+    config.pcont = True  # whether to use a learned pcont
    config.with_logprob = True  # whether to use the soft actor-critic
    config.fix_speed = True  # whether to use a fixed speed; the fixed speed equals throttle_base
+    config.auto_temp = False  # whether to learn the entropy temperature automatically
+    config.temp = 0.03  # entropy temperature
    return config
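As an aside, define_config() returns an AttrDict, a dict subclass that allows attribute-style access to config fields. Only the line class AttrDict(dict): is visible in this diff, so the exact wiring below is an assumption, but the usual pattern looks like this:

class AttrDict(dict):
    # assumed implementation: map attribute access onto dict items
    __getattr__ = dict.__getitem__
    __setattr__ = dict.__setitem__

def define_config():
    config = AttrDict()
    config.car_name = "dreamer"
    config.pcont = True        # learned continuation (discount) head
    config.auto_temp = False   # fixed entropy temperature by default
    config.temp = 0.03
    return config

config = define_config()
print(config.car_name, config["pcont"])  # attribute and key access are interchangeable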

@@ -390,7 +390,7 @@ def enforce_limits(self, action, prev_steering):
    print("Starting as training server")
    load_model = args.load_model
    config = define_config()
-    agent = RL_Agent("ari_dreamer", True, args.car_name)  # TODO: remember to change to use sim or real car
+    agent = RL_Agent("dreamer", True, args.car_name)  # TODO: remember to change to use sim or real car

    if args.load_model:
        agent.agent = torch.load(args.load_model)
