Skip to content

Commit

Permalink
Merge pull request #4 from goncamateus:dqn-DyLam
Browse files Browse the repository at this point in the history
Dqn-DyLam
  • Loading branch information
goncamateus authored Sep 14, 2024
2 parents 129b30f + 7ce8702 commit 487b514
Show file tree
Hide file tree
Showing 17 changed files with 551 additions and 35 deletions.
20 changes: 20 additions & 0 deletions envs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,23 @@
entry_point="envs.taxi:Taxi",
max_episode_steps=200,
)

# Multi-objective environment registrations (Gymnasium-style `register`).
# The `id` strings here are referenced by `gym_id` entries in experiments.yml.

# Multi-objective MountainCar wrapper (envs/mountain_car.py) that accumulates
# per-objective reward totals into the info dict it returns.
register(
    id="mo-MountainCar-v0",
    entry_point="envs.mountain_car:MountainCar",
    max_episode_steps=200,
)

# LunarLander variant forced to discrete actions via kwargs.
# NOTE(review): `LunarLanderStrat` is not visible in this file — presumably a
# stratified-reward wrapper; confirm against envs/lunar_lander.py.
register(
    id="mo-LunarLander-v2",
    entry_point="envs.lunar_lander:LunarLanderStrat",
    kwargs={"continuous": False},
    max_episode_steps=1000,
    reward_threshold=200,
)

# Three-objective Minecart wrapper (envs/minecart.py): two ore cargos + fuel.
register(
    id="mo-Minecart-v0",
    entry_point="envs.minecart:MinecartEnv",
    max_episode_steps=1000,
)
30 changes: 30 additions & 0 deletions envs/minecart.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from mo_gymnasium.envs.minecart.minecart import Minecart


class MinecartEnv(Minecart):
    """Minecart wrapper that accumulates per-objective episode reward totals.

    The parent env returns a length-3 vector reward; the indices are assumed
    to be (first ore, second ore, fuel) — TODO confirm against mo_gymnasium's
    Minecart reward ordering.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Running per-episode totals; re-zeroed on every reset().
        self.cumulative_info = self._zeroed_totals()

    @staticmethod
    def _zeroed_totals():
        """Return a fresh all-zero totals dict (single source of truth).

        NOTE: the "minerium" spelling is preserved on purpose — downstream
        logging/experiments key on these exact strings.
        """
        return {
            "reward_First_minerium": 0,
            "reward_Second_minerium": 0,
            "reward_Fuel": 0,
            "Original_reward": 0,
        }

    def reset(self, *args, **kwargs):
        """Reset the env and zero the cumulative totals."""
        obs, info = super().reset(*args, **kwargs)
        self.cumulative_info = self._zeroed_totals()
        return obs, info

    def step(self, action):
        """Step the env, returning the running totals in the info slot.

        NOTE(review): the parent's own `info` dict is discarded and the
        cumulative totals are returned in its place — callers that rely on
        the base env's info keys will not see them.
        """
        obs, reward, termination, truncation, info = super().step(action)
        self.cumulative_info["reward_First_minerium"] += reward[0]
        self.cumulative_info["reward_Second_minerium"] += reward[1]
        self.cumulative_info["reward_Fuel"] += reward[2]
        # `reward` is vector-valued (supports .sum()); the sum recovers the
        # scalarized "original" reward.
        self.cumulative_info["Original_reward"] += reward.sum()
        return obs, reward, termination, truncation, self.cumulative_info
31 changes: 31 additions & 0 deletions envs/mountain_car.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from mo_gymnasium.envs.mountain_car.mountain_car import MOMountainCar


class MountainCar(MOMountainCar):
    """MOMountainCar wrapper that accumulates per-objective episode totals.

    The parent env returns a length-3 vector reward; the indices are assumed
    to be (time penalty, reverse penalty, forward penalty) — matching the
    accumulation keys below; TODO confirm against mo_gymnasium's
    MOMountainCar reward ordering.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Running per-episode totals; re-zeroed on every reset().
        self.cumulative_reward_info = self._zeroed_totals()

    @staticmethod
    def _zeroed_totals():
        """Return a fresh all-zero totals dict (single source of truth)."""
        return {
            "reward_time": 0,
            "reward_reverse": 0,
            "reward_forward": 0,
            "Original_reward": 0,
        }

    def reset(self, *, seed: int | None = None, options: dict | None = None):
        """Reset the env and zero the cumulative totals."""
        result = super().reset(seed=seed, options=options)
        self.cumulative_reward_info = self._zeroed_totals()
        return result

    def step(self, action: int):
        """Step the env, returning the running totals in the info slot.

        NOTE(review): the parent's own `info` dict is discarded and the
        cumulative totals are returned in its place.
        """
        state, reward, terminated, truncated, info = super().step(action)
        self.cumulative_reward_info["reward_time"] += reward[0]
        self.cumulative_reward_info["reward_reverse"] += reward[1]
        self.cumulative_reward_info["reward_forward"] += reward[2]
        # A flat -1 per step — presumably mirrors the single-objective
        # MountainCar's per-step reward; verify against the base env.
        self.cumulative_reward_info["Original_reward"] += -1
        return state, reward, terminated, truncated, self.cumulative_reward_info
72 changes: 67 additions & 5 deletions experiments.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,32 @@ Baseline:
gym_id: mo-LunarLanderContinuous-v2
num_envs: 16
total_timesteps: 100000

HOPPER:
gym_id: mo-Hopper-v4
num_envs: 16
total_timesteps: 500000

PENDULUM:
gym_id: mo-Pendulum-v1
num_envs: 16
total_timesteps: 50000

HUMANOID:
gym_id: mo-Humanoid-v4
num_envs: 16
total_timesteps: 500000

HALFCHEETAH:
gym_id: mo-HalfCheetah-v4
num_envs: 16
total_timesteps: 500000

HALFCHEETAHEF:
gym_id: mo-HalfCheetahEF-v4
num_envs: 16
total_timesteps: 500000

VSS:
gym_id: mo-VSS-v0
num_envs: 20
Expand All @@ -36,6 +42,7 @@ Baseline:
n_hidden: 4
gamma: 0.95
update_frequency: 1

VSSEF:
gym_id: mo-VSSEF-v0
num_envs: 20
Expand All @@ -49,10 +56,28 @@ Baseline:
n_hidden: 4
gamma: 0.95
update_frequency: 1

TAXI:
gym_id: mo-Taxi-v3
gamma: 0.9999
q_lr: 0.1
epsilon: 0.8

QLUNARLANDER:
gym_id: mo-LunarLander-v2
num_envs: 16
total_timesteps: 100000
epsilon: 0.8

MINECART:
gym_id: mo-Minecart-v0
num_envs: 16
total_timesteps: 200000
gamma: 0.98
target_network_frequency: 200
tau: 0.00001
epsilon: 1.0
epsilon_decay_factor: 0.99997

Dylam:
LUNARLANDER:
Expand All @@ -65,6 +90,7 @@ Dylam:
r_max: [1000, -100, -20, 1]
r_min: [-1000, -1000, -1000, -1]
dylam_rb: 100

HOPPER:
gym_id: mo-Hopper-v4
num_envs: 16
Expand All @@ -76,6 +102,7 @@ Dylam:
r_min: [0, -200, -250, 0]
dylam_rb: 100
dylam_tau: 0.9999

PENDULUM:
gym_id: mo-Pendulum-v1
num_envs: 16
Expand All @@ -87,6 +114,7 @@ Dylam:
r_min: [-100, -100, -100]
dylam_rb: 100
dylam_tau: 0.9999

HUMANOID:
gym_id: mo-Humanoid-v4
num_envs: 16
Expand All @@ -98,6 +126,7 @@ Dylam:
r_min: [0, -1000, 0]
dylam_rb: 100
dylam_tau: 0.9999

HALFCHEETAH:
gym_id: mo-HalfCheetah-v4
num_envs: 16
Expand All @@ -109,6 +138,7 @@ Dylam:
r_min: [0, -800]
dylam_rb: 500
dylam_tau: 0.9999

HALFCHEETAHEF:
gym_id: mo-HalfCheetahEF-v4
num_envs: 16
Expand All @@ -120,6 +150,7 @@ Dylam:
r_min: [0, 0]
dylam_rb: 500
dylam_tau: 0.9999

VSS:
gym_id: mo-VSS-v0
num_envs: 20
Expand All @@ -140,6 +171,7 @@ Dylam:
r_min: [0, 0, -600]
dylam_rb: 500
dylam_tau: 0.9999

VSSEF:
gym_id: mo-VSSEF-v0
num_envs: 20
Expand All @@ -160,18 +192,47 @@ Dylam:
r_min: [0, 0, 0]
dylam_rb: 500
dylam_tau: 0.9999

TAXI:
gym_id: mo-Taxi-v3
gamma: 0.9999
q_lr: 0.1
epsilon: 0.8
stratified: True
num_rewards: 3
dylam: True
lambdas: [0.3333, 0.3333, 0.3333]
r_max: [0, 1, 0]
r_min: [-200, 0, -10]
dylam_tau: 0.995
dylam_rb: 10

QLUNARLANDER:
gym_id: mo-LunarLander-v2
num_envs: 16
total_timesteps: 100000
epsilon: 0.8
stratified: True
dylam: True
num_rewards: 4
r_max: [1000, -100, -20, 1]
r_min: [-1000, -1000, -1000, -1]
dylam_rb: 100

MINECART:
gym_id: mo-Minecart-v0
num_envs: 16
total_timesteps: 200000
gamma: 0.98
target_network_frequency: 200
tau: 0.00001
epsilon: 1.0
epsilon_decay_factor: 0.99997
stratified: True
dylam: True
num_rewards: 3
# NOTE(review): duplicate key — `gamma` is already set to 0.98 earlier in this
# MINECART section; with PyYAML the later key wins, so 0.9999 takes effect.
# These two lines look pasted from the TAXI config — reconcile to one value.
gamma: 0.9999
q_lr: 0.1
r_max: [1.5, 1.5, 0]
r_min: [0, 0, -20]

GPILS:
LUNARLANDER:
Expand Down Expand Up @@ -227,8 +288,9 @@ GPILS:
Drq:
TAXI:
gym_id: mo-Taxi-v3
epsilon: 0.8
gamma: 0.9999
q_lr: 0.1
stratified: True
lambdas: [0.3333, 0.3333, 0.3333]
num_rewards: 3
gamma: 0.9999
q_lr: 0.1
lambdas: [0.3333, 0.3333, 0.3333]
Loading

0 comments on commit 487b514

Please sign in to comment.