Commit so can get Felipe's new id line in python train line
Niki Love committed Jul 5, 2024
2 parents 8b3c063 + 45367b3 commit d931cfd
Showing 4 changed files with 431 additions and 329 deletions.
8 changes: 4 additions & 4 deletions hyperpars/ppo-caribou.yml
@@ -14,7 +14,7 @@
algo: "PPO"
total_timesteps: 5000000
algo_config:
-  tensorboard_log: "../../../logs"
+  tensorboard_log: "../../logs"
  #
  policy: 'MlpPolicy'
  # batch_size: 512
@@ -35,16 +35,16 @@ algo_config:

# env
env_id: "CaribouScipy"
config: {"r_b":.25 } #this is what is passed into the creation of the environment, could pass diff param values here to change the defaults (instead of messing with the params defined in caribou_ode.py)
config: {} #this is what is passed into the creation of the environment, could pass diff param values here to change the defaults (instead of messing with the params defined in caribou_ode.py)
n_envs: 12

tensorboard: "/home/rstudio/logs"
#total_timesteps: 50000
total_timesteps: 5000000
use_sde: True
repo: "boettiger-lab/rl4eco"
save_path: "./saved_agents"
id: "3"
save_path: "../saved_agents"
id: "LFupdate_shortTF1strat"
#rppo - ppo that has memory (should be better than PPO but hard to train lol)
#td3 - giving some weird errors
#tqc is actually better if we can give it enough time to run
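For context, a minimal sketch of how a hyperparameter file like this could drive training. The loader, the "CaribouScipy" gym registration, and the save-path naming are assumptions for illustration, not the repo's actual entry point; the stable-baselines3 calls themselves are standard.

import yaml
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

# Hypothetical loader: parse the YAML above into a dict of hyperparameters.
with open("hyperpars/ppo-caribou.yml") as f:
    cfg = yaml.safe_load(f)

# n_envs parallel copies; cfg["config"] is forwarded to the env constructor,
# which is where per-run parameter overrides (like the removed r_b) would go.
env = make_vec_env(cfg["env_id"], n_envs=cfg["n_envs"], env_kwargs={"config": cfg["config"]})

model = PPO(
    cfg["algo_config"]["policy"],
    env,
    use_sde=cfg["use_sde"],
    tensorboard_log=cfg["tensorboard"],
)
model.learn(total_timesteps=cfg["total_timesteps"])
model.save(f'{cfg["save_path"]}/{cfg["algo"]}-{cfg["env_id"]}-{cfg["id"]}')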
694 changes: 381 additions & 313 deletions notebooks/4-15-Meeting-Notebook.ipynb

Large diffs are not rendered by default.

Binary file added saved_agents/PPO-CaribouScipy-LFupdate.zip
Binary file not shown.
58 changes: 46 additions & 12 deletions src/rl4caribou/envs/caribou_ode.py
@@ -5,9 +5,9 @@

def dynamics_scipy(pop, effort, p, timestep, singularities):
    #
-    # parameters of the ODE are s.t. t is in years, so lets make the time-step a tenth of a year
+    # parameters of the ODE are s.t. t is in years, so lets make the time-step a twelfth of a year
    # (this ad hoc rule gives better convergence than if we set dt = 1 full year)
-    dt = 1./12
+    dt = 1
    t_interval = np.float32([timestep, timestep+dt])
    y0 = pop
    timestep_randomness = (
@@ -114,28 +114,49 @@ def harvest(pop, effort):
    return pop


-def utility(pop, effort):
+def utility(pop, effort, *args, **kwargs):
    benefits = 1 * pop[1] # benefit from Caribou
    costs = 0.1 * (effort[0] + effort[1]) + 0.4 * effort[2] # cost to culling + cost of restoring
-    if np.any(pop <= [0.05, 0.01, 0.001]):
+    if np.any(pop <= [0.01, 0.05, 0.001]):
        benefits -= 1
    return benefits - costs

+def utility2(pop, effort, used_budget, budget, *args, **kwargs):
+    # caribou health
+    reward = 1 * pop[1]
+
+    # ecosystem balance
+    if np.any(pop <= [0.05, 0.01, 0.001]):
+        reward -= 1
+
+    # budget
+    if used_budget > budget:
+        reward -= 5
+
+    # choose only one strategy at the same time
+    numerical_threshold = 10**(-4)
+    nontrivial_efforts = [1 if eff > numerical_threshold else 0 for eff in effort]
+    if sum(nontrivial_efforts) > 1:
+        reward -= 5
+
+    return reward
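To make the new reward concrete, a worked example with purely hypothetical numbers (the (moose, caribou, wolf) ordering of pop is inferred from the "benefit from Caribou" comment and the effort tuple, so treat it as an assumption):

import numpy as np

pop = np.float32([0.3, 0.5, 0.02])   # (moose, caribou, wolf) -- ordering assumed
effort = np.float32([0.2, 0.0, 0.0]) # only moose culling is active

print(utility2(pop, effort, used_budget=0.02, budget=5))  # 0.5: caribou benefit, no penalties
print(utility2(pop, effort, used_budget=6.0, budget=5))   # -4.5: over-budget penalty of 5
print(utility2(pop, np.float32([0.2, 0.3, 0.0]), 0.1, 5)) # -4.5: two strategies at once

Note that, unlike utility, utility2 does not subtract effort costs from the reward directly; spending now enters only through the used_budget comparison.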

def observe_3pop_restoration(env):
-    rest_obs = -1 + 2 * (
+    rest_obs = -1 + 2 * ( #get a value between -1 and 1
        (env.current_ab - env.parameters["a_B"])
        / (env.current_ab - env.restored_ab)
    )
    return np.float32([*env.state, rest_obs])
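Since a_B starts at env.current_ab and is pulled toward env.restored_ab by restoration effort (see the a_B update in step below), this expression maps a_B linearly onto [-1, 1]: -1 with no restoration (a_B = current_ab) and +1 at full restoration (a_B = restored_ab).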


class CaribouScipy(gym.Env):
    """A 3-species ecosystem model with two control actions"""

    def __init__(self, config=None):
        config = config or {}

        ## these parameters may be specified in config
-        self.Tmax = config.get("Tmax", 800)
+        self.Tmax = config.get("Tmax", 100)
        self.max_episode_steps = self.Tmax
        self.threshold = config.get("threshold", np.float32(1e-4))
        self.init_sigma = config.get("init_sigma", np.float32(1e-3))
@@ -159,10 +180,10 @@ def __init__(self, config=None):
            "a_B": self.current_ab,
            #
            "K_m": config.get("K_m", np.float32(1.1)),
-            "K_b": config.get("K_b", np.float32(0.40)),
+            "K_b": config.get("K_b", np.float32(0.5)),
            #
            "h_M": config.get("h_M", np.float32(0.112)),
-            "h_B": config.get("h_B", np.float32(0.112)),
+            "h_B": config.get("h_B", np.float32(0.110)),
            #
            "x": config.get("x", np.float32(2)),
            "u": config.get("u", np.float32(1)),
@@ -177,7 +198,14 @@
        self.singularities = config.get("singularities", None)
        self.dynamics = config.get("dynamics", dynamics_scipy)
        self.harvest = config.get("harvest", harvest)
-        self.utility = config.get("utility", utility)
+        #self.utility = config.get("utility", utility)
+        self.utility = config.get("utility", utility2)
+        self.budget = config.get('budget', 5)
+        self.w_cull_cost = config.get('w_cull_cost', 0.1)
+        self.m_cull_cost = config.get('m_cull_cost', 0.1)
+        self.LF_rest_cost = config.get('LF_rest_cost', 0.4)
+        self.used_budget = 0

        self.observe = config.get(
            "observe", observe_3pop_restoration
        ) # default to perfectly observed case
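As a usage note, the new budget knobs can be overridden at construction time. A minimal sketch with illustrative values (defaults, per the code above, are budget=5, m_cull_cost=0.1, w_cull_cost=0.1, LF_rest_cost=0.4; the reset signature assumes the gymnasium API implied by the truncated flag in step):

env = CaribouScipy(config={
    "budget": 10,        # episode-level spending cap checked by utility2
    "LF_rest_cost": 0.3, # cost multiplier on restoration effort
    "Tmax": 200,
})
obs, info = env.reset()  # gymnasium-style reset (signature assumed)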
@@ -208,7 +236,12 @@ def step(self, action):
        action = np.clip(action, self.action_space.low, self.action_space.high)
        pop = self.population_units() # current state in natural units
        effort = (action + 1.0) / 2 # (moose_cull, wolf_cull, restoration_intensity)

+        self.used_budget += ( # shorthand for self.used_budget = self.used_budget + (...)
+            effort[0] * self.m_cull_cost
+            + effort[1] * self.w_cull_cost
+            + effort[2] * self.LF_rest_cost
+        )
        # harvest and recruitment
        nextpop = self.dynamics(
            pop, effort, self.parameters, self.timestep, singularities=self.singularities
@@ -219,7 +252,8 @@ def step(self, action):
        self.parameters["a_B"] = self.parameters["a_B"] - (self.parameters["a_B"] - self.restored_ab) * effort[2] * self.a_restoration_change

        ## linear approx to rewards
-        reward = self.utility((pop+nextpop)/2., effort)
+        #reward = self.utility((pop+nextpop)/2., effort)
+        reward = self.utility((pop+nextpop)/2., effort, self.used_budget, self.budget)

        self.timestep += 1
        truncated = bool(self.timestep > self.Tmax) # or bool(any(nextpop < 1e-7))
@@ -242,7 +276,7 @@ def population_units(self):
        return np.clip(
            pop, np.repeat(0, pop.__len__()), np.repeat(np.Inf, pop.__len__())
        )



# class CaribouNumba(gym.Env):
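Taken together, a hedged end-to-end sketch of how used_budget accrues each step and feeds utility2's over-budget penalty (random actions for illustration; the five-tuple step return is assumed from the truncated flag above, and the visible diff only zeroes used_budget in __init__, so whether reset() clears it is not shown):

env = CaribouScipy()
obs, info = env.reset()
total_reward = 0.0
for _ in range(env.Tmax):
    action = env.action_space.sample()  # random policy, purely illustrative
    obs, reward, terminated, truncated, info = env.step(action)
    total_reward += reward
    if terminated or truncated:
        break
print(f"return: {total_reward:.2f}, spend: {env.used_budget:.2f} / {env.budget}")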
